ComparaTutorial
From WormBaseWiki
Jump to navigationJump to search
Places
Compara tarball for WS163 (mysql dumps)
Compara Page (including how to install / API documentation)
EnsEMBL Registry
As the default GenomeDB locations stored in the database point to the Sanger setup, you should use an EnsEMBL registry file to point the API at your own databases.
# ensembl.reg
#
# EnsEMBL registry configuration. Constructing each adaptor below connects
# one species/group name to the database that holds its data: three core
# databases (C. elegans, C. briggsae, C. remanei) and one Compara database.
# Replace the host, port, user and database names with those of your own
# installation.
use strict;
use warnings;

use Bio::EnsEMBL::Utils::ConfigRegistry;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;

# Core database for C. elegans.
Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    -host    => 'ia64b',
    -user    => 'wormro',
    -port    => 3306,
    -species => 'Caenorhabditis elegans',
    -group   => 'core',
    -dbname  => 'worm_WB160',
);

# Core database for C. briggsae.
Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    -host    => 'ia64b',
    -user    => 'wormro',
    -port    => 3306,
    -species => 'Caenorhabditis briggsae',
    -group   => 'core',
    -dbname  => 'worm_ensembl_briggsae',
);

# Core database for C. remanei.
Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    -host    => 'ia64b',
    -user    => 'wormro',
    -port    => 3306,
    -species => 'Caenorhabditis remanei',
    -group   => 'core',
    -dbname  => 'worm_ensembl_remanei',
);

# Compara database; the scripts look it up under the species name 'Compara'.
Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(
    -host    => 'ia64b',
    -user    => 'wormro',
    -port    => 3306,
    -species => 'Compara',
    -dbname  => 'worm_compara_lagan',
);

# The file must end with a true value.
1;
Please replace the host, port, username and password with the values for your own installation.
It is a good idea to set up an environment variable that points at the registry file (csh/tcsh syntax): setenv ENSEMBL_REGISTRY </path_to_registry/ensembl.reg>
Genomic Alignments
# 2-way.pl
#
# Fetch the MLAGAN genomic alignment blocks between C. elegans, C. briggsae
# and C. remanei that overlap a region of C. elegans CHROMOSOME_I, and print
# each block to STDOUT in ClustalW format.
use strict;
use warnings;

use Bio::EnsEMBL::Registry;
use Bio::SimpleAlign;
use Bio::AlignIO;
use Bio::LocatableSeq;

# Load the registry configuration (see the ENSEMBL_REGISTRY variable
# described in the registry section of this page).
Bio::EnsEMBL::Registry->load_all();

# adaptors setup
my $mlss_a = Bio::EnsEMBL::Registry->get_adaptor(
    'Compara', 'compara', 'MethodLinkSpeciesSet' );
my $gdb_a = Bio::EnsEMBL::Registry->get_adaptor(
    'Compara', 'compara', 'GenomeDB' );
my $ga_a = Bio::EnsEMBL::Registry->get_adaptor(
    'Compara', 'compara', 'GenomicAlignBlock' );
my $dnaf_a = Bio::EnsEMBL::Registry->get_adaptor(
    'Compara', 'compara', 'DnaFrag' );

# genome databases setup
my $edb = $gdb_a->fetch_by_name_assembly('Caenorhabditis elegans');
my $bdb = $gdb_a->fetch_by_name_assembly('Caenorhabditis briggsae');
my $rdb = $gdb_a->fetch_by_name_assembly('Caenorhabditis remanei');

# The three-species MLAGAN alignment set.
my $mlss = $mlss_a->fetch_by_method_link_type_GenomeDBs(
    'MLAGAN', [ $edb, $bdb, $rdb ] );
die "No MLAGAN method link species set found for the three genomes\n"
    unless defined $mlss;

# Output stream: a filehandle that writes every alignment object printed
# to it onto STDOUT in ClustalW format.
my $alignIO = Bio::AlignIO->newFh(
    -interleaved => 0,
    -fh          => \*STDOUT,
    -format      => 'clustalw',
    -idlength    => 10,
);

# get a DNA fragment
my $dna_frag = $dnaf_a->fetch_by_GenomeDB_and_name( $edb, 'CHROMOSOME_I' );
die "DnaFrag CHROMOSOME_I not found for Caenorhabditis elegans\n"
    unless defined $dna_frag;

# Region of interest as (start, end). The original listing passed
# (2000001, 300000), i.e. a start beyond the end; 200001-300000 is assumed
# to be the intended 100 kb window -- adjust to the region you need.
my $region_start = 200001;
my $region_end   = 300000;

my $all_blocks = $ga_a->fetch_all_by_MethodLinkSpeciesSet_DnaFrag(
    $mlss, $dna_frag, $region_start, $region_end );

# for all aligned blocks: convert to a SimpleAlign and print it
foreach my $align (@$all_blocks) {
    my $simple_align = $align->get_SimpleAlign;
    print {$alignIO} $simple_align;
}
Syntenic Regions
# syntenic_regions.pl
#
# List the syntenic regions that overlap positions 100000-200000 of
# C. elegans CHROMOSOME_I. For every synteny region found, one line is
# printed per participating DNA fragment region ("<genome name>: <slice
# name>"), followed by a blank line.
use strict;
use Bio::EnsEMBL::Registry;

Bio::EnsEMBL::Registry->load_all();

# All adaptors come from the Compara database registered as 'Compara'.
my $mlss_adaptor = Bio::EnsEMBL::Registry->get_adaptor(
    'Compara', 'compara', 'MethodLinkSpeciesSet' );
my $genome_db_adaptor = Bio::EnsEMBL::Registry->get_adaptor(
    'Compara', 'compara', 'GenomeDB' );
my $dnafrag_adaptor = Bio::EnsEMBL::Registry->get_adaptor(
    'Compara', 'compara', 'DnaFrag' );
my $synteny_adaptor = Bio::EnsEMBL::Registry->get_adaptor(
    'Compara', "compara", "SyntenyRegion" );

# Genome databases for the three species.
my $elegans_db  = $genome_db_adaptor->fetch_by_name_assembly('Caenorhabditis elegans');
my $briggsae_db = $genome_db_adaptor->fetch_by_name_assembly('Caenorhabditis briggsae');
my $remanei_db  = $genome_db_adaptor->fetch_by_name_assembly('Caenorhabditis remanei');

# The synteny data set covering all three genomes.
my $synteny_mlss = $mlss_adaptor->fetch_by_method_link_type_GenomeDBs(
    "SYNTENY", [ $elegans_db, $briggsae_db, $remanei_db ] );

# Reference fragment whose region is queried.
my $chromosome = $dnafrag_adaptor->fetch_by_GenomeDB_and_name(
    $elegans_db, 'CHROMOSOME_I' );

my $regions = $synteny_adaptor->fetch_by_MethodLinkSpeciesSet_DnaFrag(
    $synteny_mlss, $chromosome, 100000, 200000 );

for my $region (@$regions) {

    # Each child of a synteny region is one DNA fragment region.
    my $members = $region->children();

    for my $member (@$members) {
        my $genome_db = $member->dnafrag->genome_db;
        print $genome_db->name, ": ", $member->slice->name, "\n";
    }

    # Blank line between synteny regions.
    print "\n";
}