Difference between revisions of "ComparaTutorial"

From WormBaseWiki
Jump to navigationJump to search
(New page: =Places= [ftp://ftp.sanger.ac.uk/pub2/wormbase/data/compara.tar.bz2 Compara tarball for WS163] (mysql dumps) [http://www.ensembl.org/info/software/compara/index.html Compara Page] (includ...)
 
 
Line 108: Line 108:
 
   [http://www.perldoc.com/perl5.6/pod/func/print.html <span><font color="#000066">print</font></span>] <span><font color="#ff0000">"<span>'''<font color="#000099">\n</font>'''</span>"</font></span><nowiki>;
 
   [http://www.perldoc.com/perl5.6/pod/func/print.html <span><font color="#000066">print</font></span>] <span><font color="#ff0000">"<span>'''<font color="#000099">\n</font>'''</span>"</font></span><nowiki>;
 
  </nowiki><span><font color="#66cc66">}</font></span>
 
  </nowiki><span><font color="#66cc66">}</font></span>
 +
 +
 +
 +
[[Category:Developer documentation]]

Latest revision as of 18:18, 11 August 2010

Places

Compara tarball for WS163 (mysql dumps)

Compara Page (including how to install / API documentation)

EnsEMBL Registry

As the default GenomeDB locations stored in the database point to the Sanger setup, you should use an EnsEMBL registry file to point the API at your own databases.


# ensembl.reg
#
# EnsEMBL registry configuration: creates one core DBAdaptor per species
# plus the Compara DBAdaptor. Adjust the connection settings and database
# names below to match your own MySQL server.
use strict;
use warnings;

use Bio::EnsEMBL::Utils::ConfigRegistry;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;

# Connection settings shared by every database below.
my %connection = (
    -host => 'ia64b',
    -user => 'wormro',
    -port => 3306,
);

# Core databases, one per species.
# Class->new(...) is used rather than the indirect-object form
# "new Class(...)", which Perl can mis-parse.
Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    %connection,
    -species => 'Caenorhabditis elegans',
    -group   => 'core',
    -dbname  => 'worm_WB160',
);
Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    %connection,
    -species => 'Caenorhabditis briggsae',
    -group   => 'core',
    -dbname  => 'worm_ensembl_briggsae',
);
Bio::EnsEMBL::DBSQL::DBAdaptor->new(
    %connection,
    -species => 'Caenorhabditis remanei',
    -group   => 'core',
    -dbname  => 'worm_ensembl_remanei',
);

# Compara database holding the alignments and synteny data.
Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(
    %connection,
    -species => 'Compara',
    -dbname  => 'worm_compara_lagan',
);

# The file must end with a true value.
1;

Please replace the host, port, username and (if required) password with the values for your own database server.

It is a good idea to set up an environment variable that points at the registry file, e.g. (csh/tcsh): setenv ENSEMBL_REGISTRY </path_to_registry/ensembl.reg>

Genomic Alignments

# 2-way.pl
#
# Fetches the MLAGAN genomic alignment blocks for C. elegans, C. briggsae
# and C. remanei on a region of C. elegans CHROMOSOME_I and prints each
# block to STDOUT in ClustalW format.

use strict;
use warnings;
use Bio::EnsEMBL::Registry;
use Bio::SimpleAlign;
use Bio::AlignIO;
use Bio::LocatableSeq;

# Load the registry configuration (presumably the file named by
# $ENSEMBL_REGISTRY -- confirm against the Registry documentation).
Bio::EnsEMBL::Registry->load_all();

# adaptors setup
my $mlss_a = Bio::EnsEMBL::Registry->get_adaptor( 'Compara', 'compara', 'MethodLinkSpeciesSet' );
my $gdb_a  = Bio::EnsEMBL::Registry->get_adaptor( 'Compara', 'compara', 'GenomeDB' );
my $ga_a   = Bio::EnsEMBL::Registry->get_adaptor( 'Compara', 'compara', 'GenomicAlignBlock' );
my $dnaf_a = Bio::EnsEMBL::Registry->get_adaptor( 'Compara', 'compara', 'DnaFrag' );

# genome databases setup
my $edb = $gdb_a->fetch_by_name_assembly('Caenorhabditis elegans');
my $bdb = $gdb_a->fetch_by_name_assembly('Caenorhabditis briggsae');
my $rdb = $gdb_a->fetch_by_name_assembly('Caenorhabditis remanei');

my $mlss = $mlss_a->fetch_by_method_link_type_GenomeDBs( 'MLAGAN', [ $edb, $bdb, $rdb ] );
defined $mlss
    or die "No MLAGAN method_link_species_set found for the three genomes\n";

# ClustalW writer bound to STDOUT; printing an alignment to this handle
# formats and emits it.
my $alignIO = Bio::AlignIO->newFh(
    -interleaved => 0,
    -fh          => \*STDOUT,
    -format      => 'clustalw',
    -idlength    => 10,
);

# get a DNA fragment
my $dna_frag = $dnaf_a->fetch_by_GenomeDB_and_name( $edb, 'CHROMOSOME_I' );
defined $dna_frag
    or die "DnaFrag CHROMOSOME_I not found for Caenorhabditis elegans\n";

# Region of interest on the fragment.
# NOTE(review): the original passed 2000001, 300000 (start > end); this is
# assumed to be a typo for 200001 -- adjust to the region you actually want.
my $region_start = 200_001;
my $region_end   = 300_000;

my $all_blocks = $ga_a->fetch_all_by_MethodLinkSpeciesSet_DnaFrag(
    $mlss, $dna_frag, $region_start, $region_end );

# Convert every aligned block to a Bio::SimpleAlign and print it.
foreach my $align ( @{$all_blocks} ) {
    my $simple_align = $align->get_SimpleAlign;
    print {$alignIO} $simple_align;
}

Syntenic Regions

# syntenic_regions.pl
#
# Looks up the SYNTENY method-link species set for C. elegans, C. briggsae
# and C. remanei, fetches the synteny regions for C. elegans CHROMOSOME_I
# with the bounds 100000 and 200000, and prints "<genome name>: <slice name>"
# for every DnaFrag region, with a blank line after each synteny region.

use strict;
use Bio::EnsEMBL::Registry;

Bio::EnsEMBL::Registry->load_all();

# All Compara adaptors used by this script.
my $registry    = 'Bio::EnsEMBL::Registry';
my $mlss_adaptor    = $registry->get_adaptor( 'Compara', 'compara', 'MethodLinkSpeciesSet' );
my $genome_adaptor  = $registry->get_adaptor( 'Compara', 'compara', 'GenomeDB' );
my $dnafrag_adaptor = $registry->get_adaptor( 'Compara', 'compara', 'DnaFrag' );
my $synteny_adaptor = $registry->get_adaptor( 'Compara', "compara", "SyntenyRegion" );

# Genome databases for the three species.
my $elegans  = $genome_adaptor->fetch_by_name_assembly('Caenorhabditis elegans');
my $briggsae = $genome_adaptor->fetch_by_name_assembly('Caenorhabditis briggsae');
my $remanei  = $genome_adaptor->fetch_by_name_assembly('Caenorhabditis remanei');

my $synteny_mlss = $mlss_adaptor->fetch_by_method_link_type_GenomeDBs(
    "SYNTENY",
    [ $elegans, $briggsae, $remanei ]
);

# The DNA fragment to query.
my $chromosome = $dnafrag_adaptor->fetch_by_GenomeDB_and_name( $elegans, 'CHROMOSOME_I' );

my $regions = $synteny_adaptor->fetch_by_MethodLinkSpeciesSet_DnaFrag(
    $synteny_mlss, $chromosome, 100000, 200000
);

for my $region ( @{$regions} ) {
    my $members = $region->children();
    for my $member ( @{$members} ) {
        print $member->dnafrag->genome_db->name, ": ", $member->slice->name, "\n";
    }

    # Blank line between synteny regions.
    print "\n";
}