Thursday, January 14, 2016

Language: Perl (Bioperl)............

http://perldoc.perl.org/
perldoc
perldoc -f sprintf
perldoc List::Util
--------------------------------------------------------
bioperl: perl modules for life sciences data and analysis
Modules are interfaces to data types: Sequences, Alignments, Features, Locations, Databases
Common modules: List::Util, Getopt::Long, Statistics::Descriptive
http://bioperl.org
http://github.com/bioperl 
http://www.bioperl.org/ 
http://www.bioperl.org/wiki/HOWTOs


Bio Modules: SeqIO, DB::Fasta, DB::GenBank, TreeIO, AlignIO, SearchIO
Objects: Bio::Seq, Bio::DB::Fasta, Bio::DB::GenBank, Bio::TreeIO, Bio::AlignIO
Methods: seq(), length(), id(), description()
---------------------------------------------------------
SeqIO can both read and write sequences

---------------------------------------------------------
#Count the number of FASTA sequences
#!/usr/bin/perl -w
# Counts the records in a FASTA file and prints the total.
use strict;
use Bio::SeqIO;

# Input file name is hard-coded; edit it to point at your own FASTA file.
my $seqfile = "sequences.fa";
my $in = Bio::SeqIO->new(-format => 'fasta',
                         -file   => $seqfile);

my $count = 0;
# The loop ends as soon as next_seq returns a false value.
while ( my $seq = $in->next_seq ) {
    $count++;
}

print "Sequence number is $count\n";
---------------------------------------------------------
#Count the number of bases
#!/usr/bin/perl -w
# Sums the lengths of all sequences in a FASTA file and prints the total.
use strict;
use Bio::SeqIO;

# Input file name is hard-coded; edit it to point at your own FASTA file.
my $seqfile = "sequences.fa";
my $in = Bio::SeqIO->new(-format => 'fasta',
                         -file   => $seqfile);

my $count = 0;
# Accumulate the length of each sequence until next_seq returns a false value.
while ( my $seq = $in->next_seq ) {
    $count += $seq->length;
}
print "Number of bases is $count\n";
---------------------------------------------------------
#Convert file formats and output the sequences
#!/usr/bin/perl -w
# Reads every record from a GenBank file and writes it out in FASTA format.
use strict;
use Bio::SeqIO;

my $seqfile = "sequences.gbk";
my $in = Bio::SeqIO->new(-format => 'genbank',
                         -file   => $seqfile);
# The leading ">" in the file argument opens the output file for writing.
my $out = Bio::SeqIO->new(-format => 'fasta',
                          -file   => ">outputfile.fa");

# Copy each sequence from the input stream to the output stream.
while ( my $seq = $in->next_seq ) {
    $out->write_seq($seq);
}
---------------------------------------------------------
#Fast random access to Fasta seq databases
# Random access into a FASTA sequence database via Bio::DB::Fasta.
use Bio::DB::Fasta;

# First command-line argument: the path handed to Bio::DB::Fasta->new.
my $dir = shift @ARGV;
my $dbh = Bio::DB::Fasta->new($dir);

# Fetch a whole record by its accession.
my $seq = $dbh->get_Seq_by_acc("SEQ128");

# Fetch a subsequence of "chr1" given start and end coordinates.
my $seqstr = $dbh->seq("chr1", 9087, 12375);
---------------------------------------------------------
#To query GenBank
# Retrieves one record from GenBank by accession and writes it as FASTA.
use Bio::DB::GenBank;
use Bio::SeqIO;

my $db  = Bio::DB::GenBank->new;
my $seq = $db->get_Seq_by_acc("NM_206028.1");

# No -file argument is given, so the record goes to the stream's default
# output (STDOUT in BioPerl).
my $out = Bio::SeqIO->new(-format => 'fasta');
$out->write_seq($seq);
---------------------------------------------------------
#Convert from nexus to newick format
# Converts every tree in a nexus file (first command-line argument)
# to newick format.
use Bio::TreeIO;

my $in = Bio::TreeIO->new(-format => 'nexus',
                          -file   => shift @ARGV);
# No -file argument is given for the output stream, so trees go to the
# stream's default output (STDOUT in BioPerl).
my $out = Bio::TreeIO->new(-format => 'newick');

# Copy each tree across until next_tree returns a false value.
while ( my $tree = $in->next_tree ) {
    $out->write_tree($tree);
}
---------------------------------------------------------
#Multiple alignment
# Converts multiple alignments from clustalw format to phylip format.
# Usage: script.pl <input.aln> <output.phy>
use Bio::AlignIO;

# First command-line argument: the clustalw alignment file to read.
my $in = Bio::AlignIO->new(-format => 'clustalw',
                           -file   => shift @ARGV);

# Second command-line argument: the phylip file to create. BioPerl opens a
# bare file name for reading, so the ">" prefix is needed to open it for
# writing.
my $outfile = shift @ARGV;
my $out = Bio::AlignIO->new(-format => 'phylip',
                            -file   => ">$outfile");

# Copy each alignment across until next_aln returns a false value.
while ( my $aln = $in->next_aln ) {
    $out->write_aln($aln);
}
---------------------------------------------------------
#Seq database search 
# Walks a BLAST report (first command-line argument) and prints, for each
# query, its hits and the details of every HSP within each hit.
use Bio::SearchIO;

my $in = Bio::SearchIO->new(-format => 'blast',
                            -file   => shift @ARGV);

# Outer loop: one result per query in the report.
while ( my $r = $in->next_result ) {
    print $r->query_name, "\n";

    # Middle loop: each hit for this query, with its significance.
    while ( my $h = $r->next_hit ) {
        print "\t", $h->name, " ", $h->significance, "\n";

        # Inner loop: each HSP of this hit.
        while ( my $hsp = $h->next_hsp ) {
            # Query and hit coordinate ranges, printed as start..end.
            print "\t\t", $hsp->query->start, "..", $hsp->query->end, "\n";
            print "\t\t", $hsp->hit->start, "..", $hsp->hit->end, "\n";
            # E-value followed by the identical and conserved fractions.
            print "\t\t", $hsp->evalue, " ", $hsp->frac_identical, " ",
                $hsp->frac_conserved, "\n";
            print "\t\t", $hsp->query_string, "\n";
        }
    }
}
---------------------------------------------------------

No comments:

Post a Comment