Friday, April 15, 2016

Language: Perl (from James Tisdall book).....(2)...

GENETIC CODE, REGEX, GENBANK
1# Read a DNA FASTA file, translate to protein, and format output (here make 25-character-long lines) 
#!/usr/bin/perl 

use strict;
use warnings;
use BeginPerlBioinfo;  

my @file_data = (  );
my $dna = '';
my $protein = '';

@file_data = get_file_data("sample.dna");
$dna = extract_sequence_from_fasta_data(@file_data);
$protein = dna2peptide($dna);
print_sequence($protein, 25);

exit;

 2# Translate a DNA sequence in all three reading frames  (2 are in plus, 1 in minus strand)
#!/usr/bin/perl  
use strict;
use warnings;
use BeginPerlBioinfo;

my @file_data = (  );
my $dna = '';
my $revcom = '';
my $protein = '';
@file_data = get_file_data("sample.dna");
$dna = extract_sequence_from_fasta_data(@file_data);

print "\n -------Reading Frame 1--------\n\n";
$protein = translate_frame($dna, 1);
print_sequence($protein, 50);

print "\n -------Reading Frame 2--------\n\n";
$protein = translate_frame($dna, 2);
print_sequence($protein, 50);

$revcom = revcom($dna);
print "\n -------Reading Frame 3--------\n\n";
$protein = translate_frame($revcom, 3);
print_sequence($protein, 50);
exit;

#subroutine (2 of them i.e revcom, translate_frame)
sub revcom { 
    my($dna) = @_; 
    my($revcom) = reverse($dna); 
    $revcom =~ tr/ACGTacgt/TGCAtgca/; 
    return $revcom;

}

sub translate_frame {
    my($seq, $start, $end) = @_;
    my $protein;
    unless($end) {
        $end = length($seq);
    }        
return dna2peptide ( substr ( $seq, $start - 1,
$end -$start + 1) );
}
3#Find motifs/regex
if( $dna =~ /CT[CGT]ACG/ ) {
    print "I found the motif!!\n";

}
# Make restriction map from user queries 
#!/usr/bin/perl 
use strict;
use warnings;
use BeginPerlBioinfo;


my %rebase_hash = (  );
my @file_data = (  );
my $query = '';
my $dna = '';
my $recognition_site = '';
my $regexp = '';
my @locations = (  );

@file_data = get_file_data("sample.dna");
$dna = extract_sequence_from_fasta_data(@file_data);
%rebase_hash = parseREBASE('bionet');

do {
    print "Search for what restriction site for (or quit)?: ";
   

    $query = <STDIN>;
chomp $query;
    # Exit if empty query
    if ($query =~ /^\s*$/ ) {
        exit;
    }

    if ( exists $rebase_hash{$query} ) {
        ($recognition_site, $regexp) = split ( " ", $rebase_hash{$query});    
        @locations = match_positions($regexp, $dna);
        if (@locations) {
            print "Searching for $query $recognition_site $regexp\n";
 print "A restriction site for $query at locations:\n";
            print join(" ", @locations), "\n";
        } else {
            print "A restriction site for $query is not in the DNA:\n";
        }
    }
    print "\n";
} until ( $query =~ /quit/ );

exit;

# Subroutine  
sub match_positions {
    my($regexp, $sequence) = @_;
    use strict;
    use BeginPerlBioinfo;

    my @positions = (  );


    while ( $sequence =~ /$regexp/ig ) {
 push ( @positions, pos($sequence) - length($&) + 1);
    }
     return @positions;

}

4#Extract annotation and sequence from GenBank file  
#!/usr/bin/perl 
use strict;
use warnings;
use BeginPerlBioinfo;  
my $sequence = '';
my $filename = 'record.gb';

parse1(\@annotation, \$sequence, $filename);
print @annotation;
print_sequence($sequence, 50);
exit;

# Subroutine 
sub parse1 {
    my($annotation, $dna, $filename) = @_;  
    my $in_sequence = 0;
    my @GenBankFile = (  );    

    @GenBankFile = get_file_data($filename);    
    foreach my $line (@GenBankFile) {
        if( $line =~ /^\/\/\n/ ) {
            last;
        } elsif( $in_sequence) {
            $$dna .= $line;
        } elsif ( $line =~ /^ORIGIN/ ) {
            $in_sequence = 1;
        } else{
            push( @$annotation, $line);
        }
    }    
    $$dna =~ s/[\s0-9]//g;

}

No comments:

Post a Comment