Exploring the choppy water of coding: Language: Perl (from James Tisdall book).....(2)...

GENETIC CODE, REGEX, GENBANK
1# Read a DNA FASTA file, translate to protein, and format output (here make 25-character-long lines)
#!/usr/bin/perl

use strict;
use warnings;
use BeginPerlBioinfo;

my @file_data = ( );
my $dna = '';
my $protein = '';

@file_data = get_file_data("sample.dna");
$dna = extract_sequence_from_fasta_data(@file_data);
$protein = dna2peptide($dna);
print_sequence($protein, 25);

exit;

2# Translate a DNA sequence in all three reading frames (2 are in plus, 1 in minus strand)
#!/usr/bin/perl
use strict;
use warnings;
use BeginPerlBioinfo;

my @file_data = ( );
my $dna = '';
my $revcom = '';
my $protein = '';
@file_data = get_file_data("sample.dna");
$dna = extract_sequence_from_fasta_data(@file_data);

print "\n -------Reading Frame 1--------\n\n";
$protein = translate_frame($dna, 1);
print_sequence($protein, 50);

print "\n -------Reading Frame 2--------\n\n";
$protein = translate_frame($dna, 2);
print_sequence($protein, 50);

$revcom = revcom($dna);
print "\n -------Reading Frame 3--------\n\n";
$protein = translate_frame($revcom, 3);
print_sequence($protein, 50);
exit;

#subroutine (2 of them i.e revcom, translate_frame)
sub revcom {
my($dna) = @_;
my($revcom) = reverse($dna);
$revcom =~ tr/ACGTacgt/TGCAtgca/;
return $revcom;

}

sub translate_frame {
my($seq, $start, $end) = @_;
my $protein;
unless($end) {
$end = length($seq);
}
return dna2peptide ( substr ( $seq, $start - 1,
$end -$start + 1) );
}
3#Find motifs/regex
if( $dna =~ /CT[CGT]ACG/ ) {
print "I found the motif!!\n";

}
# Make restriction map from user queries
#!/usr/bin/perl
use strict;
use warnings;
use BeginPerlBioinfo;

my %rebase_hash = ( );
my @file_data = ( );
my $query = '';
my $dna = '';
my $recognition_site = '';
my $regexp = '';
my @locations = ( );

@file_data = get_file_data("sample.dna");
$dna = extract_sequence_from_fasta_data(@file_data);
%rebase_hash = parseREBASE('bionet');

do {
print "Search for what restriction site for (or quit)?: ";

$query = <STDIN>;
chomp $query;
# Exit if empty query
if ($query =~ /^\s*$/ ) {
exit;
}

if ( exists $rebase_hash{$query} ) {
($recognition_site, $regexp) = split ( " ", $rebase_hash{$query});
@locations = match_positions($regexp, $dna);
if (@locations) {
print "Searching for $query $recognition_site $regexp\n";
print "A restriction site for $query at locations:\n";
print join(" ", @locations), "\n";
} else {
print "A restriction site for $query is not in the DNA:\n";
}
}
print "\n";
} until ( $query =~ /quit/ );

exit;

# Subroutine
sub match_positions {
my($regexp, $sequence) = @_;
use strict;
use BeginPerlBioinfo;

my @positions = ( );

while ( $sequence =~ /$regexp/ig ) {
push ( @positions, pos($sequence) - length($&) + 1);
}
return @positions;

}

4#Extract annotation and sequence from GenBank file
#!/usr/bin/perl
use strict;
use warnings;
use BeginPerlBioinfo;
my $sequence = '';
my $filename = 'record.gb';

parse1(\@annotation, \$sequence, $filename);
print @annotation;
print_sequence($sequence, 50);
exit;

# Subroutine
sub parse1 {
my($annotation, $dna, $filename) = @_;
my $in_sequence = 0;
my @GenBankFile = ( );

@GenBankFile = get_file_data($filename);
foreach my $line (@GenBankFile) {
if( $line =~ /^\/\/\n/ ) {
last;
} elsif( $in_sequence) {
$$dna .= $line;
} elsif ( $line =~ /^ORIGIN/ ) {
$in_sequence = 1;
} else{
push( @$annotation, $line);
}
}
$$dna =~ s/[\s0-9]//g;

}

Exploring the choppy water of coding

Friday, April 15, 2016

Language: Perl (from James Tisdall book).....(2)...

No comments:

Post a Comment