Exploring the choppy water of coding: Language: Perl (from James Tisdall book).....(1)...

LOOPS, SUBROUTINES, PASS-BY-REFERENCE, RANDOMIZATION
1#Iterative printing (reading protein sequence data from a file )
#!/usr/bin/perl -w
# Reading protein sequence data from a file, take 2
#
# Prints the first three lines of the protein file, each under its own
# heading.  The program stops with an error message if the file cannot
# be opened, and stops early if the file holds fewer than three lines.
use strict;

# The filename of the file containing the protein sequence
my $proteinfilename = 'file.pep';

# Open the file read-only and associate a lexical filehandle with it.
# Without this check a failed open would go unnoticed and every read
# below would silently return undef.
open( my $proteinfile, '<', $proteinfilename )
    or die "Could not open file $proteinfilename: $!\n";

# Read and print the first, second and third line in turn.
foreach my $ordinal ( 'first', 'second', 'third' ) {
    my $protein = <$proteinfile>;

    # undef means end-of-file: there is no such line to print.
    last unless defined $protein;

    print "\nHere is the $ordinal line of the protein file:\n\n";
    print $protein;
}

# Close the file.
close $proteinfile;
exit;
----------------------------------------------
2# Conditional if-elsif-else (to find the motif)
#!/usr/bin/perl -w
# Demonstrates an if-elsif-else chain: $word is compared against three
# known peptide motifs and a matching message is printed; anything else
# falls through to the final "else".
$word = 'MIFGRKL';

# The string comparisons are mutually exclusive, so their order does not
# affect which branch runs.  The motif of interest is tested first.
if ( $word eq 'MIFGRKL' ) {
    print "MIFGRKL--the required word!\n";
}
elsif ( $word eq 'QRRTVD' ) {
    print "QRRTVD\n";
}
elsif ( $word eq 'ERHDESE' ) {
    print "ERHDESE\n";
}
else {
    # The literal below spans two source lines, so the printed message
    # contains a line break after "not".
    print "Is \"$word\" a peptide? This program is not
sure.\n";
}
exit;
----------------------------------------------
3# Loop (reading protein sequence data from a file)
#!/usr/bin/perl -w
# Prints every line of the protein file, each preceded by a banner line.
$proteinfilename = 'file.pep';

# Report the problem and stop when the file cannot be opened.
if ( !open( PROTEINFILE, $proteinfilename ) ) {
    print "Could not open file $proteinfilename!\n";
    exit;
}

# The read returns undef at end-of-file, which ends the loop.
while ( defined( $protein = <PROTEINFILE> ) ) {
    print " ###### Here is the next line of the file:\n", $protein;
}

# Finished reading.
close(PROTEINFILE);

exit;
----------------------------------------------
4#  Conditional do-until, if-else and array usage (motif finding)
#!/usr/bin/perl -w
# Motif search.
#
# Reads a protein sequence from a file named by the user, joins it into
# one whitespace-free string, then repeatedly asks for a motif (a Perl
# regular expression) and reports whether it occurs in the sequence.
# A blank line, or end-of-input, ends the program.

# Ask the user to mention filename
print "Please type the filename of the protein sequence
data: ";
$proteinfilename = <STDIN>;

# End-of-input yields undef; treat it as an empty name so the open below
# fails with the normal message instead of warning about undef.
$proteinfilename = '' unless defined $proteinfilename;

# Remove the newline and surrounding blanks from the filename
chomp $proteinfilename;
$proteinfilename =~ s/^\s+|\s+$//g;

# Open the file read-only, or exit.  The three-argument form treats the
# typed text purely as a file name; with two arguments a name such as
# "cmd |" or ">file" would be interpreted as a pipe or an output mode.
my $proteinfile;
unless ( open( $proteinfile, '<', $proteinfilename ) ) {
    print "Cannot open file \"$proteinfilename\"\n\n";
    exit;
}

# Read the protein sequence data from the file, and store in array variable @protein
@protein = <$proteinfile>;

# Close the file
close $proteinfile;

# Put the protein sequence data (array) into a single string
$protein = join( '', @protein );

# Remove whitespace
$protein =~ s/\s//g;

# Search for motifs until the user enters a blank line
do {
    print "Enter a motif to search for: ";
    $motif = <STDIN>;

    # End-of-input is handled like a blank line so the loop terminates.
    $motif = '' unless defined $motif;

    # Remove the newline at the end of $motif
    chomp $motif;

    # A blank motif is the exit signal, not a search term.  It must not
    # reach the match operator: an empty pattern makes Perl reuse the
    # last successfully matched pattern, which would print a bogus result.
    if ( $motif !~ /^\s*$/ ) {

        # The motif is compiled as a regular expression; eval traps the
        # fatal error Perl raises for a malformed pattern such as "(".
        my $found = eval { $protein =~ /$motif/ ? 1 : 0 };

        if ( !defined $found ) {
            print "That is not a valid pattern.\n\n";
        }
        elsif ($found) {
            print "I found it!\n\n";
        }
        else {
            print "I couldn\'t find it.\n\n";
        }
    }

# /^\s*$/ matches an empty or whitespace-only line
} until ( $motif =~ /^\s*$/ );

# exit the program
exit;
----------------------------------------------
5# Scalar variable-initialize-split-join- foreach loop (nucleotide frequency finding)
#It can be done using for loop or while loop as well
#!/usr/bin/perl -w
# Determining frequency of nucleotides
#
# Reads DNA from a file named by the user, strips all whitespace, then
# counts the upper-case bases A, C, G and T one character at a time.
# Any other character is reported and counted as an error.

# Get the name of the file from user
print "Please type the filename of the DNA sequence data: ";
$dna_filename = <STDIN>;

# End-of-input yields undef; treat it as an empty name so the open below
# fails with the normal message instead of warning about undef.
$dna_filename = '' unless defined $dna_filename;

# Remove the newline and surrounding blanks from the filename
chomp $dna_filename;
$dna_filename =~ s/^\s+|\s+$//g;

# Open the file read-only, or exit.  The three-argument form treats the
# typed text purely as a file name; with two arguments a name such as
# "cmd |" or ">file" would be interpreted as a pipe or an output mode.
my $dna_file;
unless ( open( $dna_file, '<', $dna_filename ) ) {
    print "Cannot open file \"$dna_filename\"\n\n";
    exit;
}

# Read the data and store it in array variable @DNA
@DNA = <$dna_file>;

# Close the file
close $dna_file;

# Put the DNA sequence data into a single string (using join)
$DNA = join( '', @DNA );

# Remove whitespace
$DNA =~ s/\s//g;

# Re-use @DNA: split on the empty string gives one element per base
@DNA = split( '', $DNA );

# Initialize the counts (with scalar variables)
$count_of_A = 0;
$count_of_C = 0;
$count_of_G = 0;
$count_of_T = 0;
$errors     = 0;

# Increment the matching counter for each of the 4 nucleotides
foreach $base (@DNA) {
    if ( $base eq 'A' ) {
        ++$count_of_A;
    }
    elsif ( $base eq 'C' ) {
        ++$count_of_C;
    }
    elsif ( $base eq 'G' ) {
        ++$count_of_G;
    }
    elsif ( $base eq 'T' ) {
        ++$count_of_T;
    }
    else {
        # The literal spans two source lines, so the message contains a
        # line break after "this".
        print "Error - I don\'t recognize this
base: $base\n";
        ++$errors;
    }
}

# print the results
print "A = $count_of_A\n";
print "C = $count_of_C\n";
print "G = $count_of_G\n";
print "T = $count_of_T\n";
print "errors = $errors\n";

# exit the program
exit;
----------------------------------------------
6# Number manipulation (print, add, concatenate)
#!/usr/bin/perl -w
# Shows that the operator, not the variable, decides whether a scalar is
# treated as a number or as a string.
$num = 1234;      # assigned from a numeric literal
$str = '1234';    # assigned from a quoted string of digits

# Show both values, separated by one space.
print join( " ", $num, $str ), "\n";

# "+" adds the two values as numbers.
$num_or_str = $num + $str;
print $num_or_str, "\n";

# "." joins the two values as strings.
$num_or_str = $num . $str;
print $num_or_str, "\n";

exit;
----------------------------------------------
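#A subroutine is called by typing its name, followed by a parenthesized list of arguments.
#A subroutine is defined with the keyword sub, followed by its name and a block enclosed in braces. The exit statement comes before the subroutine definitions, so the main program ends before reaching them.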
7#   Using a subroutine to append GGTA to DNA
#!/usr/bin/perl -w
# A program with a subroutine to append GGTA to DNA
# The DNA seq contained in the scalar variable
$dna = 'AGAGGCTTCGGA';

# Call to the subroutine "addGGTA".  The name used here must match the
# "sub addGGTA" definition; calling an undefined name is a fatal
# "Undefined subroutine" error at run time.
# The argument being passed in is $dna; the result is saved in $new_dna_seq
$new_dna_seq = addGGTA($dna);
print "Added GGTA to $dna and got $new_dna_seq \n\n";
exit;

# addGGTA: return the given DNA string with 'GGTA' appended.
# "my" makes the working variable private to the subroutine, so the
# caller's variable is left untouched.
sub addGGTA {
    my $extended = shift;
    $extended .= 'GGTA';
    return $extended;
}

7#   Using a subroutine to change every A to T in DNA
#!/usr/bin/perl -w
# Converts every A in a DNA string to T through the A_to_T subroutine
# and prints the original next to the converted string.

$dna    = 'AAAAA';
$result = A_to_T($dna);

# $dna still holds the original text because A_to_T returns a new string.
print "Changed all the A's in $dna to T's and got $result\n\n";

exit;

# A_to_T: return a copy of the argument with every upper-case 'A'
# replaced by 'T'.  The caller's string is not modified.
sub A_to_T {
    my ($converted) = @_;

    # tr maps each listed character one-for-one across the whole string.
    $converted =~ tr/A/T/;
    return $converted;
}
----------------------------------------------
8#   Counting the G's in some DNA
#!/usr/bin/perl -w
# Counts the G's in the DNA string given as the first command-line
# argument.  With no argument, prints a usage line and stops.
use strict;

my $USAGE = "$0 DNA\n\n";

# @ARGV is empty when the program is started without arguments.
if ( !@ARGV ) {
    print $USAGE;
    exit;
}

my $dna       = $ARGV[0];
my $num_of_Gs = countG($dna);

print "\nThe DNA $dna has $num_of_Gs G\'s in it!\n\n";
exit;

# countG: count the G's in a DNA string, ignoring case.
#   Argument: $dna - the sequence to inspect (it is not modified).
#   Returns:  the number of 'G' and 'g' characters in $dna.
sub countG {
    my ($dna) = @_;

    # With an empty replacement list and no /d modifier, tr changes
    # nothing and simply returns how many listed characters it found.
    return ( $dna =~ tr/Gg// );
}

# The other "take-home message"
9# Pass-by-reference
#!/usr/bin/perl
# Pass-by-reference demonstration: two arrays are printed, handed to
# reference_sub as references, and printed again so that the changes
# made inside the subroutine are visible in the main program.
use strict;
use warnings;

my @i = qw(1 2 3);
my @j = qw(a b c);

print "In main program before calling subroutine: i = ", "@i\n";
print "In main program before calling subroutine: j = ", "@j\n";

# The backslash passes a reference to each array instead of a flat copy.
reference_sub( \@i, \@j );

print "In main program after calling subroutine: i = ", "@i\n";
print "In main program after calling subroutine: j = ", "@j\n";

exit;

# reference_sub: takes two array references, prints both arrays, then
# appends '4' to the first array and removes the first element of the
# second.  Both changes act on the caller's arrays through the
# references.  The value returned is the element removed by shift.
sub reference_sub {
    my $i = shift;
    my $j = shift;

    print "In subroutine : i = " . "@$i\n";
    print "In subroutine : j = " . "@$j\n";

    push @{$i}, '4';
    return shift @{$j};
}
10# Randomization (story telling by assembling phrases)
#!/usr/bin/perl
# Randomization: tells six-sentence stories assembled from randomly
# chosen phrases.  After each story the user may type "quit" (anything
# starting with "q") to stop, or press Enter for another story.
# End-of-input also stops the program.
use strict;
use warnings;

# Here are the arrays of parts of sentences:
my @nouns = (
    'Dad',
    'TV',
    'Mom',
    'Groucho',
    'Rebecca',
    'Harpo',
    'Robin Hood',
    'Joe and Moe',
);
my @verbs = (
    'ran to',
    'giggled with',
    'put hot sauce into the orange juice of',
    'exploded',
    'dissolved',
    'sang stupid songs with',
    'jumped with',
);
my @prepositions = (
    'at the store',
    'over the rainbow',
    'just for the fun of it',
    'at the beach',
    'before dinner',
    'in New York City',
    'in a dream',
    'around the world',
);

# Seed the random number generator.
# time|$$ combines the current time with the current process id.
srand( time | $$ );

# $input is tested in the loop condition, so it is declared outside the block.
my $input;

# The do-until loop composes one six-sentence story per pass.
do {
    # Start each story from the empty string.
    my $story = '';

    # Make 6 sentences per story: noun, verb, noun, preposition.
    for ( 1 .. 6 ) {
        my $sentence =
            $nouns[ int( rand( scalar @nouns ) ) ] . " "
          . $verbs[ int( rand( scalar @verbs ) ) ] . " "
          . $nouns[ int( rand( scalar @nouns ) ) ] . " "
          . $prepositions[ int( rand( scalar @prepositions ) ) ] . '. ';
        $story .= $sentence;
    }

    print "\n", $story, "\n";
    print "\nType \"quit\" to quit, or press Enter to continue: ";
    $input = <STDIN>;

# At end-of-input <STDIN> returns undef.  Without the defined() test the
# pattern match would never succeed and the loop would run forever.
} until ( !defined $input or $input =~ /^\s*q/i );

exit;
#Randomization ( Selecting a random position in a string )
#!/usr/bin/perl -w
# Prints 20 randomly selected positions within a DNA string, separated
# by spaces, on a single line.
my $dna = 'TGCATTGGGCATGTAACAGCA';

# Seed the generator from the current time and the process id.
srand( time | $$ );

for ( 1 .. 20 ) {
    print randomposition($dna), " ";
}
print "\n";

exit;

# randomposition: return a random zero-based index into the given string.
# The result is an integer from 0 up to length($string) - 1.
sub randomposition {
    my $string = shift;
    my $span   = length($string);
    return int( rand($span) );
}
#Randomization ( Choosing a random base from a DNA)
#!/usr/bin/perl -w
# Prints 20 randomly chosen bases, separated by spaces, on a single line.
my @nucleotides = qw(A T C G);

# Seed the generator from the current time and the process id.
srand( time | $$ );

for ( 1 .. 20 ) {
    print randomnucleotide(@nucleotides), " ";
}
print "\n";
exit;

# randomnucleotide: return one element, chosen at random, from the list
# of bases passed as arguments.
sub randomnucleotide {
    my @nucs   = @_;
    my $choice = int( rand( scalar @nucs ) );
    return $nucs[$choice];
}
#Randomization ( Mutating a base in a DNA seq)
#!/usr/bin/perl
# Mutating a base in a DNA sequence: prints the original DNA, one
# mutant produced by mutate(), and then ten further mutants, each
# derived from the previous one.
use strict;
use warnings;

my $DNA = 'CCCCCCCCCCC';

# Seed the generator from the current time and the process id.
srand( time | $$ );

my $mutant = mutate($DNA);

print "\nMutate DNA\n\n";
print "\nOriginal DNA is:\n\n";
print "$DNA\n";

# "\n" starts this heading on a fresh line like the others; "\M" is not
# a valid escape and would drop the leading line break.
print "\nMutant DNA is:\n\n";
print "$mutant\n";

print "\nHere are 10 more successive mutations:\n\n";
for ( 1 .. 10 ) {
    # Each mutation is applied to the result of the previous one.
    $mutant = mutate($mutant);
    print "$mutant\n";
}
exit;

# This program uses 4 subroutines:
# mutate, randomelement, randomnucleotide and randomposition.

# mutate: return a copy of the DNA string in which the base at one
# randomly chosen position is replaced by a different base.
sub mutate {
    my ($dna) = @_;
    my @nucleotides = ( 'A', 'C', 'G', 'T' );

    # Pick the position first, then remember the base that is there now.
    my $position = randomposition($dna);
    my $oldbase  = substr( $dna, $position, 1 );

    # Draw bases until one differs from the current base, so the result
    # always differs from the input at that position.
    my $newbase = randomnucleotide(@nucleotides);
    while ( $newbase eq $oldbase ) {
        $newbase = randomnucleotide(@nucleotides);
    }

    # Four-argument substr replaces the single character in place.
    substr( $dna, $position, 1, $newbase );
    return $dna;
}

# randomelement: return one element, chosen at random, from the
# argument list.
sub randomelement {
    my @choices = @_;

    # rand returns a fraction below the list size; int truncates it to
    # a valid index.
    my $index = int( rand( scalar @choices ) );
    return $choices[$index];
}

# randomnucleotide: return one of the four bases A, C, G or T, chosen
# at random by randomelement.  Any arguments passed in are not used.
sub randomnucleotide {
    my @bases = qw(A C G T);
    return randomelement(@bases);
}

# randomposition: return a random zero-based position within the given
# string, i.e. an integer from 0 up to length($string) - 1.
sub randomposition {
    my ($string) = @_;
    my $upper = length $string;
    return int( rand $upper );
}

#Randomization ( Generating a random DNA)
#!/usr/bin/perl
# Generates a set of random DNA sequences whose lengths lie between a
# minimum and a maximum, and prints one sequence per line.
use strict;
use warnings;

my $size_of_set    = 10;
my $maximum_length = 25;
my $minimum_length = 12;

# Seed the generator from the current time and the process id before
# any random sequence is built.
srand( time | $$ );

my @random_DNA =
  make_random_DNA_set( $minimum_length, $maximum_length, $size_of_set );

print "The array of $size_of_set randomly generated DNA sequences\n";

# The literal below spans two source lines, so the heading contains a
# line break after "and".
print " with lengths between $minimum_length and
$maximum_length:\n\n";

foreach my $dna (@random_DNA) {
    print "$dna\n";
}
print "\n";

exit;

# Subroutines

# make_random_DNA_set: build a list of random DNA strings.
#   Arguments: minimum length, maximum length, number of strings.
#   Returns:   the list of generated strings.
sub make_random_DNA_set {
    my ( $minimum_length, $maximum_length, $size_of_set ) = @_;
    my @set;

    # @set grows by one string per pass, so its size is the loop counter.
    while ( @set < $size_of_set ) {
        my $length = randomlength( $minimum_length, $maximum_length );
        my $dna    = make_random_DNA($length);
        push @set, $dna;
    }
    return @set;
}

# randomlength: return a random integer between $minlength and
# $maxlength, both inclusive.
sub randomlength {
    my ( $minlength, $maxlength ) = @_;

    # Number of distinct lengths in the inclusive range.
    my $choices = $maxlength - $minlength + 1;
    my $offset  = int( rand($choices) );
    return ( $offset + $minlength );
}

# make_random_DNA: build a random DNA string.
#   Argument: $length - the number of bases wanted.
#   Returns:  a string of $length bases, each supplied by
#             randomnucleotide(); the empty string when $length is 0.
sub make_random_DNA {
    my ($length) = @_;

    # Start from the empty string so that a length of 0 yields '' and
    # not undef, which would trigger warnings when the caller prints it.
    my $dna = '';

    for ( my $i = 0 ; $i < $length ; ++$i ) {
        $dna .= randomnucleotide();
    }
    return $dna;
}

# randomnucleotide: return one of the four bases A, C, G or T, chosen
# at random by randomelement.
sub randomnucleotide {
    return randomelement( 'A', 'C', 'G', 'T' );
}

# randomelement: return a randomly selected element of the argument list.
sub randomelement {
    my @array = @_;
    my $count = scalar @array;

    # int truncates the random fraction to an index below $count.
    return $array[ int rand $count ];
}

#Randomization ( Calculating % identity between DNA sequences)
#!/usr/bin/perl
# Generates ten random DNA sequences of length ten, computes the
# fraction of matching positions for every pair of sequences, and
# prints the average as a whole-number percentage.
use strict;
use warnings;

# Seed the generator from the current time and the process id.
srand( time | $$ );

my @random_DNA = make_random_DNA_set( 10, 10, 10 );

# Compare each unordered pair exactly once: $i always runs above $k.
my @percentages;
for my $k ( 0 .. $#random_DNA - 1 ) {
    for my $i ( $k + 1 .. $#random_DNA ) {
        my $percent =
          matching_percentage( $random_DNA[$k], $random_DNA[$i] );
        push @percentages, $percent;
    }
}

# Average the per-pair fractions.
my $result = 0;
for my $value (@percentages) {
    $result += $value;
}
$result /= scalar(@percentages);

# Scale to a percentage and drop the fractional part.
$result = int( $result * 100 );

# The first literal spans two source lines, so the sentence contains a
# line break after "average".
print "In this run of the experiment, the average
percentage of \n";
print "matching positions is $result%\n\n";

exit;

# Subroutines

# matching_percentage: fraction of positions at which two strings hold
# the same character.
#   Arguments: $string1, $string2 - the strings to compare.
#   Returns:   matches divided by length($string1), a value from 0 to 1.
#              Returns 0 when $string1 is empty.  Positions of $string1
#              that lie beyond the end of $string2 count as mismatches.
sub matching_percentage {
    my ( $string1, $string2 ) = @_;
    my $length = length($string1);

    # Nothing to compare; also avoids dividing by zero below.
    return 0 if $length == 0;

    # Only positions present in both strings can match.  Stopping at the
    # shorter length avoids reading past the end of $string2.
    my $shared = length($string2) < $length ? length($string2) : $length;

    my $count = 0;
    for my $position ( 0 .. $shared - 1 ) {
        if ( substr( $string1, $position, 1 ) eq
             substr( $string2, $position, 1 ) )
        {
            ++$count;
        }
    }
    return $count / $length;
}

# make_random_DNA_set: return a list of $size_of_set random DNA strings,
# each with a length drawn from $minimum_length .. $maximum_length.
sub make_random_DNA_set {
    my ( $minimum_length, $maximum_length, $size_of_set ) = @_;
    my @set;
    my $made = 0;

    # Draw a length, then build a string of that length, once per pass.
    until ( $made >= $size_of_set ) {
        my $length = randomlength( $minimum_length, $maximum_length );
        my $dna    = make_random_DNA($length);
        push @set, $dna;
        ++$made;
    }
    return @set;
}

# randomlength: return a random integer in the inclusive range
# $minlength .. $maxlength.
sub randomlength {
    my $minlength = shift;
    my $maxlength = shift;

    # rand yields a fraction below the range size; int truncates it to
    # a whole offset, which is then shifted up by the minimum.
    my $offset = int( rand( $maxlength - $minlength + 1 ) );
    return ( $offset + $minlength );
}

# make_random_DNA: return a random DNA string of $length bases, each
# supplied by randomnucleotide().  A $length of 0 returns the empty
# string.
sub make_random_DNA {
    my ($length) = @_;

    # Initialised to '' so that the result is always a defined string,
    # even when the loop body never runs.
    my $dna = '';

    for ( my $i = 0 ; $i < $length ; ++$i ) {
        $dna .= randomnucleotide();
    }
    return $dna;
}

# randomnucleotide: return a randomly chosen base (A, C, G or T),
# delegating the choice to randomelement.
sub randomnucleotide {
    my @alphabet = ( 'A', 'C', 'G', 'T' );
    my $base     = randomelement(@alphabet);
    return $base;
}

# randomelement: return one randomly chosen element of the argument list.
sub randomelement {
    my @array = @_;

    # Truncate the random fraction to a whole index before subscripting.
    my $pick = int( rand( scalar @array ) );
    return $array[$pick];
}

Exploring the choppy water of coding

Thursday, April 14, 2016

Language: Perl (from James Tisdall book).....(1)...

No comments:

Post a Comment