Thursday, April 14, 2016

Language: Perl (from James Tisdall book).....(1)...

LOOPS, SUBROUTINES, PASS-BY-REFERENCE, RANDOMIZATION
1# Iterative printing (reading protein sequence data from a file)
#!/usr/bin/perl
# Reading protein sequence data from a file, take 2:
# print the first three lines of the file, one at a time.
use strict;
use warnings;

# The filename of the file containing the protein sequence
my $proteinfilename = 'file.pep';

# Open the file read-only and associate a filehandle with it.  Stop with the
# system error message if that fails, rather than reading from an unopened
# handle.
open(my $proteinfile, '<', $proteinfilename)
    or die "Cannot open file \"$proteinfilename\": $!\n";

# Read and print the first, second and third line.  If the file runs out
# early, stop instead of printing an undefined value.
foreach my $ordinal ('first', 'second', 'third') {
    my $protein = <$proteinfile>;
    last unless defined $protein;

    print "\nHere is the $ordinal line of the protein file:\n\n";
    print $protein;
}

# Close the file.
close $proteinfile;
exit;
----------------------------------------------
2# Conditional if-elsif-else  (to find the motif)
#!/usr/bin/perl
# if-elsif-else conditionals: report which of the known motifs $word holds.
use strict;
use warnings;

my $word = 'MIFGRKL';

if ( $word eq 'QRRTVD' ) {
    print "QRRTVD\n";
} elsif ( $word eq 'ERHDESE' ) {
    print "ERHDESE\n";
} elsif ( $word eq 'MIFGRKL' ) {
    print "MIFGRKL--the required word!\n";
} else {
    # Kept on a single source line: a line break inside the quotes would be
    # printed in the middle of the sentence.
    print "Is \"$word\" a peptide? This program is not sure.\n";
}
exit;
----------------------------------------------
3#  Loop (reading protein sequence data from a file)
#!/usr/bin/perl -w
# Print every line of the protein file, each one preceded by a banner line.
my $proteinfilename = 'file.pep';

# Give up with a message when the file cannot be opened.
if ( !open(PROTEINFILE, $proteinfilename) ) {
    print "Could not open file $proteinfilename!\n";
    exit;
}

# Walk through the file one line at a time until the end is reached.
while ( defined( my $protein = <PROTEINFILE> ) ) {
    print "  ######  Here is the next line of the file:\n", $protein;
}

# Done with the file.
close(PROTEINFILE);

exit;
----------------------------------------------
4#  Conditional do-until, if-else and array usage (motif finding)
#!/usr/bin/perl
# Search a protein sequence file for motifs typed in by the user.
use strict;
use warnings;

# Ask the user for the filename.  The prompt is written on one source line so
# that no stray line break is printed in the middle of it.
print "Please type the filename of the protein sequence data: ";
my $proteinfilename = <STDIN>;

# Input ended before a filename was given: nothing to do.
exit unless defined $proteinfilename;

# Remove the newline from the filename
chomp $proteinfilename;

# Open the file read-only, or exit.  The three-argument form keeps characters
# such as '>' or '|' in the typed name from being interpreted as an open mode.
my $proteinfile;
unless ( open($proteinfile, '<', $proteinfilename) ) {
    print "Cannot open file \"$proteinfilename\"\n\n";
    exit;
}

# Read the protein sequence data from the file into the array @protein
my @protein = <$proteinfile>;

# Close the file
close $proteinfile;

# Put the protein sequence data into a single string
my $protein = join( '', @protein );

# Remove whitespace
$protein =~ s/\s//g;

# Keep asking for motifs until the user enters an empty line or input ends.
while (1) {
    print "Enter a motif to search for: ";
    my $motif = <STDIN>;

    # End of input is treated like a request to stop.
    last unless defined $motif;

    # Remove the newline at the end of $motif
    chomp $motif;

    # An empty or all-whitespace entry (/^\s*$/) means "quit".  It is checked
    # before searching so that the quit request is not itself reported as a
    # search result.
    last if $motif =~ /^\s*$/;

    # The motif is used as a regular expression.  Compile it first so that a
    # malformed pattern is reported instead of aborting the program.
    my $pattern = eval { qr/$motif/ };
    unless ( defined $pattern ) {
        print "\"$motif\" is not a valid pattern.\n\n";
        next;
    }

    # Look for the motif (pattern matching)
    if ( $protein =~ $pattern ) {
        print "I found it!\n\n";
    } else {
        print "I couldn't find it.\n\n";
    }
}

# exit the program
exit;
----------------------------------------------
5#  Scalar variable-initialize-split-join- foreach loop (nucleotide frequency finding)
#It can be done using for loop or while loop as well
#!/usr/bin/perl
# Determining frequency of nucleotides
use strict;
use warnings;

# Get the name of the file from the user
print "Please type the filename of the DNA sequence data: ";
my $dna_filename = <STDIN>;

# Input ended before a filename was given: nothing to do.
exit unless defined $dna_filename;

# Remove the newline from the filename
chomp $dna_filename;

# Open the file read-only, or exit.  The three-argument form keeps characters
# such as '>' or '|' in the typed name from being interpreted as an open mode.
my $dnafile;
unless ( open($dnafile, '<', $dna_filename) ) {
    print "Cannot open file \"$dna_filename\"\n\n";
    exit;
}

# Read the data and store it in the array @DNA
my @DNA = <$dnafile>;

# Close the file
close $dnafile;

# Put the DNA sequence data into a single string (using join)
my $DNA = join( '', @DNA );

# Remove whitespace
$DNA =~ s/\s//g;

# Break the string into one array element per character
@DNA = split( '', $DNA );

# Initialize the counts
my $count_of_A = 0;
my $count_of_C = 0;
my $count_of_G = 0;
my $count_of_T = 0;
my $errors     = 0;

# Increment the counter that matches each base; anything other than an
# upper-case A, C, G or T is reported and counted as an error.
foreach my $base (@DNA) {
    if ( $base eq 'A' ) {
        ++$count_of_A;
    } elsif ( $base eq 'C' ) {
        ++$count_of_C;
    } elsif ( $base eq 'G' ) {
        ++$count_of_G;
    } elsif ( $base eq 'T' ) {
        ++$count_of_T;
    } else {
        # One source line, so no line break is printed inside the message.
        print "Error - I don't recognize this base: $base\n";
        ++$errors;
    }
}

# Print the results
print "A = $count_of_A\n";
print "C = $count_of_C\n";
print "G = $count_of_G\n";
print "T = $count_of_T\n";
print "errors = $errors\n";

# exit the program
exit;
----------------------------------------------
6#  Number manipulation (print, add, concatenate)
#!/usr/bin/perl -w
# The same two scalars are printed, added as numbers, and joined as strings.
my $num = 1234;      # written as a number
my $str = '1234';    # written as a string

# Show both values side by side.
print "$num $str\n";

# "+" adds the two values as numbers.
my $sum = $num + $str;
print "$sum\n";

# "." concatenates the two values as strings.
my $joined = $num . $str;
print "$joined\n";

exit;
----------------------------------------------
# A subroutine is called by typing its name, followed by a parenthesized list of arguments.
# A subroutine is defined with the keyword "sub", its name, and a block enclosed in braces. The main program's "exit" comes before the subroutine definitions.
7#   Using a subroutine to append GGTA to DNA 
#!/usr/bin/perl
# A program with a subroutine to append GGTA to DNA
use strict;
use warnings;

# The DNA sequence contained in a scalar variable
my $dna = 'AGAGGCTTCGGA';

# Call the subroutine "addGGTA", the name it is defined under further down;
# calling a name that has no definition would abort the program at run time.
# The argument passed in is $dna; the result is saved in $new_dna_seq.
my $new_dna_seq = addGGTA($dna);
print "Added GGTA to $dna and got $new_dna_seq \n\n";
exit;

# The subroutine "addGGTA"  (the "my" declaration is essential: it keeps $dna private to the subroutine)
# Return a copy of the given DNA string with 'GGTA' appended.
# The argument is copied into a lexical first, so the caller's variable is
# left untouched.
sub addGGTA {
    my $extended = shift;    # private copy of the caller's sequence
    $extended .= 'GGTA';
    return $extended;
}

7b#  Using a subroutine to change every A in the DNA to T 
#!/usr/bin/perl -w
# Demonstrate A_to_T: print the starting string next to the string the
# subroutine returns.

my $dna    = 'AAAAA';
my $result = A_to_T($dna);
print "Changed all the A's in ", $dna, " to T's and got ", $result, "\n\n";
exit;

# Return a copy of the input string in which every upper-case 'A' has been
# replaced by 'T'.  The work is done on a copy, so the caller's string is
# not modified.
sub A_to_T {
    my $converted = $_[0];
    $converted =~ tr/A/T/;    # character-for-character replacement
    return $converted;
}
----------------------------------------------
8#   Counting the G's in some DNA 
#!/usr/bin/perl -w
# Count the G's in the DNA string given as the first command-line argument.
use strict;

# Usage line, shown when the program is run without arguments.
my $USAGE = "$0 DNA\n\n";
if ( !@ARGV ) {
    print $USAGE;
    exit;
}

my $dna       = $ARGV[0];
my $num_of_Gs = countG($dna);
print "\nThe DNA ", $dna, " has ", $num_of_Gs, " G's in it!\n\n";
exit;

 sub countG {
    my($dna) = @_;
    my($count) = 0;
$count = ( $dna =~ tr/Gg//);
    return $count;

}The  other  "take-home  message"
9# Pass-by-reference
#!/usr/bin/perl
# Pass-by-reference demo: print two arrays before and after calling a
# subroutine that is handed references to them.
use strict;
use warnings;

my @i = qw(1 2 3);
my @j = qw(a b c);

print "In main program before calling subroutine: i = @i\n";
print "In main program before calling subroutine: j = @j\n";

reference_sub( \@i, \@j );

print "In main program after calling subroutine: i = @i\n";
print "In main program after calling subroutine: j = @j\n";
exit;

# Print the two referenced arrays, then modify them in place: append '4' to
# the first and remove the leading element of the second.  The arguments are
# array references, so the caller's arrays are the ones that change.
sub reference_sub {
    my ( $first_ref, $second_ref ) = @_;

    print "In subroutine : i = @{$first_ref}\n";
    print "In subroutine : j = @{$second_ref}\n";

    push @{$first_ref}, '4';
    shift @{$second_ref};
}
10# Randomization (story telling by assembling phrases)
#!/usr/bin/perl
# Randomization: tell stories by assembling randomly chosen phrases.
use strict;
use warnings;

# The parts of sentences to draw from
my @nouns = (
    'Dad',
    'TV',
    'Mom',
    'Groucho',
    'Rebecca',
    'Harpo',
    'Robin Hood',
    'Joe and Moe',
);
my @verbs = (
    'ran to',
    'giggled with',
    'put hot sauce into the orange juice of',
    'exploded',
    'dissolved',
    'sang stupid songs with',
    'jumped with',
);
my @prepositions = (
    'at the store',
    'over the rainbow',
    'just for the fun of it',
    'at the beach',
    'before dinner',
    'in New York City',
    'in a dream',
    'around the world',
);

# Seed the random number generator.
# time|$$ combines the current time with the current process id.
srand(time|$$);

# Compose one six-sentence story per pass, until the user types "quit"
# (any answer starting with "q" counts) or input ends.
my $input;
do {
    # Start each story from the empty string.
    my $story = '';

    # Make 6 sentences per story: noun, verb, noun, preposition.
    for my $count ( 1 .. 6 ) {
        my $sentence = $nouns[ int( rand( scalar @nouns ) ) ] . " "
                     . $verbs[ int( rand( scalar @verbs ) ) ] . " "
                     . $nouns[ int( rand( scalar @nouns ) ) ] . " "
                     . $prepositions[ int( rand( scalar @prepositions ) ) ] . '. ';
        $story .= $sentence;
    }

    print "\n", $story, "\n";
    print "\nType \"quit\" to quit, or press Enter to continue: ";
    $input = <STDIN>;

    # Also stop at end of input: an undefined $input can never match the
    # pattern, so without the defined() check the loop would not terminate
    # once STDIN is closed.
} until ( !defined $input or $input =~ /^\s*q/i );

exit;
#Randomization ( Selecting a random position in a string )
#!/usr/bin/perl -w
# Print 20 randomly chosen positions within a DNA string, on one line.
my $dna = 'TGCATTGGGCATGTAACAGCA';

# Seed the generator from the current time and the process id.
srand(time|$$);

foreach ( 1 .. 20 ) {
    my $position = randomposition($dna);
    print "$position ";
}
print "\n";

exit;

# Pick a random position in a string.
# For a non-empty string the result is a whole number from 0 up to
# (length of the string - 1).
sub randomposition {
    my $text = $_[0];
    my $span = length($text);
    return int( rand($span) );
}
#Randomization ( Choosing a random base from a DNA)
#!/usr/bin/perl -w
# Print 20 randomly chosen bases on one line.
my @nucleotides = ('A', 'T', 'C', 'G');

# Seed the generator from the current time and the process id.
srand(time|$$);

foreach ( 1 .. 20 ) {
    my $base = randomnucleotide(@nucleotides);
    print $base, " ";
}
print "\n";
exit;

# Return one randomly chosen element of the list passed in.
sub randomnucleotide {
    my @choices = @_;
    my $index   = rand @choices;    # fractional; the subscript truncates it
    return $choices[$index];
}
#Randomization ( Mutating a base in a DNA seq)
#!/usr/bin/perl
# Mutate a DNA sequence: one random point mutation, then ten more applied
# one after another to the result.
use strict;
use warnings;

my $DNA = 'CCCCCCCCCCC';
my $mutant;

# Seed the generator from the current time and the process id.
srand(time|$$);

$mutant = mutate($DNA);

print "\nMutate DNA\n\n";
print "\nOriginal DNA is:\n\n";
print "$DNA\n";
# "\n" here; "\M" is not an escape sequence and left the blank line out.
print "\nMutant DNA is:\n\n";
print "$mutant\n";

print "\nHere are 10 more successive mutations:\n\n";
for ( 1 .. 10 ) {
    # Each pass mutates the previous mutant, so the changes accumulate.
    $mutant = mutate($mutant);
    print "$mutant\n";
}

exit;

#Use of 4 subroutines (sub mutate, sub randomelement, sub randomnucleotide, sub randomposition)
# Make one point mutation in a DNA string: choose a position with
# randomposition(), then replace the base there with a different one drawn
# by randomnucleotide().  Returns the mutated copy.
sub mutate {
    my ($sequence) = @_;
    my @bases = ('A', 'C', 'G', 'T');

    # Where to mutate, and what is there now.
    my $spot    = randomposition($sequence);
    my $current = substr( $sequence, $spot, 1 );

    # Keep drawing until the replacement differs from the current base.
    my $replacement = randomnucleotide(@bases);
    while ( $replacement eq $current ) {
        $replacement = randomnucleotide(@bases);
    }

    # Overwrite the single character at the chosen position.
    substr( $sequence, $spot, 1 ) = $replacement;
    return $sequence;
}

# Return one randomly chosen element of the argument list.
sub randomelement {
    # rand() yields a fractional index below the list size; the subscript
    # truncates it to a whole number.
    return $_[ rand @_ ];
}

# Return one of the four bases A, C, G, T, chosen by randomelement().
# Any arguments passed in are not used.
sub randomnucleotide {
    return randomelement( qw(A C G T) );
}

# Choose a random index into the given string: rand() is scaled by the
# string's length and the fraction is dropped.
sub randomposition {
    my $string = shift;
    return int( rand( length $string ) );
}

#Randomization ( Generating a random DNA)
#!/usr/bin/perl
# Generate a set of random DNA sequences and print them, one per line.
use strict;
use warnings;

my $size_of_set    = 10;    # how many sequences to make
my $maximum_length = 25;    # longest sequence allowed
my $minimum_length = 12;    # shortest sequence allowed

# Seed the generator from the current time and the process id.
srand(time|$$);

my @random_DNA =
    make_random_DNA_set( $minimum_length, $maximum_length, $size_of_set );

print "The array of $size_of_set randomly generated DNA sequences\n";
# One source line, so no line break is printed between "and" and the number.
print "  with lengths between $minimum_length and $maximum_length:\n\n";
foreach my $dna (@random_DNA) {
    print "$dna\n";
}
print "\n";

exit;

# Subroutines 
# Build a list of random DNA strings.
#   $minimum_length, $maximum_length - passed to randomlength() for each string
#   $size_of_set                     - how many strings to produce
# Returns the list of strings.
sub make_random_DNA_set {
    my ( $minimum_length, $maximum_length, $size_of_set ) = @_;
    my @set;

    # Keep adding sequences until the set has reached the requested size.
    while ( scalar(@set) < $size_of_set ) {
        my $length = randomlength( $minimum_length, $maximum_length );
        my $dna    = make_random_DNA($length);
        push @set, $dna;
    }
    return @set;
}

# Return a random whole number between $minlength and $maxlength, both
# limits included.
sub randomlength {
    my ( $minlength, $maxlength ) = @_;
    my $choices = $maxlength - $minlength + 1;    # size of the range
    my $offset  = int( rand($choices) );
    return $offset + $minlength;
}


# Build a random DNA string of the requested length, one base at a time,
# using randomnucleotide().
#   $length - number of bases wanted
# Returns the string; a length of 0 gives the empty string.
sub make_random_DNA {
    my ($length) = @_;

    # Start from '' so that a zero length yields an empty string rather than
    # an undefined value.
    my $dna = '';
    for ( my $i = 0 ; $i < $length ; ++$i ) {
        $dna .= randomnucleotide();
    }
    return $dna;
}

# Return one of A, C, G or T, picked by randomelement().
sub randomnucleotide {
    my @bases = qw(A C G T);
    my $base  = randomelement(@bases);
    return $base;
}

# Return a randomly selected element from the list of arguments.
sub randomelement {
    my @candidates = @_;
    my $slot       = rand @candidates;    # fractional; truncated when used as an index
    return $candidates[$slot];
}

#Randomization ( Calculating % identity between  DNA sequences)
#!/usr/bin/perl
# Calculate the average percentage of matching positions between pairs of
# random DNA sequences.
use strict;
use warnings;

my @percentages;

# Seed the generator from the current time and the process id.
srand(time|$$);

# Arguments are minimum length, maximum length and set size.
my @random_DNA = make_random_DNA_set( 10, 10, 10 );

# Compare every pair of sequences exactly once: each sequence $k against
# every later sequence $i.
for my $k ( 0 .. $#random_DNA - 1 ) {
    for my $i ( $k + 1 .. $#random_DNA ) {
        my $percent =
            matching_percentage( $random_DNA[$k], $random_DNA[$i] );
        push( @percentages, $percent );
    }
}

# Average the per-pair values.
my $result = 0;
foreach my $percent (@percentages) {
    $result += $percent;
}
$result = $result / scalar(@percentages);

# Scale by 100 and drop the fractional part for display.
$result = int( $result * 100 );

# Each message is on one source line, so no stray line break is printed.
print "In this run of the experiment, the average percentage of \n";
print "matching positions is $result%\n\n";

exit;

# Subroutines 
# Compare two strings position by position.
#   $string1, $string2 - the strings to compare
# Returns the number of positions holding the same character in both strings,
# divided by the length of $string1 (a value from 0 to 1).  Returns 0 when
# $string1 is empty.
sub matching_percentage {
    my ( $string1, $string2 ) = @_;
    my $length = length($string1);

    # Nothing to compare: avoid dividing by zero at the end.
    return 0 if $length == 0;

    my $count = 0;
    for my $position ( 0 .. $length - 1 ) {

        # Positions past the end of a shorter $string2 cannot match; skip
        # them instead of reading outside the string.
        next if $position >= length($string2);

        if ( substr( $string1, $position, 1 ) eq
             substr( $string2, $position, 1 ) )
        {
            ++$count;
        }
    }
    return $count / $length;
}

# Produce $size_of_set random DNA strings.  The length of each one comes from
# randomlength($minimum_length, $maximum_length) and the string itself from
# make_random_DNA().  Returns the strings as a list.
sub make_random_DNA_set {
    my ( $minimum_length, $maximum_length, $size_of_set ) = @_;
    my @set;

    # Stop as soon as the set holds the requested number of sequences.
    until ( scalar(@set) >= $size_of_set ) {
        my $length = randomlength( $minimum_length, $maximum_length );
        my $dna    = make_random_DNA($length);
        push @set, $dna;
    }
    return @set;
}

# Pick a random whole number from $minlength to $maxlength inclusive.
sub randomlength {
    my $minlength = $_[0];
    my $maxlength = $_[1];

    # rand() is scaled by the count of possible values, truncated, and then
    # shifted up to start at the minimum.
    my $span = $maxlength - $minlength + 1;
    return $minlength + int( rand($span) );
}

# Assemble a random DNA string by appending $length bases obtained from
# randomnucleotide().
#   $length - number of bases wanted
# Returns the string; for a length of 0 the result is the empty string.
sub make_random_DNA {
    my ($length) = @_;

    # Initialised to '' so the result is always a defined string, even when
    # the loop body never runs.
    my $dna = '';
    for ( my $i = 0 ; $i < $length ; ++$i ) {
        $dna .= randomnucleotide();
    }
    return $dna;
}

# Return one of the four bases A, C, G, T, chosen by randomelement().
sub randomnucleotide {
    my (@nucleotides) = ('A', 'C', 'G', 'T');
    return randomelement(@nucleotides);
}    # closing brace added: without it the next "sub" is parsed inside this body

# Return one randomly chosen element of the argument list.
sub randomelement {
    my (@array) = @_;

    # rand() gives a fractional index below the array size; the subscript
    # truncates it to a whole number.
    return $array[rand @array];
}    # closing brace added: the subroutine body was never terminated

No comments:

Post a Comment