click on the Biowiki logo to go to homepage Home Home | EditEdit | Attach Attach | New New | Site Map Site Map | Help Help
Research | Teaching | Fall10
Biowiki > Fall10 > TWikiUsers > AleoMok > AleoMokHomework06

Search

Advanced search...

Topics
BioE131
BioE241
%SESSION_IF_AUTHENTICATED% %GET_SESSION_VARIABLE{AUTHUSER}% %SESSION_ELSE% Login
Register %SESSION_ENDIF%
Changes
Help

-- AleoMok - 05 Oct 2010

Return to Aleo Mok's Page: AleoMok

Homework 06 (collaborated with James Pollard)

nwdef.jpg

SCHEME 1 GRADING

File Available Here

hw6.pl.txt

Code

#! /usr/bin/perl -w

# check if there is the correct number of arguments
die "Error: incorrect number of arguments\n" unless (@ARGV == 2);

# check if the arguments are valid files
die "Error: filename \"$ARGV[0]\" does not exist\n" unless (-e $ARGV[0]);
die "Error: filename \"$ARGV[1]\" does not exist\n" unless (-e $ARGV[1]);

# check if the files are readable
die "Error: cannot read filename \"$ARGV[0]\"\n" unless (-r $ARGV[0]);
die "Error: cannot read filename \"$ARGV[1]\"\n" unless (-r $ARGV[1]);

# interpret first file (matrix file)
open MATRIX, $ARGV[0];
$line = <MATRIX>;
chomp($line);
die "Error: invalid matrix format\n" unless ($line =~ /[^a-z]+/i);
@letters = split(/ +/, $line);
for ($x = 0; $x < @letters; $x++) {
   $line = <MATRIX>;
   chomp($line);
   @values = split(/ +/, $line);
   die "Error: invalid matrix values\n" unless (@values == @letters);
   for ($y = 0; $y < @values; $y++) {
      $pair = $letters[$x] . $letters[$y];
      $matrix{$pair} = $values[$y];
   }
}
$line = <MATRIX>;
chomp($line);
die "Error: invalid gap line\n" unless ($line =~ /^[+-]?\d+$/);
$gap = $line;
if ($line = <MATRIX>) {
   die "Error: too many lines in matrix file\n";
}
close MATRIX;

# interpret second file (string file)
open STRINGS, $ARGV[1];
$numseq = 1;
$seq1 = "";
$seq2 = "";
$line = <STRINGS>;
$name1 = substr($line, 1);
chomp($name1);
while ($line = <STRINGS>) { 
   if ($line =~ /^\>/) {
      $name2 = substr($line, 1);
      chomp($name2);
      $numseq++;
      last;
   }
   chomp($line);
   $seq1 .= $line;
}
while ($line = <STRINGS>) {
   if ($line =~ /^\>/) {
      last;
   }
   chomp($line);
   $seq2 .= $line;
}
die "Error: not enough sequences $numseq\n" unless ($numseq == 2);
close STRINGS;

# check validity of sequences
$letters = "";
foreach $ltr (@letters) {
   $letters .= $ltr;
}
die "Error: first sequence $seq1 does not correspond with letters in similarity matrix $letters\n" unless ($seq1 !~ /[^$letters]/);
die "Error: second sequence $seq2 does not correspond with letters in similarity matrix $letters\n" unless ($seq2 !~ /[^$letters]/);

# construct score matrix
@score = ();
for ($row = 0; $row <= length($seq2); $row++) {
   $score[$row][0] = $row * $gap;
}
for ($col = 1; $col <= length($seq1); $col++) {
   $score[0][$col] = $col * $gap;
}

$row = 1;
for ($row = 1; $row <= length($seq2); $row++) {
   for ($col = 1; $col <= length($seq1); $col++) {
      $score[$row][$col] = 
         &max (   $score[$row - 1][$col - 1] + $matrix{substr($seq1, $col - 1, 1) . substr($seq2, $row - 1, 1)}, 
            $score[$row - 1][$col] + $gap,
            $score[$row][$col - 1] + $gap);
   }
}

# determine sequence
$align1 = "";
$align2 = "";
$cntr1 = length($seq1);
$cntr2 = length($seq2);
while ($cntr1 != 0 && $cntr2 != 0) {
   $val = $score[$cntr2][$cntr1];
   if ($val == $score[$cntr2 - 1][$cntr1] + $gap) {
      $align1 = '-' . $align1;
      $align2 = substr($seq2, $cntr2 - 1, 1) . $align2;
      $cntr2--;
   }
   elsif ($val == $score[$cntr2][$cntr1 - 1] + $gap) {
      $align1 = substr($seq1, $cntr1 - 1, 1) . $align1;
      $align2 = '-' . $align2;
      $cntr1--;
   }
   else {
      $align1 = substr($seq1, $cntr1 - 1, 1) . $align1;
      $align2 = substr($seq2, $cntr2 - 1, 1) . $align2;
      $cntr1--;
      $cntr2--;
   }
}

# print output
&print_matrix($seq1, $seq2, @score);
print "$name1 aligned to $name2\n";
while (length($align1) > 80) {
   print "substr($align1, 0, 80)\n";
   print "substr($align2, 0, 80)\n";
   $align1 = substr($align1, 80);
   $align2 = substr($align2, 80);
}
print "$align1\n$align2\n";

sub print_matrix {
   my ($seq1, $seq2, @score) = @_;
   print "|*|*|";
   for ($i = 0; $i < length($seq1); $i++) {
      $ltr = substr($seq1, $i, 1) . '|';
      print $ltr;
   }
   print "\n|*|";
   for ($j = 0; $j <= length($seq1); $j++) {
      $ltr = $score[0][$j] . '|';
      print $ltr;
   }
   print "\n";
   for ($row = 1; $row <= length($seq2); $row++) {
      $ltr = '|' . substr($seq2, $row - 1, 1) . '|';
      print "$ltr";
      for ($col = 0; $col <= length($seq1); $col++) {
         print "$score[$row][$col]|";
      }
      print "\n";
   }
}

sub max {
   my @array = @_;
   my $max = $array[0];
   foreach $num (@array) {
      if ($num > $max) {
         $max = $num;
      }
   }
   return $max;
}
I Attachment History Action Size Date Who Comment
Txttxt hw6.pl.txt r1 manage 4.0 K 2010-11-06 - 00:12 UnknownUser  
Jpgjpg nwdef.jpg r1 manage 10.6 K 2010-11-06 - 00:11 UnknownUser  
Actions: Edit | Attach | New | Ref-By | Printable view | Raw view | Normal view | See diffs | Help | More...