-- %TEACHINGWEB%.AleoMok - 08 Oct 2010
Return to Aleo Mok's Page:
AleoMok
Homework 03 (collaborated with Brian Ng)
File Available Here
hw3.pl.txt
Code
#! /usr/bin/perl -w
if (@ARGV == 1) {
die "Error: invalid argument\n" unless ($ARGV[0] eq "-h");
# display help message
print "Help:\n";
print "This program may take in 0, 2, or 4 arguments. (or -h for help)\n";
print "The first argument may be \"-calc\" or \"-load\". The second argument must be a file to read.\n";
print "If the first argument is \"-calc\" the second argument must be a FASTA file that can be read.\n";
print "If the first argument is \"-load\" the second argument must be a file that can be read in the following format:\n";
print ">output size = \"number of base-pairs\" | nucleotide composition A/C/G/T = \"A-percentage/C-percentage/G-percentage/T-percentage\"\n";
print "The third argument must be \"-save\". The fourth argument must be a file to save the data.\n";
}
else {
$size = 0; # number of base pairs
@ratio = (0, 0, 0, 0); # nucleotide composition A/C/G/T
if (@ARGV == 0) {
$size = 1000;
@ratio = (25, 25, 25, 25);
}
else {
die "Error: incorrect number of arguments\n" unless (@ARGV == 2 || @ARGV == 4);
die "Error: invalid first argument \"$ARGV[0]\"\n" unless ($ARGV[0] eq "-calc" || $ARGV[0] eq "-load");
die "Error: filename \"$ARGV[1]\" does not exist\n" unless (-e $ARGV[1]);
die "Error: filename \"$ARGV[1]\" is not readable\n" unless (-r $ARGV[1]);
open FILE, $ARGV[1];
if ($ARGV[0] eq "-calc") {
$line = <FILE>;
die "Error: filename \"$ARGV[1]\" is not in proper FASTA format\n" unless ($line =~ /^\>/);
# interpret FASTA file
$seq = "";
while ($line = <FILE>) {
chomp($line);
$seq .= uc($line);
}
$size += length($seq);
@seq = split(//, $seq);
foreach $bp (@seq) {
if ($bp eq "A") {
$ratio[0]++;
}
elsif ($bp eq "C") {
$ratio[1]++;
}
elsif ($bp eq "G") {
$ratio[2]++
}
elsif ($bp eq "T") {
$ratio[3]++
}
else {
die "Error: $bp is not a valid DNA nucleotide\n";
}
}
foreach $num (@ratio) {
$num = int(($num / $size) * 100 + 0.5);
}
}
else {
# interpret composition file
@comp = split(/ /, <FILE>);
$size = $comp[3];
@ratio = ($comp[10] =~ m/(\d+)/g);
}
}
close FILE or die $!;
if (@ARGV == 4) {
die "Error: invalid third argument \"$ARGV[2]\"\n" unless ($ARGV[2] eq "-save");
die "Error: filename \"$ARGV[3]\" does not exist\n" unless (-e $ARGV[3]);
die "Error: filename \"$ARGV[3]\" is not writeable\n" unless (-w $ARGV[3]);
$writefile = ">" . $ARGV[3];
open WRITEFILE, $writefile;
print WRITEFILE &Label($size - 6, @ratio);
close WRITEFILE or die $!;
}
print &Label($size, @ratio);
&PrintRandSequence($size, @ratio);
}
sub Label {
my($size, @ratio) = @_;
return ">output size = $size bp | nucleotide composition A/C/G/T = $ratio[0]%/$ratio[1]%/$ratio[2]%/$ratio[3]%\n";
}
sub PrintRandSequence {
my($size, @ratio) = @_;
$seq = "ATG";
$Abound = $ratio[0] - 1;
$Cbound = $Abound + $ratio[1];
$Gbound = $Cbound + $ratio[2];
while ($size > 0) {
$randnum = int(rand(100));
if ($randnum <= $Abound) {
$seq .= "A";
}
elsif ($randnum <= $Cbound) {
$seq .= "C";
}
elsif ($randnum <= $Gbound) {
$seq .= "G";
}
else {
$seq .= "T";
}
$size--;
if (length($seq) % 3 == 0) {
$codon = substr $seq, -3;
if ($codon eq "TAG" || $codon eq "TAA" || $codon eq "TGA") {
$size += 3;
$seq = substr $seq, 0, -3;
}
}
}
$seq .= "TAG";
while (length($seq) > 80) {
$temp = substr $seq, 0, 80;
print "$temp\n";
$seq = substr $seq, 80;
}
print "$seq\n";
}

Copyright © 2008-2013 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki?
Send feedback