#!/usr/bin/perl # This program implements the Nussinov recursion to maximize the number of # base pairs in an RNA sequence, which is input as a command-line argument. # Examine and parse the sequence die "Error: Please enter RNA sequence as argument.\n" unless defined ($ARGV[0]); $seq = $ARGV[0]; $seq = uc($seq); @s = split //, " " . $seq; $_ = $seq; if (/[BDEFHIJKLMNOPQRSTVWXYZ]/) { die "Error: Sequence contains characters other than A, C, G, U.\n"; } $len = length($seq); print "Length is $len\n"; # Initialization: Where the position is j=i or j=i-1, the score is 0 for($i = 1; $i <= $len; $i++) { $posScore[$i][$i-1] = 0; } for($i = 1; $i <= $len; $i++) { $posScore[$i][$i] = 0; } # Recursion: Loop through all possible pairs of bases for($position = 2; $position <= $len; $position++) { $i = 0; for($j = $position; $j <= $len; $j++) { $i++; # Base pair scores defined for self pairing (0) and Watson-Crick pairing (1) $bpScore[$i,$j] = 0; if ($s[$i] eq 'A' && $s[$j] eq 'U') {$bpScore[$i,$j] = 1} elsif($s[$i] eq 'C' && $s[$j] eq 'G') {$bpScore[$i,$j] = 1} elsif($s[$i] eq 'G' && $s[$j] eq 'C') {$bpScore[$i,$j] = 1} elsif($s[$i] eq 'U' && $s[$j] eq 'A') {$bpScore[$i,$j] = 1} # Maximization over four possible ways of getting the optimal substructure $posScore[$i][$j] = $posScore[$i+1][$j]; # i unpaired if($posScore[$i][$j-1] > $posScore[$i][$j]) { $posScore[$i][$j] = $posScore[$i][$j-1] } # j unpaired if($posScore[$i+1][$j-1]+$bpScore[$i,$j] > $posScore[$i][$j]) { $posScore[$i][$j] = $posScore[$i+1][$j-1]+$bpScore[$i,$j] } # base pair for($k = $i+1; $k < $j; $k++) { if($posScore[$i][$k]+$posScore[$k+1][$j] > $posScore[$i][$j]) { $posScore[$i][$j] = $posScore[$i][$k]+$posScore[$k+1][$j] } # bifurcation } } } print "Maximum number of Watson-Crick base pairs is $posScore[1][$len]\n";