#!/usr/bin/perl -w my $window = 200; my $step = 50; my $pfolddir = glob "~/pfold/bin"; my $grammar = "$pfolddir/article.grm"; my $tmpfasta = "TMPFASTA$$"; my $verbose = 0; sub cleanup { unlink $tmpfasta if -e $tmpfasta; unlink "$tmpfasta.col" if -e "$tmpfasta.col"; } END { cleanup() } $SIG{"INT"} = sub { cleanup() }; $SIG{"KILL"} = sub { cleanup() }; my $newick2col = "newick2col"; my $fasta2col = "fasta2col"; my $scfg = "scfg"; # my $tkfalign = "tkfalign"; my $usage = "\n"; $usage .= "Usage: $0 \n"; $usage .= " [-v] verbose\n"; $usage .= " [-w ] window size; default is $window\n"; $usage .= " [-s ] step size; default is $step\n"; $usage .= " [-p ] PFOLD binaries directory; default is '$pfolddir'\n"; $usage .= "[-g ] grammar filename; default is '$grammar'\n"; $usage .= "[-t ] temporary FASTA filename; default is '$tmpfasta'\n"; $usage .= "\n"; my @argv; while (@ARGV) { my $arg = shift; if ($arg =~ /^-/) { if ($arg eq "-v") { $verbose = 1 } elsif ($arg eq "-w") { defined ($window = shift) or die $usage } elsif ($arg eq "-s") { defined ($step = shift) or die $usage } elsif ($arg eq "-p") { defined ($pfolddir = shift) or die $usage } elsif ($arg eq "-g") { defined ($grammar = shift) or die $usage } elsif ($arg eq "-t") { defined ($tmpfasta = shift) or die $usage } else { die $usage } } else { push @argv, $arg; } } die $usage unless @argv == 2; my ($newick, $fasta) = @argv; # convert Newick to col system "$pfolddir/$newick2col $newick >$newick.col"; # read FASTA file my %seq; my @name; my $name; open FASTA, "<$fasta" or die "Couldn't open '$fasta': $!"; while () { if (/^\s*>\s*(\S+)/) { $name = $1; push @name, $name; } else { s/\s//g; die "Bad FASTA file" if /\S/ && !defined $name; $seq{$name} .= $_; } } close FASTA; # check all seqs are same length my $length; foreach my $name (@name) { if (defined $length) { die "Sequences not all same length" unless $length == length $seq{$name}; } else { $length = length $seq{$name}; } } # scan window through file for (my $pos = 0; $pos < $length - $window; $pos += $step) { # create temporary FASTA & col files open TMPFASTA, ">$tmpfasta" or die "Couldn't open '$tmpfasta': $!"; my $subseqlen; foreach my $name (@name) { my $subseq = substr ($seq{$name}, $pos, $window); $subseqlen = length $subseq; print TMPFASTA ">$name\n", $subseq, "\n"; } close TMPFASTA or die "Couldn't open '$tmpfasta': $!"; warn "[column $pos+$subseqlen]\n" if $verbose; system "$pfolddir/$fasta2col $tmpfasta >$tmpfasta.col"; # run PFOLD my $command = "$pfolddir/$scfg --treefile $newick.col $grammar $tmpfasta.col"; warn "[running '$command']\n" if $verbose; open PFOLD, "$command|" or die "Couldn't run '$command': $!"; my $score; while () { warn $_ if $verbose; if (/-log\(InsideOddsRatio\)= (\S+)/) { $score = $1; last; } } close PFOLD; # print output my $prob; if (defined $score) { $prob = 1/(1+2**$score); } else { $prob = $score = ""; } print $pos, " ", $pos+$subseqlen-1, " ", $score, " ", $prob, "\n"; }