########################################################################### # Variables ########################################################################### # qsub QSUB = qsub -cwd -v PATH,PERL5LIB,DARTDIR -b y QSUBNULL = $(QSUB) -o /dev/null -e /dev/null JID = | fields 2 > # xrate XRATE = xrate # MAVID alignment directories ALIGNDIR = align/12fly/mavid MERCDIR = align/12fly/mercator # executables FASTA2STOCKHOLM = fasta2stockholm.pl # use GNU make functions to build lists of MAVID subdirs MAVIDFILES = $(wildcard $(ALIGNDIR)/*/mavid.mfa) MAVIDDIRS = $(dir $(MAVIDFILES)) MAVIDSTOCK = $(addsuffix mavid.stock,$(MAVIDDIRS)) MAVIDHKY85 = $(addsuffix mavid.hky85,$(MAVIDDIRS)) ########################################################################### # Format conversion ########################################################################### # conversion from FASTA to Stockholm format $(ALIGNDIR)/%/mavid.stock: $(ALIGNDIR)/%/mavid.mfa $(FASTA2STOCKHOLM) $< >$@ # pseudotarget representing all FASTA-->Stockholm conversions all-stock: $(MAVIDSTOCK) # featurevole.pl annot/featurevole/%.gff: annot/fb4.3/Dmel/AllChr/%.gff cat $< | featurevole.pl DroMel_CAF1 -genomes $(MERCDIR)/genomes -map $(MERCDIR)/hacked-map -align $(ALIGNDIR) >$@ # gff2dirs.pl annot/featurevole/.%.gff2dirs: annot/featurevole/%.gff gff2dirs.pl -gff $< -align $(ALIGNDIR) -out $*.gff touch $@ # paintstock.pl %.paintstock: %.gff paintstock.pl -gff $< -align $(dir $<)/mavid.stock -out $@ ########################################################################### # Alignment sorting ########################################################################### # Create Pollard tree TMPHTML = tmp.html # Parse 9-fly median tree out of Dan Pollard's website # (commented out to prevent accidental deletion via rebuild from broken remote dependency, aka "fuckup") #tree/Pollard-median-time.nh: # wget -O $(TMPHTML) http://rana.lbl.gov/~dan/trees.html # cat $(TMPHTML) | grep dmel: | lines 1 | perl -pe 's/<[^>]*>//g;s/^\s*//' >$@ # rm $(TMPHTML) # Hack names to match CAF1/MAVID tree/%.hacked-CAF1.nh: tree/%.nh perl -pe 'while(/d([a-z])([a-z][a-z])/){($$s,$$pe)=($$1,$$2);$$S=uc$$s;s/d$$s$$pe/Dro$$S$${pe}_CAF1/}' $< >$@ # add the 3 missing species (a bit hacky) tree/median.nh: tree/Pollard-median-time.hacked-CAF1.nh cat $< | perl -pe '$$s="0.000001";sub b{($$old,$$new)=@_;s/(Dro$$old[^:]+)(:[0-9\.]+)/($$1$$2,Dro$$new$$2):$$s/}b(qw(Sim Sec));b(qw(Pse Per));s/\((.*\d), (\(+DroMoj.*)\);/(($$1, DroWil_CAF1:0.5):$$s, $$2);/' >$@ # Add median tree to all MAVID alignments %/mavid.median: %/mavid.stock tree/median.nh addtree.pl tree/median.nh $*/mavid.stock >$@ ########################################################################### # Trees ########################################################################### # Starting from median tree, estimate all branch lengths using a flat substitution model (JC69) %/mavid.jc69: %/mavid.median $(XRATE) $< -e eg/jc69.eg --noannotate >$@ # Using median tree, fit HKY85 model individually to each alignment %/hky85.eg: %/mavid.jc69 $(XRATE) $< -g eg/hky85.eg -t $@ >/dev/null # Using median tree, fit HKY85 model individually to each alignment, # STARTING FROM LOW BETA SEED (gives marginally better log-likelihoods) %/hky85-lowbeta.eg: %/mavid.jc69 $(XRATE) $< -g eg/hky85-lowbeta.eg -t $@ >/dev/null # Normalize the HKY85 model %/hky85.norm.eg: %/hky85.eg perl -pe '$$norm="(piA * (piC + piG + piT) + piC * (piG + piT) + piG * piT) * (2 + 4 * beta)";if(/mutate.*rate/){s/(\)\)\s*)$$/ \/ ($$norm)$$1/}' $< >$@ # Using each individually-fit & normalized HKY85 model, re-estimate branch lengths for each alignment %/mavid.hky85: %/hky85.norm.eg $(XRATE) $*/mavid.jc69 -e $< --noannotate >$@ ########################################################################### # Substitution rates ########################################################################### # Using the HKY85-derived branch lengths, fit REV to each alignment %.rev: %.hky85 $(XRATE) -p rev $< -t $@ >/dev/null # Using the HKY85-derived branch lengths, fit IRREV to each alignment %.irrev: %.hky85 $(XRATE) -p irrev $< -t $@ >/dev/null # Using the HKY85-derived branch lengths, fit DINUC to each alignment %.dinuc: %.hky85 $(XRATE) -g eg/dinuc.eg $< -t $@ >/dev/null ########################################################################### # Subsets ########################################################################### # Select a random 1% of alignments with HKY85-derived trees 1pc/mavid.hky85: $(MAVIDHKY85) test -d 1pc || mkdir 1pc perl -e '@a=@ARGV;$$n=int(@a/100);for$$i(0..$$n-1){$$j=$$i+int rand(@a-1-$$i);$$d=$$a[$$j];$$a[$$j]=$$a[$$i];$$d=~s/\///;print"# STOCKHOLM 1.0\n#=GF ID $$d\n";system"cat $$d/mavid.tree | lines 2-"}' $(MAVIDDIRS) >$@ ########################################################################### # Tests ########################################################################### # test GNU make functions testenv: echo $(MAVIDFILES) echo $(MAVIDDIRS) # don't delete secondary targets .SECONDARY: