#set up boundaries
seq 0 250000 19750000 > lefts
seq 250000 250000 20000000 > rights
paste lefts rights > bounds.txt
#now build commands to subset the vcf with VCFtools, then pipe the subset into VCF-kit for tree building
awk '{print "vcftools --vcf chr12_for_phylo.vcf --chr chrXII --from-bp "$1 " --to-bp "$2" --recode -c | vk phylo tree upgma - > window_"$1"-"$2".newick &"}' bounds.txt > treeBuildCommands
#check the commands (should have 80 lines)
head treeBuildCommands
wc -l treeBuildCommands
#Double-check you are in an idev session
#If you are in an idev session, and had 80 lines, print to screen and execute
sh treeBuildCommands
#check that you have all trees
ls window*.newick | wc -l
#compile the resulting trees into a single file
#they need to be in order, so we can just simply cat them
#read bounds.txt again to cat them in order
echo ">allTrees.newick" >catTrees
while read bound
do left=$(echo $bound | cut -d " " -f 1)
right=$(echo $bound | cut -d " " -f 2)
echo "cat window_${left}-${right}.newick | tr -d '\n' >> allTrees.newick && echo '' >>allTrees.newick" >> catTrees
done<bounds.txt
#execute the catting commands
sh catTrees
#check that the input is correct size (should be 80)
wc -l allTrees.newick
#now run Twisst on them
~/twisst/twisst.py -t allTrees.newick\
-w topology_weights.tsv\
-g Pun_X -g Pun_Y -g Sin -g Tym\
--groupsFile groups_file.txt\
--method threshold\
--thresholdTable ~/twisst/threshold_tables/binom_threshold_wilson_CI95_0.1.tsv
#send the results to personal computer to plot with plot_twisst_results.R
|