# This command just counts the # of called variants in test.raw.vcf (from individual NA12878)
grep -c -v '^#' test.raw.vcf
# This command just counts the # of called variants in all 3 individuals
grep -c -v '^#' $BI/ngs_course/human_variation/all.samtools.vcf
# Found out how many are common between the two
intersectBed -a test.raw.vcf -b $BI/ngs_course/human_variation/all.samtools.vcf | wc -l
# Take all those that are not in all.samtools.vcf and examine their quality (in field 6 of the vcf file)
subtractBed -a test.raw.vcf -b $BI/ngs_course/human_variation/all.samtools.vcf | \
awk 'BEGIN {max=0} {sum+=$6; if ($6>max) {max=$6}} END {print "Average qual: "sum/NR "\tMax qual: " max}'
# Look at all the qualities from the NA12878 variants
grep -v '^#' test.raw.vcf | awk 'BEGIN {max=0} {sum+=$6; if ($6>max) {max=$6}} END {print "Average qual: "sum/NR "\tMax qual: " max}'
|