...
Code Block | ||||
---|---|---|---|---|
| ||||
#start an idev session if you have not idev #make a directory to do the analysis in cds mkdir rad_intro cd rad_intro mkdir in_silico_digest cd in_silico_digest #download the reference from NCBI wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/222/465/GCF_000222465.1_Adig_1.1/GCF_000222465.1_Adig_1.1_genomic.fna.gz #decompress it gunzip GCF_000222465.1_Adig_1.1_genomic.fna.gz #check the command on some different max insert sizes cat GCF_000222465.1_Adig_1.1_genomic.fna | tr -d "\n" | grep -o -E "GCATGC.{300,400}CCGG" | wc -l cat GCF_000222465.1_Adig_1.1_genomic.fna | tr -d "\n" | grep -o -E "GCATGC.{300,500}CCGG" | wc -l cat GCF_000222465.1_Adig_1.1_genomic.fna | tr -d "\n" | grep -o -E "GCATGC.{300,600}CCGG" | wc -l #use a loop to run command for a series of maximum insert sizes seq 400 100 1000 > maxInserts.txt echo -e "insert_range\tNsites" > prediction_results.txt while read max; do nSites=$(cat GCF_000222465.1_Adig_1.1_genomic.fna | tr -d "\n" | grep -o -E "GCATGC.{300,${max}}CCGG" | wc -l); echo -e "300-${max}\t${nSites}" >> prediction_results.txt; done < maxInserts.txt #look at the results cat prediction_results.txt |