2022 Example alignment work file
Below is an example "work" file.
#!/bin/bash
# ---------------------------------------------------------------------------------
# This file shows steps taken to process October 2014 Science Park GBM data.
# It is a record of what was done, NOT meant to be run as an actual script.
# ---------------------------------------------------------------------------------

# =============================================================
# At TACC (Ls5)
# =============================================================

# Set up environment
export PATH="/work/01063/abattenh/local/bin:$PATH"

# Download NGS1140, ID19541 data from Science Park to $SCRATCH/seq/original/tmp_hold at TACC
cd "$SCRATCH/seq/original/tmp_hold"

# One tarball per sample; the URL is <base_url>/Sample_<sample>.tar
base_url=https://spi.mdanderson.org/viyer2/20141007_NGS1140
samples=(
  SD01c_CTCF_B2
  SD01c_H3K4Me3_B2
  SD01c_RNAP2_B2
  SD06_H3K4Me1_B2
  SD06_RNAP2_B2
  SD01c_H3K27Ac_B2
  SD01c_H3K9Ac_B2
  SD06_CTCF_B2
  SD06_H3K4Me3_B2
  SD06_input_B2
  SD01c_H3K27Me3_B2
  SD01c_H3K9Me3_B2
  SD06_H3K27Ac_B2
  SD06_H3K9Ac_B2
)
urls=()
for s in "${samples[@]}"; do
  urls+=("$base_url/Sample_$s.tar")
done

# --ask-password makes wget prompt for the password, so it never appears
# in this file, in the shell history, or in the process list.
wget --user=viyer --ask-password "${urls[@]}"

# on a login node
cd "$SCRATCH/seq/original/tmp_hold"
rm -rf tars
mkdir tars
# Unpack each tarball here, then park the tarball itself under tars/
for f in *.tar; do
  tar -xvf "$f"
  mv -- "$f" tars/
done

# Move the unpacked Sample_* directories to their long-term location
mkdir -p "$SCRATCH/seq/original/Bc_Illumina/Oct14.SciPark/ID19541"
cd "$SCRATCH/seq/original/tmp_hold"
mv Sample_* "$SCRATCH/seq/original/Bc_Illumina/Oct14.SciPark/ID19541"

# ----------------------------------
# Alignment prep
# ----------------------------------
mkdir -p "$SCRATCH/seq/align/2014_10.sciPk/fq"
mkdir -p "$SCRATCH/seq/align/2014_10.sciPk/hg19/gbm"

# Symlink every .gz file found under the ID19541 directory into the fq directory.
# find -exec hands the paths to ln directly, so unusual characters in a path
# cannot be mis-split the way they can with a plain "find | xargs".
cd "$SCRATCH/seq/align/2014_10.sciPk/fq"
find "$SCRATCH/seq/original/Bc_Illumina/Oct14.SciPark/ID19541" -name "*.gz" \
  -exec ln -s -f -t . {} +
# List the R1 FASTQ files now linked into the current directory.
ls -1 *_R1*gz
# SD01c_CTCF_B2_CTTGTA_L008_R1_001.fastq.gz
# SD01c_H3K27Ac_B2_TACAGC_L008_R1_001.fastq.gz
# SD01c_H3K27Me3_B2_CTATAC_L008_R1_001.fastq.gz
# SD01c_H3K4Me3_B2_ATGAGC_L008_R1_001.fastq.gz
# SD01c_H3K9Ac_B2_GCGCTA_L008_R1_001.fastq.gz
# SD01c_H3K9Me3_B2_CACCGG_L008_R1_001.fastq.gz
# SD01c_RNAP2_B2_CCGTCC_L008_R1_001.fastq.gz
# SD06_CTCF_B2_CAACTA_L007_R1_001.fastq.gz
# SD06_H3K27Ac_B2_TAATCG_L007_R1_001.fastq.gz
# SD06_H3K4Me1_B2_TATAAT_L007_R1_001.fastq.gz
# SD06_H3K4Me3_B2_ACTTGA_L007_R1_001.fastq.gz
# SD06_H3K9Ac_B2_CAGGCG_L007_R1_001.fastq.gz
# SD06_input_B2_TCATTC_L007_R1_001.fastq.gz
# SD06_RNAP2_B2_CACGAT_L007_R1_001.fastq.gz

# count.cmds
# Contents of count.cmds, which is handed to launcher_maker.py below
# (presumably one task per line -- keep each command on a single line).
# Each task writes "<count><TAB><file name>" to <file>_stats.txt, where
# <count> is the file's decompressed line count divided by 4.
for f in SD01c_[CR]*_R[12]_001.fastq.gz; do printf '%s\t%s\n' "$(( $(zcat "$f" | wc -l) / 4 ))" "$f" | tee "${f}_stats.txt"; done
for f in SD01c_H3K27*_R[12]_001.fastq.gz; do printf '%s\t%s\n' "$(( $(zcat "$f" | wc -l) / 4 ))" "$f" | tee "${f}_stats.txt"; done
for f in SD01c_H3K[49]*_R[12]_001.fastq.gz; do printf '%s\t%s\n' "$(( $(zcat "$f" | wc -l) / 4 ))" "$f" | tee "${f}_stats.txt"; done
for f in SD06_[CRi]*_R[12]_001.fastq.gz; do printf '%s\t%s\n' "$(( $(zcat "$f" | wc -l) / 4 ))" "$f" | tee "${f}_stats.txt"; done
for f in SD06_H3K[29]*_R[12]_001.fastq.gz; do printf '%s\t%s\n' "$(( $(zcat "$f" | wc -l) / 4 ))" "$f" | tee "${f}_stats.txt"; done
for f in SD06_H3K4*_R[12]_001.fastq.gz; do printf '%s\t%s\n' "$(( $(zcat "$f" | wc -l) / 4 ))" "$f" | tee "${f}_stats.txt"; done

cd "$SCRATCH/seq/align/2014_10.sciPk/fq"
launcher_maker.py -t 1 -n count.cmds -w 8 -q dev -v -a CancerGenetics
sbatch count.slurm; showq -u

# Gather the per-file counts into one report, then drop the per-file pieces.
# The glob is expanded by the shell directly; no need to parse ls output.
cat -- *.gz_stats.txt > fq_stats.txt
rm -- *.gz_stats.txt

# fqc.cmds
# Contents of fqc.cmds (same single-line-per-task convention as count.cmds).
fastqc SD01c_[CR]*_R[12]_001.fastq.gz
fastqc SD01c_H3K27*_R[12]_001.fastq.gz
fastqc SD01c_H3K[49]*_R[12]_001.fastq.gz
fastqc SD06_[CRi]*_R[12]_001.fastq.gz
fastqc SD06_H3K[29]*_R[12]_001.fastq.gz
fastqc SD06_H3K4*_R[12]_001.fastq.gz

# done
cd "$SCRATCH/seq/align/2014_10.sciPk/fq"
launcher_maker.py -t 4 -n fqc.cmds -w 8 -v -a CancerGenetics -m fastqc
sbatch fqc.slurm; showq -u
# Remove the .zip files left in the fq directory (presumably FastQC archives).
rm -- *.zip

# -----------------------------------------------
# Align Amelia's SciPk GBM ChIP
# -----------------------------------------------

# sync code
rsync -avrP --delete --exclude=.git \
  abattenh@iyerstor01.icmb.utexas.edu:/home/abattenhouse/gitdir/bioiteam/bioi/ \
  "$STOCKYARD/gitdir/bioiteam/bioi/"
rsync -avrP --delete --exclude=CVS \
  abattenh@iyerstor01.icmb.utexas.edu:/home/abattenhouse/sequencing/code/ \
  "$STOCKYARD/seq/code/"

cd "$SCRATCH/seq/align/2014_10.sciPk/hg19/gbm"
# Creates ./fq as a symlink to the shared fq directory two levels up.
ln -s -f ../../fq
# NOTE(review): from this directory the FASTQs are reachable as ./fq/*, so this
# glob presumably only matches when run inside fq -- confirm where it was run.
ls -1 *_R1*gz
# SD01c_CTCF_B2_CTTGTA_L008_R1_001.fastq.gz
# SD01c_H3K27Ac_B2_TACAGC_L008_R1_001.fastq.gz
# SD01c_H3K27Me3_B2_CTATAC_L008_R1_001.fastq.gz
# SD01c_H3K4Me3_B2_ATGAGC_L008_R1_001.fastq.gz
# SD01c_H3K9Ac_B2_GCGCTA_L008_R1_001.fastq.gz
# SD01c_H3K9Me3_B2_CACCGG_L008_R1_001.fastq.gz
# SD01c_RNAP2_B2_CCGTCC_L008_R1_001.fastq.gz
# SD06_CTCF_B2_CAACTA_L007_R1_001.fastq.gz
# SD06_H3K27Ac_B2_TAATCG_L007_R1_001.fastq.gz
# SD06_H3K4Me1_B2_TATAAT_L007_R1_001.fastq.gz
# SD06_H3K4Me3_B2_ACTTGA_L007_R1_001.fastq.gz
# SD06_H3K9Ac_B2_CAGGCG_L007_R1_001.fastq.gz
# SD06_input_B2_TCATTC_L007_R1_001.fastq.gz
# SD06_RNAP2_B2_CACGAT_L007_R1_001.fastq.gz

# aln.cmds
# One alignment command per sample (single line each).
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD01c_CTCF_B2_CTTGTA_L008_R1_001.fastq.gz sd01_ctcf_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD01c_H3K27Ac_B2_TACAGC_L008_R1_001.fastq.gz sd01_h3k27ac_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD01c_H3K27Me3_B2_CTATAC_L008_R1_001.fastq.gz sd01_h3k27me3_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD01c_H3K4Me3_B2_ATGAGC_L008_R1_001.fastq.gz sd01_h3k4me3_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD01c_H3K9Ac_B2_GCGCTA_L008_R1_001.fastq.gz sd01_h3k9ac_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD01c_H3K9Me3_B2_CACCGG_L008_R1_001.fastq.gz sd01_h3k9me3_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD01c_RNAP2_B2_CCGTCC_L008_R1_001.fastq.gz sd01_pol2_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD06_CTCF_B2_CAACTA_L007_R1_001.fastq.gz sd06_ctcf_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD06_H3K27Ac_B2_TAATCG_L007_R1_001.fastq.gz sd06_h3k27ac_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD06_H3K4Me1_B2_TATAAT_L007_R1_001.fastq.gz sd06_h3k4me1_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD06_H3K4Me3_B2_ACTTGA_L007_R1_001.fastq.gz sd06_h3k4me3_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD06_H3K9Ac_B2_CAGGCG_L007_R1_001.fastq.gz sd06_h3k9ac_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD06_input_B2_TCATTC_L007_R1_001.fastq.gz sd06_input_b2 hg19 1 50
/work/projects/BioITeam/common/script/script/align_bwa_illumina.sh global ./fq/SD06_RNAP2_B2_CACGAT_L007_R1_001.fastq.gz sd06_pol2_b2 hg19 1 50

# Build and submit the batch job for the alignment commands in aln.cmds.
launcher_maker.py -t 24 -n aln.cmds -w 2 -v -a CancerGenetics
sbatch aln.slurm; showq -u

# combine sd06 inputs
mkdir -p "$SCRATCH/seq/align/2014_10.sciPk/hg19/gbm/merge"
cd "$SCRATCH/seq/align/2014_10.sciPk/hg19/gbm/merge"
# Link this run's sd06 input BAM and copy in the b1 input BAM from the
# 2013_07 run, so both match the glob used in merge.cmds.
ln -s ../sd06_input_b2.sort.dup.bam
cp -p "$IYC/seq/align/2013_07.awh_scipk/hg19/amelia/sd06a_input_b1.sort.dup.bam" .

# merge.cmds
# Contents of merge.cmds (single line per task).
/work/projects/BioITeam/common/script/script/merge_sorted_bams.sh sd06_input_cmb sd06*_input_b[12].sort.dup.bam

launcher_maker.py -t 6 -n merge.cmds -w 2 -v -a CancerGenetics
sbatch merge.slurm; showq -u

# -----------------------------------------------
# Fix file permissions then rsync back
# -----------------------------------------------
cd "$SCRATCH/seq/original/Bc_Illumina/Oct14.SciPark/"
# find -exec passes directory names straight to chmod, so names containing
# whitespace are not split the way they are with "find | xargs".
find . -type d -exec chmod +x {} +
# The ./ prefix keeps a file name starting with "-" from being read as an option.
chmod -R +r ./*
rsync -avrP "$SCRATCH/seq/original/Bc_Illumina/Oct14.SciPark/" \
  abattenh@iyercomp03.ccbb.utexas.edu:/seq/original/Bc_Illumina/2014/2014_10.SciPark/

cd "$SCRATCH/seq/align/2014_10.sciPk"
find . -type d -exec chmod +x {} +
chmod -R +r ./*
rsync -avrP "$SCRATCH/seq/align/2014_10.sciPk/" \
  abattenh@iyercomp03.ccbb.utexas.edu:/seq/align/2014_10.sciPk/
Welcome to the University Wiki Service! Please use your IID (yourEID@eid.utexas.edu) when prompted for your email address during login or click here to enter your EID. If you are experiencing any issues loading content on pages, please try these steps to clear your browser cache.