Chapter 3 RNA-seq
3.1 Preprocessing
#!/bin/bash
#SBATCH -p CPU # partition (queue)
#SBATCH --job-name=STAR
#SBATCH -n 40
#SBATCH --array=1-2
#SBATCH -t 7-00:00 # time (D-HH:MM)
#SBATCH -o _log/anno.%A_%a.out # STDOUT
#SBATCH -e _log/anno.%A_%a.err # STDERR
#SBATCH --mail-type=END,FAIL # notifications for job done & fail
#SBATCH --mail-user=XX # send-to address

# Align one paired-end sample per array task with STAR.
#
# Inputs:
#   sample.txt                    one sample ID per line; line N is used by array task N
#   <fq_path>/<id>_{1,2}.clean.fq.gz   gzipped paired-end reads
#   star_v43_2.7.9a               STAR genome index directory
# Output:
#   <out_path>/<id>/<id>.*        STAR output, including a coordinate-sorted BAM
#
# The _log directory has to exist before submission; it is not created here.
# NOTE(review): "-n 40" requests 40 tasks, while STAR is a single process using
# 40 threads; confirm whether "-c 40" (cpus-per-task) is what is intended.

# Without a task ID, "sed -n p" would print every line of sample.txt.
: "${SLURM_ARRAY_TASK_ID:?this script must be submitted as a SLURM array job}"

id=$(sed -n "${SLURM_ARRAY_TASK_ID}p" sample.txt)
if [[ -z "${id}" ]]; then
  echo "No sample ID found on line ${SLURM_ARRAY_TASK_ID} of sample.txt" >&2
  exit 1
fi
echo "${id}"

fq_path=.
out_path=.
fq1="${fq_path}/${id}_1.clean.fq.gz"
fq2="${fq_path}/${id}_2.clean.fq.gz"
star_index=star_v43_2.7.9a

# -p keeps a rerun of the same sample from failing on an existing directory.
mkdir -p "${out_path}/${id}" || exit 1

# Multi-mapping reads are tolerated (up to 1000 loci) but only one alignment
# per read is written; see the STAR manual for the exact option semantics.
STAR --runThreadN 40 \
  --genomeDir "${star_index}" \
  --outSAMtype BAM SortedByCoordinate \
  --readFilesIn "${fq1}" "${fq2}" \
  --readFilesCommand zcat \
  --outFileNamePrefix "${out_path}/${id}/${id}." \
  --runMode alignReads \
  --outFilterMultimapNmax 1000 \
  --outSAMmultNmax 1 \
  --outFilterMismatchNmax 3 \
  --outMultimapperOrder Random \
  --winAnchorMultimapNmax 1000 \
  --alignEndsType EndToEnd \
  --alignIntronMax 1 \
  --alignMatesGapMax 350
3.2 Salmon
#!/bin/bash
#SBATCH -p SVC # partition (queue)
#SBATCH --job-name=p53
#SBATCH -n 8
#SBATCH --array=1-2
#SBATCH -t 7-00:00 # time (D-HH:MM)
#SBATCH -o _log/salmon.%A_%a.out # STDOUT
#SBATCH -e _log/salmon.%A_%a.err # STDERR
#SBATCH --mail-type=END,FAIL # notifications for job done & fail
#SBATCH --mail-user=XX # send-to address

# Quantify one paired-end sample per array task with Salmon.
#
# Inputs:
#   sample.txt                    one sample ID per line; line N is used by array task N
#   <fq_path>/<id>_{1,2}.clean.fq.gz   gzipped paired-end reads
#   SALMON_1_10                   environment variable holding the salmon command
#   gencode.v43_salmon-1.10       Salmon index
#   gencode.v43.annotation.gtf    annotation passed via -g
# Output:
#   salmon_1.10/<id>/             Salmon quantification directory
#
# The _log directory has to exist before submission; it is not created here.

# Without a task ID, "sed -n p" would print every line of sample.txt.
: "${SLURM_ARRAY_TASK_ID:?this script must be submitted as a SLURM array job}"
: "${SALMON_1_10:?SALMON_1_10 must point to the salmon executable}"

id=$(sed -n "${SLURM_ARRAY_TASK_ID}p" sample.txt)
if [[ -z "${id}" ]]; then
  echo "No sample ID found on line ${SLURM_ARRAY_TASK_ID} of sample.txt" >&2
  exit 1
fi
echo "${id}"

fq_path=.
fq1="${fq_path}/${id}_1.clean.fq.gz"
fq2="${fq_path}/${id}_2.clean.fq.gz"
gtf_file=gencode.v43.annotation.gtf
salmon_index=gencode.v43_salmon-1.10
out_path=salmon_1.10

# $SALMON_1_10 is deliberately left unquoted so that a multi-word command
# (e.g. a container wrapper) keeps working as it did before.
# shellcheck disable=SC2086
$SALMON_1_10 quant -p 8 -l IU \
  -i "${salmon_index}" \
  -o "${out_path}/${id}" \
  -1 "${fq1}" -2 "${fq2}" \
  -g "${gtf_file}" \
  --gcBias --validateMappings
3.3 ERVmap
#!/bin/bash
#SBATCH -p CPU # partition (queue)
#SBATCH --job-name=NSC
#SBATCH -n 40
#SBATCH -t 7-00:00 # time (D-HH:MM)
#SBATCH -o _log/rna.%A_%a.out # STDOUT
#SBATCH -e _log/rna.%A_%a.err # STDERR
#SBATCH --mail-type=END,FAIL # notifications for job done & fail
#SBATCH --mail-user=XX # send-to address

# Run the ERVmap pipeline (stages 1-6) on one paired-end sample.
#
# Required environment variables (none of them is set in this script):
#   ID        sample ID; reads are <FQ_PATH>/<ID>_{1,2}.clean.fq.gz
#   FQ_PATH   directory holding the input FASTQ files
#   OUT_PATH  output directory; the interleaved FASTQ goes to <OUT_PATH>/fastq
#   SCRIPTS   directory with interleaved.pl, erv_genome.pl and parse_bam.pl
#   REF       directory with the reference files used by erv_genome.pl
#
# NOTE(review): the log pattern uses %a but no --array is declared here;
# confirm how the job is submitted.

# An unset variable would otherwise expand to an empty string and yield
# paths such as "/fastq" or "/_1.clean.fq.gz".
: "${ID:?ID (sample ID) must be set}"
: "${FQ_PATH:?FQ_PATH (input FASTQ directory) must be set}"
: "${OUT_PATH:?OUT_PATH (output directory) must be set}"
: "${SCRIPTS:?SCRIPTS (ERVmap script directory) must be set}"
: "${REF:?REF (reference directory) must be set}"

source activate ERVmap

# Make the perl | gzip pipeline report a failure of either stage.
set -o pipefail

# NOTE(review): the indexes are rebuilt on every run, and are built under the
# current directory while erv_genome.pl below reads them from ${REF}/...;
# confirm that both locations are the same.
bwa index -p bwa_genome/genome bwa_genome/genome.fa
bowtie2-build Bowtie2_genome/genome.fa Bowtie2_genome/genome

mkdir -p _log "${OUT_PATH}/fastq" || exit 1

FQ1="${FQ_PATH}/${ID}_1.clean.fq.gz"
FQ2="${FQ_PATH}/${ID}_2.clean.fq.gz"

# Merge both mates into the single gzipped FASTQ that erv_genome.pl takes.
if ! perl "${SCRIPTS}/interleaved.pl" --read1 "${FQ1}" --read2 "${FQ2}" \
  | gzip -c > "${OUT_PATH}/fastq/${ID}.fastq.gz"; then
  echo "Interleaving ${FQ1} and ${FQ2} failed" >&2
  exit 1
fi

perl "${SCRIPTS}/erv_genome.pl" \
  -start_stage 1 -end_stage 6 \
  --fastq "${OUT_PATH}/fastq/${ID}.fastq.gz" \
  --genome "${REF}/bwa_genome/genome" \
  --genome_Bowtie2 "${REF}/Bowtie2_genome/genome" \
  --bed "${REF}/ERVmap.bed" \
  --genomefile "${REF}/GRCh38.genome_file.txt" \
  --gtf "${REF}/genes.gtf" \
  --transcriptome "${REF}/Bowtie2_genome/known" \
  --adaptor "${REF}/illumina_adapter.txt" \
  --filter "${SCRIPTS}/parse_bam.pl" \
  --cell "${ID}_working"
#!/bin/bash
#SBATCH -p SVC # partition (queue)
#SBATCH --job-name=STAR
#SBATCH -n 8
#SBATCH -t 7-00:00 # time (D-HH:MM)
#SBATCH -o _log/anno.%A_%a.out # STDOUT
#SBATCH -e _log/anno.%A_%a.err # STDERR
#SBATCH --mail-type=END,FAIL # notifications for job done & fail
#SBATCH --mail-user=XX # send-to address

# Count reads over the ERV annotation (SAF format) for every STAR BAM found
# under db/star/<sample>/ and write one combined table.
#
# Inputs:
#   db/rmsk.ERV.saf                                   annotation in SAF format
#   db/star/*/*.Aligned.sortedByCoord.out.bam         STAR alignments
# Output:
#   db/star/total.featureCounts.txt
#
# NOTE(review): the log pattern uses %a but no --array is declared here;
# confirm the resulting log file names are acceptable.

erv_file=db/rmsk.ERV.saf
out_path=db/star

# Use as many threads as the job was allocated (-n 8 above) instead of a
# hard-coded 40, which oversubscribed the allocation.
threads="${SLURM_NTASKS:-8}"

# Expand the BAM list up front so that an empty match is reported here
# rather than handed to featureCounts as a literal glob pattern.
shopt -s nullglob
bams=("${out_path}"/*/*.Aligned.sortedByCoord.out.bam)
shopt -u nullglob
if (( ${#bams[@]} == 0 )); then
  echo "No *.Aligned.sortedByCoord.out.bam files found under ${out_path}/*/" >&2
  exit 1
fi

# NOTE(review): depending on the featureCounts version, -p alone may not
# count fragments (newer releases need --countReadPairs); confirm.
featureCounts -M -F SAF -T "${threads}" -s 2 -p \
  -a "${erv_file}" \
  -o "${out_path}/total.featureCounts.txt" \
  "${bams[@]}"