Chapter 3 RNA-seq

3.1 Preprocessing

#!/bin/bash
#SBATCH -p CPU # partition (queue)
#SBATCH --job-name=STAR
#SBATCH -n 40
#SBATCH --array=1-2
#SBATCH -t 7-00:00 # time (D-HH:MM)
#SBATCH -o _log/anno.%A_%a.out # STDOUT
#SBATCH -e _log/anno.%A_%a.err # STDERR
#SBATCH --mail-type=END,FAIL # notifications for job done & fail
#SBATCH --mail-user=XX # send-to address

# Align one paired-end sample per Slurm array task with STAR.
#
# Inputs:
#   sample.txt                      one sample id per line; array task N uses line N
#   ${fq_path}/<id>_{1,2}.clean.fq.gz   gzipped paired-end reads
#   ${star_index}                   STAR genome index directory
# Output:
#   ${out_path}/<id>/<id>.*         coordinate-sorted BAM plus STAR logs
#
# NOTE(review): the _log/ directory used by -o/-e above presumably has to
# exist before submission — confirm against the sbatch documentation.

set -euo pipefail

: "${SLURM_ARRAY_TASK_ID:?must be submitted as a Slurm array job}"

# Pick the sample id from the line matching this array task.
id=$(sed -n "${SLURM_ARRAY_TASK_ID}p" sample.txt)
if [[ -z "${id}" ]]; then
  printf 'error: no sample id on line %s of sample.txt\n' \
    "${SLURM_ARRAY_TASK_ID}" >&2
  exit 1
fi
echo "${id}"

fq_path=.
out_path=.

fq1=${fq_path}/${id}_1.clean.fq.gz
fq2=${fq_path}/${id}_2.clean.fq.gz

star_index=star_v43_2.7.9a

# Use the task count granted by Slurm (-n 40 above); fall back to 40 when
# the script is run outside Slurm.
threads=${SLURM_NTASKS:-40}

# Fail before starting STAR if an input is missing.
for fq in "${fq1}" "${fq2}"; do
  if [[ ! -r "${fq}" ]]; then
    printf 'error: cannot read FASTQ file: %s\n' "${fq}" >&2
    exit 1
  fi
done

# -p so that re-running a task does not fail on an existing directory.
mkdir -p -- "${out_path}/${id}"

# Multimapper-tolerant, end-to-end alignment settings. Per the STAR manual:
# up to 1000 loci are allowed per read (--outFilterMultimapNmax,
# --winAnchorMultimapNmax) but only one alignment is written
# (--outSAMmultNmax 1), with multimapper order randomised.
# NOTE(review): --alignIntronMax 1 presumably disables spliced alignments;
# confirm this is intended for this analysis.
star_args=(
  --runThreadN "${threads}"
  --genomeDir "${star_index}"
  --outSAMtype BAM SortedByCoordinate
  --readFilesIn "${fq1}" "${fq2}"
  --readFilesCommand zcat
  --outFileNamePrefix "${out_path}/${id}/${id}."
  --runMode alignReads
  --outFilterMultimapNmax 1000
  --outSAMmultNmax 1
  --outFilterMismatchNmax 3
  --outMultimapperOrder Random
  --winAnchorMultimapNmax 1000
  --alignEndsType EndToEnd
  --alignIntronMax 1
  --alignMatesGapMax 350
)

STAR "${star_args[@]}"

3.2 Salmon

#!/bin/bash
#SBATCH -p SVC # partition (queue)
#SBATCH --job-name=p53
#SBATCH -n 8
#SBATCH --array=1-2
#SBATCH -t 7-00:00 # time (D-HH:MM)
#SBATCH -o _log/salmon.%A_%a.out # STDOUT
#SBATCH -e _log/salmon.%A_%a.err # STDERR
#SBATCH --mail-type=END,FAIL # notifications for job done & fail
#SBATCH --mail-user=XX # send-to address

# Quantify one paired-end sample per Slurm array task with Salmon.
#
# Required environment:
#   SALMON_1_10   command used to launch Salmon (presumably the path to the
#                 salmon 1.10 binary — confirm)
# Inputs:
#   sample.txt                      one sample id per line; array task N uses line N
#   ${fq_path}/<id>_{1,2}.clean.fq.gz   gzipped paired-end reads
# Output:
#   ${out_path}/<id>/               Salmon quantification directory

set -euo pipefail

: "${SLURM_ARRAY_TASK_ID:?must be submitted as a Slurm array job}"
# Without this guard an unset variable would turn the command into `quant ...`.
: "${SALMON_1_10:?must be set to the salmon executable}"

# Pick the sample id from the line matching this array task.
id=$(sed -n "${SLURM_ARRAY_TASK_ID}p" sample.txt)
if [[ -z "${id}" ]]; then
  printf 'error: no sample id on line %s of sample.txt\n' \
    "${SLURM_ARRAY_TASK_ID}" >&2
  exit 1
fi
echo "${id}"

fq_path=.

fq1=${fq_path}/${id}_1.clean.fq.gz
fq2=${fq_path}/${id}_2.clean.fq.gz

gtf_file=gencode.v43.annotation.gtf
salmon_index=gencode.v43_salmon-1.10
out_path=salmon_1.10

# Use the task count granted by Slurm (-n 8 above); fall back to 8 when the
# script is run outside Slurm.
threads=${SLURM_NTASKS:-8}

# Fail before starting Salmon if an input is missing.
for input in "${fq1}" "${fq2}" "${gtf_file}"; do
  if [[ ! -r "${input}" ]]; then
    printf 'error: cannot read input file: %s\n' "${input}" >&2
    exit 1
  fi
done

# -l IU declares an inward, unstranded library; -g supplies the GTF used for
# gene-level aggregation (per the Salmon documentation).
# NOTE(review): the featureCounts step elsewhere in these notes uses -s 2
# (reverse-stranded); confirm that IU is the intended library type here.
salmon_args=(
  quant
  -p "${threads}"
  -l IU
  -i "${salmon_index}"
  -o "${out_path}/${id}"
  -1 "${fq1}"
  -2 "${fq2}"
  -g "${gtf_file}"
  --gcBias
  --validateMappings
)

# SALMON_1_10 is deliberately left unquoted so it keeps working if it holds
# a command plus arguments rather than a single path.
# shellcheck disable=SC2086
${SALMON_1_10} "${salmon_args[@]}"

3.3 ERVmap

#!/bin/bash
#SBATCH -p CPU # partition (queue)
#SBATCH --job-name=NSC
#SBATCH -n 40
#SBATCH -t 7-00:00 # time (D-HH:MM)
#SBATCH -o _log/rna.%A_%a.out # STDOUT
#SBATCH -e _log/rna.%A_%a.err # STDERR
#SBATCH --mail-type=END,FAIL # notifications for job done & fail
#SBATCH --mail-user=XX # send-to address

# Run the ERVmap pipeline (stages 1-6) on one paired-end sample.
#
# Required environment (not defined in this script; presumably exported at
# submission time — confirm against the caller):
#   ID        sample id
#   FQ_PATH   directory holding <ID>_{1,2}.clean.fq.gz
#   OUT_PATH  output directory; interleaved reads go to ${OUT_PATH}/fastq
#   SCRIPTS   directory holding interleaved.pl, erv_genome.pl, parse_bam.pl
#   REF       directory holding the reference files passed to erv_genome.pl
#
# NOTE(review): the log names use %a but no --array is declared here;
# confirm whether the job is meant to be submitted as an array.

# Abort with a clear message instead of running with empty paths
# (an empty OUT_PATH would make mkdir target /fastq).
: "${ID:?ID (sample id) must be set}"
: "${FQ_PATH:?FQ_PATH (FASTQ directory) must be set}"
: "${OUT_PATH:?OUT_PATH (output directory) must be set}"
: "${SCRIPTS:?SCRIPTS (ERVmap scripts directory) must be set}"
: "${REF:?REF (reference directory) must be set}"

source activate ERVmap

# Strict mode is enabled only after activation so the environment's own
# activation scripts are not affected. pipefail makes a failure of
# interleaved.pl visible through the gzip pipeline below.
set -eo pipefail

# NOTE(review): the indexes are rebuilt on every run and are written under
# paths relative to the working directory, while the pipeline below reads
# them from ${REF}; confirm the job is launched from ${REF} or that these
# two commands are a one-off setup step.
bwa index -p bwa_genome/genome bwa_genome/genome.fa
bowtie2-build Bowtie2_genome/genome.fa Bowtie2_genome/genome

mkdir -p -- _log "${OUT_PATH}/fastq"

FQ1=${FQ_PATH}/${ID}_1.clean.fq.gz
FQ2=${FQ_PATH}/${ID}_2.clean.fq.gz

interleaved_fq=${OUT_PATH}/fastq/${ID}.fastq.gz

# Merge both mates into a single gzipped FASTQ, the input erv_genome.pl
# receives through --fastq.
perl "${SCRIPTS}/interleaved.pl" --read1 "${FQ1}" --read2 "${FQ2}" \
  | gzip -c > "${interleaved_fq}"

erv_args=(
  -start_stage 1 -end_stage 6
  --fastq "${interleaved_fq}"
  --genome "${REF}/bwa_genome/genome"
  --genome_Bowtie2 "${REF}/Bowtie2_genome/genome"
  --bed "${REF}/ERVmap.bed"
  --genomefile "${REF}/GRCh38.genome_file.txt"
  --gtf "${REF}/genes.gtf"
  --transcriptome "${REF}/Bowtie2_genome/known"
  --adaptor "${REF}/illumina_adapter.txt"
  --filter "${SCRIPTS}/parse_bam.pl"
  --cell "${ID}_working"
)

perl "${SCRIPTS}/erv_genome.pl" "${erv_args[@]}"


#!/bin/bash
#SBATCH -p SVC # partition (queue)
#SBATCH --job-name=STAR
#SBATCH -n 8
#SBATCH -t 7-00:00 # time (D-HH:MM)
#SBATCH -o _log/anno.%A_%a.out # STDOUT
#SBATCH -e _log/anno.%A_%a.err # STDERR
#SBATCH --mail-type=END,FAIL # notifications for job done & fail
#SBATCH --mail-user=XX # send-to address

# Count reads over ERV intervals (SAF annotation) across every
# coordinate-sorted BAM found under ${out_path}/<sample>/ with featureCounts.
#
# Output: ${out_path}/total.featureCounts.txt
#
# NOTE(review): the log names use %a but no --array is declared here;
# confirm whether the job is meant to be submitted as an array.

set -euo pipefail

erv_file=db/rmsk.ERV.saf
out_path=db/star
# Not referenced below; kept for reference alongside the other paths.
# shellcheck disable=SC2034
index_path=db/star_v43_2.7.9a

# Match the thread count to the Slurm allocation (-n 8 above) instead of a
# hard-coded 40, which oversubscribed the 8 allocated tasks.
threads=${SLURM_NTASKS:-8}

# Expand the BAM glob explicitly so that "no files found" is reported here
# rather than handing featureCounts a literal, unmatched pattern.
shopt -s nullglob
bam_files=("${out_path}"/*/*.Aligned.sortedByCoord.out.bam)
shopt -u nullglob

if (( ${#bam_files[@]} == 0 )); then
  printf 'error: no *.Aligned.sortedByCoord.out.bam files under %s/*/\n' \
    "${out_path}" >&2
  exit 1
fi

# -M counts multi-mapping reads, -F SAF sets the annotation format,
# -s 2 selects reverse-stranded counting, -p marks the input as paired-end
# (per the Subread/featureCounts documentation).
# NOTE(review): newer featureCounts releases may also need --countReadPairs
# to count fragments rather than reads — confirm for the installed version.
featureCounts -M -F SAF -T "${threads}" -s 2 -p \
  -a "${erv_file}" \
  -o "${out_path}/total.featureCounts.txt" \
  "${bam_files[@]}"