Commit aac44737 authored by Gonzalo S Nido's avatar Gonzalo S Nido
Browse files

Pulled from origin and merged conflicts

parents eb057860 cae1d186
......@@ -15,17 +15,17 @@ cd samtools-1.10
./configure
make
sudo mkdir /opt/samtools-1.10
sudo cp ./samtools /opt/samtools-1.10/samtools-1.10
sudo ln -s /opt/samtools-1.10/samtools-1.10 /usr/local/bin/samtools-1.10
sudo cp ./samtools /opt/samtools-1.10/samtools
sudo ln -s /opt/samtools-1.10/samtools /usr/local/bin/samtools-1.10
echo "#### Installing bowtie2-2.4.41..."
echo "#### Installing bowtie2-2.4.1..."
cd ~
wget https://github.com/BenLangmead/bowtie2/releases/download/v2.4.1/bowtie2-2.4.1-linux-x86_64.zip
unzip bowtie2-2.4.41-linux-x86_64.zip
rm bowtie2-2.4.41-linux-x86_64.zip
sudo mkdir /opt/bowtie2-2.4.41
sudo cp -r bowtie2-2.4.41-linux-x86_64/* /opt/bowtie2-2.4.41/
sudo ln -s /opt/bowtie2-2.4.41/bowtie2 /usr/local/bin/bowtie2-2.4.41
unzip bowtie2-2.4.1-linux-x86_64.zip
rm bowtie2-2.4.1-linux-x86_64.zip
sudo mkdir /opt/bowtie2-2.4.1
sudo cp -r bowtie2-2.4.1-linux-x86_64/* /opt/bowtie2-2.4.1/
sudo ln -s /opt/bowtie2-2.4.1/bowtie2 /usr/local/bin/bowtie2-2.4.1
echo "#### Installing Bismark 0.22.3..."
wget https://github.com/FelixKrueger/Bismark/archive/0.22.3.tar.gz
......@@ -44,14 +44,14 @@ sudo ln -s /opt/Bismark-0.22.3/bismark /usr/local/bin/bismark-0.22.3
sudo ln -s /opt/Bismark-0.22.3/bam2nuc /usr/local/bin/bam2nuc-0.22.3
echo "Installing R..."
sudo apt install r-base
sudo apt install libcurl4-openssl-dev libxml2-dev
sudo apt -y install r-base
sudo apt -y install libcurl4-openssl-dev libxml2-dev
echo "Installing psrecord for monitoring"
sudo apt install python-pip
sudo apt -y install python-pip
pip install psrecord
export PATH="~/.local/bin/:$PATH"
echo "export PATH=~/.local/bin/:$PATH" >> ~/.bashrc
#R packages
#----------
......
......@@ -19,26 +19,42 @@ mt_ref="${ref_dir}/chrM"
bismark="/usr/local/bin/bismark-0.22.3"
deduplicate_bismark="/usr/local/bin/deduplicate_bismark-0.22.3"
bismark_meth_extract="/usr/local/bin/bismark_methylation_extractor-0.22.3"
bowtie2="/usr/local/bin/bowtie2-2.4.41"
bowtie_path="/opt/bowtie2-2.4.41/"
bowtie2="/usr/local/bin/bowtie2-2.4.1"
bowtie_path="/opt/bowtie2-2.4.1/"
samtools="/usr/local/bin/samtools-1.10"
samtools_path="/opt/samtools-1.10"
ext_chr_srt="${PIPELINEPATH}/apps/ext_chr_srt.py"
meth_prop="${PIPELINEPATH}/apps/methylation_proportion.py"
generate_bs_indices="${PIPELINEPATH}/apps/generate_bs_indices.sh"
psrecord="~/.local/bin/psrecord"
psrecord="psrecord"
# TO ACTIVATE MONITORING SET TO true
########################################
#>>>> OPTIONS <<<<<#
########################################
# To activate CPU and RAM monitoring
MONITOR=false
#MONITOR=true
# To subset reads to 10,000 (debug)
SUBSET_READS=false
#SUBSET_READS=true
########################################
# exit when any command fails
# Exit when any command fails
set -e
trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
trap 'echo "\"${last_command}\" command filed with exit code $?"' EXIT
trap 'if [ "$?" -ne "0" ]; then echo "\"${last_command}\" command filed with exit code different from 0"; fi' EXIT
# Get number of processors in the machine
N_proc=$( grep -c processor /proc/cpuinfo )
# Get RAM in the machine
RAM=$( free -g | grep '^Mem' | awk '{print $2}' )
# Ensure reference files/indices are there, download and index otherwise
if [[ ! -s ${b38_ref}/Bisulfite_Genome/GA_conversion/genome_mfa.GA_conversion.fa ]]; then
......@@ -139,7 +155,16 @@ R2=$( echo "${R2%,}" )
if $MONITOR; then
echo "MONITORING CPU AND RAM..."
echo
fi
if $SUBSET_READS; then
SUBSET_READS="-u 1000000"
echo "SUBSETTING TO 1000000 READS ONLY!!!!"
echo
fi
echo "##################################################"
......@@ -153,14 +178,18 @@ echo "##################################################"
echo -ne " 1.1 Aligning... "
#CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} -u 10000 \
CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore 8 \
if [ "${RAM}" -lt "16" ]; then cpus=1; else cpus=$( expr ${RAM} / 16 ); fi
echo -ne "(${cpus} cores) "
CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS} \
-o ${output_dir} --prefix b38 --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \
${log_dir}/1_1_aln.stdout 2> ${log_dir}/1_1_aln.stderr"
echo -e ${CMD} > ${log_dir}/1_1_aln.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/1_1_aln.cmd" --log ${log_dir}/1_1_aln.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_1_aln.cmd" --log ${log_dir}/1_1_aln.resources --interval 5 --include-children
else
bash ${log_dir}/1_1_aln.cmd
fi
......@@ -173,7 +202,9 @@ else
echo "DONE"
fi
echo -ne " 1.2 Merging bamfiles... "
bamfile_list=""
IFS=',' read -r -a array <<< "${R1}"
for element in "${array[@]}"; do
......@@ -182,12 +213,13 @@ for element in "${array[@]}"; do
bamfile_list="${bamfile_list} ${bamfile}"
done
CMD="${samtools} merge -n -r -@ 32 ${output_dir}/b38_${sample_id}.bam ${bamfile_list} > \
CMD="${samtools} cat -o ${output_dir}/b38_${sample_id}.bam ${bamfile_list} > \
${log_dir}/1_2_merge.stdout 2> \
${log_dir}/1_2_merge.stderr"
echo -e ${CMD} > ${log_dir}/1_2_merge.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/1_2_aln.cmd" --log ${log_dir}/1_2_merge.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_2_merge.cmd" --log ${log_dir}/1_2_merge.resources --interval 1 --include-children
else
bash ${log_dir}/1_2_merge.cmd
fi
......@@ -206,13 +238,15 @@ fi
echo -ne " 1.3 Dedupping alignment... "
CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
--output_dir ${output_dir} ${output_dir}/b38_${sample_id}.bam > \
${log_dir}/1_3_dedup.stdout 2> \
${log_dir}/1_3_dedup.stderr"
echo -e ${CMD} > ${log_dir}/1_3_dedup.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/1_3_dedup.cmd" --log ${log_dir}/1_3_dedup.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_3_dedup.cmd" --log ${log_dir}/1_3_dedup.resources --interval 5 --include-children
else
bash ${log_dir}/1_3_dedup.cmd
fi
......@@ -230,15 +264,19 @@ fi
echo -ne " 1.4 Extracting methylation... "
cpus=${N_proc}
CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
--multicore 16 \
--multicore ${cpus} \
--samtools_path ${samtools_path} \
${output_dir}/b38_${sample_id}.dedup.bam > \
${log_dir}/1_4_meth.stdout 2> \
${log_dir}/1_4_meth.stderr"
echo -e ${CMD} > ${log_dir}/1_4_meth.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/1_4_meth.cmd" --log ${log_dir}/1_4_meth.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_4_meth.cmd" --log ${log_dir}/1_4_meth.resources --interval 5 --include-children
else
bash ${log_dir}/1_4_meth.cmd
fi
......@@ -251,6 +289,7 @@ else
fi
echo -ne " 1.5 Extracting MT and Lambda methylation... "
CMD="cat ${output_dir}/CpG_context_b38_${sample_id}.dedup.txt | python3 ${ext_chr_srt} ${MT_chr_name} | gzip -nc > \
${output_dir}/CpG_context_chrM_${sample_id}.dedup.txt.gz && \
cat ${output_dir}/CpG_context_b38_${sample_id}.dedup.txt | python3 ${ext_chr_srt} ${LAMBDA_chr_name} | gzip -nc > \
......@@ -270,7 +309,8 @@ cat ${output_dir}/CHG_context_b38_${sample_id}.dedup.txt | python3 ${ext_chr_srt
${output_dir}/Lambda_meth_prop_${sample_id}.txt"
echo -e ${CMD} > ${log_dir}/1_5_extract_MT.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/1_5_extract_MT.cmd" --log ${log_dir}/1_5_extract_MT.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_5_extract_MT.cmd" --log ${log_dir}/1_5_extract_MT.resources --interval 5 --include-children
else
bash ${log_dir}/1_5_extract_MT.cmd
fi
......@@ -289,6 +329,7 @@ echo
echo "##################################################"
echo "# STRATEGY 2 #"
echo "#================================================#"
......@@ -299,14 +340,18 @@ echo "##################################################"
echo -ne " 2.1 Aligning... "
#CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} -u 10000 \
CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore 8 \
if [ "${RAM}" -lt "16" ]; then cpus=1; else cpus=$( expr ${RAM} / 16 ); fi
echo -ne "(${cpus} cores) "
CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS} \
-o ${output_dir} --prefix MT --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \
${log_dir}/2_1_aln.stdout 2> ${log_dir}/2_1_aln.stderr"
echo -e ${CMD} > ${log_dir}/2_1_aln.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/2_1_aln.cmd" --log ${log_dir}/2_1_aln.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/2_1_aln.cmd" --log ${log_dir}/2_1_aln.resources --interval 5 --include-children
else
bash ${log_dir}/2_1_aln.cmd
fi
......@@ -320,6 +365,9 @@ else
fi
echo -ne " 2.2 Merging bamfiles... "
cpus=`expr ${N_proc} / 2`
bamfile_list=""
IFS=',' read -r -a array <<< "${R1}"
for element in "${array[@]}"; do
......@@ -328,12 +376,13 @@ for element in "${array[@]}"; do
bamfile_list="${bamfile_list} ${bamfile}"
done
CMD="${samtools} merge -n -r -@ 32 ${output_dir}/MT_${sample_id}.bam ${bamfile_list} > \
CMD="${samtools} cat -o ${output_dir}/MT_${sample_id}.bam ${bamfile_list} > \
${log_dir}/2_2_merge.stdout 2> \
${log_dir}/2_2_merge.stderr"
echo -e ${CMD} > ${log_dir}/2_2_merge.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/2_2_merge.cmd" --log ${log_dir}/2_2_merge.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/2_2_merge.cmd" --log ${log_dir}/2_2_merge.resources --interval 5 --include-children
else
bash ${log_dir}/2_2_merge.cmd
fi
......@@ -352,13 +401,15 @@ fi
echo -ne " 2.3 Dedupping alignment... "
CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
--output_dir ${output_dir} ${output_dir}/MT_${sample_id}.bam > \
${log_dir}/2_3_dedup.stdout 2> \
${log_dir}/2_3_dedup.stderr"
echo -e ${CMD} > ${log_dir}/2_3_dedup.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/2_3_dedup.cmd" --log ${log_dir}/2_3_dedup.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/2_3_dedup.cmd" --log ${log_dir}/2_3_dedup.resources --interval 5 --include-children
else
bash ${log_dir}/2_3_dedup.cmd
fi
......@@ -376,15 +427,19 @@ fi
echo -ne " 2.4 Extracting methylation... "
cpus=${N_proc}
CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
--gzip --multicore 16 \
--gzip --multicore ${cpus} \
--samtools_path ${samtools_path} \
${output_dir}/MT_${sample_id}.dedup.bam > \
${log_dir}/2_4_meth.stdout 2> \
${log_dir}/2_4_meth.stderr"
echo -e ${CMD} > ${log_dir}/2_4_meth.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/2_4_meth.cmd" --log ${log_dir}/2_4_meth.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/2_4_meth.cmd" --log ${log_dir}/2_4_meth.resources --interval 5 --include-children
else
bash ${log_dir}/2_4_meth.cmd
fi
......@@ -416,14 +471,18 @@ echo "##################################################"
echo -ne " 3.1 Aligning vs nuclear DNA... "
#CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} -u 10000 \
CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore 8 \
if [ "${RAM}" -lt "16" ]; then cpus=1; else cpus=$( expr ${RAM} / 16 ); fi
echo -ne "(${cpus} cores) "
CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS} \
--un --ambiguous -o ${output_dir} --prefix b38_noMT --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \
${log_dir}/3_1_aln.stdout 2> ${log_dir}/3_1_aln.stderr"
echo -e ${CMD} > ${log_dir}/3_1_aln.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/3_1_aln.cmd" --log ${log_dir}/3_1_aln.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_1_aln.cmd" --log ${log_dir}/3_1_aln.resources --interval 5 --include-children
else
bash ${log_dir}/3_1_aln.cmd
fi
......@@ -437,6 +496,7 @@ else
fi
echo -ne " 3.2 Merging unaligned reads... "
unmapped_list_r1=""
unmapped_list_r2=""
IFS=',' read -r -a array <<< "${R1}"
......@@ -459,7 +519,8 @@ CMD="cat ${unmapped_list_r1} > ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.f
rm ${output_dir}/b38_noMT.*_ambiguous_reads_?.fq.gz"
echo -e ${CMD} > ${log_dir}/3_2_merge_unmapped.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/3_2_merge.cmd" --log ${log_dir}/3_2_merge.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_2_merge_unmapped.cmd" --log ${log_dir}/3_2_merge_unmapped.resources --interval 5 --include-children
else
bash ${log_dir}/3_2_merge_unmapped.cmd
fi
......@@ -476,6 +537,7 @@ fi
echo -ne " 3.3 Deleting nuclear bamfiles... "
bamfile_list=""
IFS=',' read -r -a array <<< "${R1}"
for element in "${array[@]}"; do
......@@ -487,7 +549,8 @@ done
CMD="rm ${bamfile_list}"
echo ${CMD} > ${log_dir}/3_3_delete.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/3_3_delete.cmd" --log ${log_dir}/3_3_delete.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_3_delete.cmd" --log ${log_dir}/3_3_delete.resources --interval 5 --include-children
else
bash ${log_dir}/3_3_delete.cmd
fi
......@@ -504,17 +567,23 @@ fi
echo -ne " 3.4 Re-aligning unmapped reads to mtDNA... "
if [ "${RAM}" -lt "16" ]; then cpus=1; else cpus=$( expr ${RAM} / 16 ); fi
echo -ne "(${cpus} cores) "
CMD="(${bismark} ${mt_ref} -1 ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.fastq.gz \
-2 ${output_dir}/b38_noMT_${sample_id}_unmapped.R2.fastq.gz --multicore 8 \
-2 ${output_dir}/b38_noMT_${sample_id}_unmapped.R2.fastq.gz --multicore ${cpus} \
-o ${output_dir} --prefix umap_to_MT --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} && \
mv ${output_dir}/umap_to_MT.b38_noMT_${sample_id}_unmapped.R1_bismark_bt2_pe.bam ${output_dir}/umap_to_MT_${sample_id}.bam && \
mv ${output_dir}/umap_to_MT.b38_noMT_${sample_id}_unmapped.R1_bismark_bt2_PE_report.txt ${output_dir}/umap_to_MT_${sample_id}_report.txt) > \
mv ${output_dir}/umap_to_MT.b38_noMT_${sample_id}_unmapped.R1_bismark_bt2_PE_report.txt ${output_dir}/umap_to_MT_${sample_id}_report.txt && \
rm ${output_dir}/b38_noMT_${sample_id}_unmapped.R?.fastq.gz) > \
${log_dir}/3_4_realign_to_mt.stdout 2> \
${log_dir}/3_4_realign_to_mt.stderr"
echo ${CMD} > ${log_dir}/3_4_realign_to_mt.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/3_4_realign_to_mt.cmd" --log ${log_dir}/3_4_realign_to_mt.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_4_realign_to_mt.cmd" --log ${log_dir}/3_4_realign_to_mt.resources --interval 5 --include-children
else
bash ${log_dir}/3_4_realign_to_mt.cmd
fi
......@@ -529,13 +598,15 @@ fi
echo -ne " 3.5 Dedupping alignments... "
CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
--output_dir ${output_dir} ${output_dir}/umap_to_MT_${sample_id}.bam > \
${log_dir}/3_5_dedup.stdout 2> \
${log_dir}/3_5_dedup.stderr"
echo -e ${CMD} > ${log_dir}/3_5_dedup.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/3_5_dedup.cmd" --log ${log_dir}/3_5_dedup.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_5_dedup.cmd" --log ${log_dir}/3_5_dedup.resources --interval 5 --include-children
else
bash ${log_dir}/3_5_dedup.cmd
fi
......@@ -555,15 +626,19 @@ fi
echo -ne " 3.6 Extracting methylation in MT..."
cpus=${N_proc}
CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
--gzip --multicore 16 \
--gzip --multicore ${cpus} \
--samtools_path ${samtools_path} \
${output_dir}/umap_to_MT_${sample_id}.dedup.bam > \
${log_dir}/3_7_meth.stdout 2> \
${log_dir}/3_7_meth.stderr"
echo -e ${CMD} > ${log_dir}/3_6_meth.cmd
if $MONITOR; then
${psrecord} "bash ${log_dir}/3_6_meth.cmd" --log ${log_dir}/3_6_meth.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_6_meth.cmd" --log ${log_dir}/3_6_meth.resources --interval 5 --include-children
else
bash ${log_dir}/3_6_meth.cmd
fi
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment