Commit f107c8e9 authored by Gonzalo S Nido's avatar Gonzalo S Nido
Browse files

cat instead of merge

parent abca2299
...@@ -29,9 +29,21 @@ generate_bs_indices="${PIPELINEPATH}/apps/generate_bs_indices.sh" ...@@ -29,9 +29,21 @@ generate_bs_indices="${PIPELINEPATH}/apps/generate_bs_indices.sh"
psrecord="psrecord" psrecord="psrecord"
# TO ACTIVATE MONITORING SET TO true ########################################
MONITOR=false #>>>> OPTIONS <<<<<#
#MONITOR=true ########################################
# To activate CPU and RAM monitoring
#MONITOR=false
MONITOR=true
# To subset reads to 10,000 (debug)
#SUBSET_READS=false
SUBSET_READS=true
########################################
# exit when any command fails # exit when any command fails
...@@ -39,6 +51,8 @@ set -e ...@@ -39,6 +51,8 @@ set -e
# trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG # trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
# trap 'echo "\"${last_command}\" command filed with exit code $?"' EXIT # trap 'echo "\"${last_command}\" command filed with exit code $?"' EXIT
# Get number of processors in the machine
N_proc=$( grep -c processor /proc/cpuinfo )
# Ensure reference files/indices are there, download and index otherwise # Ensure reference files/indices are there, download and index otherwise
if [[ ! -s ${b38_ref}/Bisulfite_Genome/GA_conversion/genome_mfa.GA_conversion.fa ]]; then if [[ ! -s ${b38_ref}/Bisulfite_Genome/GA_conversion/genome_mfa.GA_conversion.fa ]]; then
...@@ -144,6 +158,11 @@ if $MONITOR; then ...@@ -144,6 +158,11 @@ if $MONITOR; then
echo echo
fi fi
if $SUBSET_READS; then
SUBSET_READS="-u 10000"
echo "SUBSETTING TO 10000 READS ONLY!!!!"
echo
fi
echo "##################################################" echo "##################################################"
...@@ -157,14 +176,15 @@ echo "##################################################" ...@@ -157,14 +176,15 @@ echo "##################################################"
echo -ne " 1.1 Aligning... " echo -ne " 1.1 Aligning... "
#CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore 2 -u 100000 \ cpus=`expr ${N_proc} / 4`
CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore 8 \ #CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} -u 100000 \
CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS} \
-o ${output_dir} --prefix b38 --path_to_bowtie ${bowtie_path} \ -o ${output_dir} --prefix b38 --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \ --samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \
${log_dir}/1_1_aln.stdout 2> ${log_dir}/1_1_aln.stderr" ${log_dir}/1_1_aln.stdout 2> ${log_dir}/1_1_aln.stderr"
echo -e ${CMD} > ${log_dir}/1_1_aln.cmd echo -e ${CMD} > ${log_dir}/1_1_aln.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/1_1_aln.cmd" --log ${log_dir}/1_1_aln.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/1_1_aln.cmd" --log ${log_dir}/1_1_aln.resources --interval 60 --include-children
else else
bash ${log_dir}/1_1_aln.cmd bash ${log_dir}/1_1_aln.cmd
...@@ -187,13 +207,15 @@ for element in "${array[@]}"; do ...@@ -187,13 +207,15 @@ for element in "${array[@]}"; do
bamfile_list="${bamfile_list} ${bamfile}" bamfile_list="${bamfile_list} ${bamfile}"
done done
CMD="${samtools} merge -n -r -@ 32 ${output_dir}/b38_${sample_id}.bam ${bamfile_list} > \ cpus=`expr ${N_proc} / 2`
#CMD="${samtools} merge -n -r -@ ${cpus} ${output_dir}/b38_${sample_id}.bam ${bamfile_list} > \
CMD="${samtools} cat -@ ${cpus} -o ${output_dir}/b38_${sample_id}.bam ${bamfile_list} > \
${log_dir}/1_2_merge.stdout 2> \ ${log_dir}/1_2_merge.stdout 2> \
${log_dir}/1_2_merge.stderr" ${log_dir}/1_2_merge.stderr"
echo -e ${CMD} > ${log_dir}/1_2_merge.cmd echo -e ${CMD} > ${log_dir}/1_2_merge.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/1_2_merge.cmd" --log ${log_dir}/1_2_merge.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/1_2_merge.cmd" --log ${log_dir}/1_2_merge.resources --interval 1 --include-children
else else
bash ${log_dir}/1_2_merge.cmd bash ${log_dir}/1_2_merge.cmd
fi fi
...@@ -218,7 +240,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \ ...@@ -218,7 +240,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
${log_dir}/1_3_dedup.stderr" ${log_dir}/1_3_dedup.stderr"
echo -e ${CMD} > ${log_dir}/1_3_dedup.cmd echo -e ${CMD} > ${log_dir}/1_3_dedup.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/1_3_dedup.cmd" --log ${log_dir}/1_3_dedup.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/1_3_dedup.cmd" --log ${log_dir}/1_3_dedup.resources --interval 60 --include-children
else else
bash ${log_dir}/1_3_dedup.cmd bash ${log_dir}/1_3_dedup.cmd
...@@ -235,17 +257,19 @@ if [ -s "${output_dir}/b38_${sample_id}.deduplicated.bam" ]; then ...@@ -235,17 +257,19 @@ if [ -s "${output_dir}/b38_${sample_id}.deduplicated.bam" ]; then
mv ${output_dir}/b38_${sample_id}.deduplicated.bam ${output_dir}/b38_${sample_id}.dedup.bam mv ${output_dir}/b38_${sample_id}.deduplicated.bam ${output_dir}/b38_${sample_id}.dedup.bam
fi fi
exit 1
echo -ne " 1.4 Extracting methylation... " echo -ne " 1.4 Extracting methylation... "
cpus=`expr ${N_proc} / 3`
CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \ CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
--multicore 16 \ --multicore ${cpus} \
--samtools_path ${samtools_path} \ --samtools_path ${samtools_path} \
${output_dir}/b38_${sample_id}.dedup.bam > \ ${output_dir}/b38_${sample_id}.dedup.bam > \
${log_dir}/1_4_meth.stdout 2> \ ${log_dir}/1_4_meth.stdout 2> \
${log_dir}/1_4_meth.stderr" ${log_dir}/1_4_meth.stderr"
echo -e ${CMD} > ${log_dir}/1_4_meth.cmd echo -e ${CMD} > ${log_dir}/1_4_meth.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/1_4_meth.cmd" --log ${log_dir}/1_4_meth.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/1_4_meth.cmd" --log ${log_dir}/1_4_meth.resources --interval 60 --include-children
else else
bash ${log_dir}/1_4_meth.cmd bash ${log_dir}/1_4_meth.cmd
...@@ -278,7 +302,7 @@ cat ${output_dir}/CHG_context_b38_${sample_id}.dedup.txt | python3 ${ext_chr_srt ...@@ -278,7 +302,7 @@ cat ${output_dir}/CHG_context_b38_${sample_id}.dedup.txt | python3 ${ext_chr_srt
${output_dir}/Lambda_meth_prop_${sample_id}.txt" ${output_dir}/Lambda_meth_prop_${sample_id}.txt"
echo -e ${CMD} > ${log_dir}/1_5_extract_MT.cmd echo -e ${CMD} > ${log_dir}/1_5_extract_MT.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/1_5_extract_MT.cmd" --log ${log_dir}/1_5_extract_MT.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/1_5_extract_MT.cmd" --log ${log_dir}/1_5_extract_MT.resources --interval 60 --include-children
else else
bash ${log_dir}/1_5_extract_MT.cmd bash ${log_dir}/1_5_extract_MT.cmd
...@@ -308,14 +332,15 @@ echo "##################################################" ...@@ -308,14 +332,15 @@ echo "##################################################"
echo -ne " 2.1 Aligning... " echo -ne " 2.1 Aligning... "
#CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore 2 -u 100000 \ cpus=`expr ${N_proc} / 4`
CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore 8 \ #CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} -u 100000 \
CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS} \
-o ${output_dir} --prefix MT --path_to_bowtie ${bowtie_path} \ -o ${output_dir} --prefix MT --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \ --samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \
${log_dir}/2_1_aln.stdout 2> ${log_dir}/2_1_aln.stderr" ${log_dir}/2_1_aln.stdout 2> ${log_dir}/2_1_aln.stderr"
echo -e ${CMD} > ${log_dir}/2_1_aln.cmd echo -e ${CMD} > ${log_dir}/2_1_aln.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/2_1_aln.cmd" --log ${log_dir}/2_1_aln.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/2_1_aln.cmd" --log ${log_dir}/2_1_aln.resources --interval 60 --include-children
else else
bash ${log_dir}/2_1_aln.cmd bash ${log_dir}/2_1_aln.cmd
...@@ -338,12 +363,13 @@ for element in "${array[@]}"; do ...@@ -338,12 +363,13 @@ for element in "${array[@]}"; do
bamfile_list="${bamfile_list} ${bamfile}" bamfile_list="${bamfile_list} ${bamfile}"
done done
CMD="${samtools} merge -n -r -@ 32 ${output_dir}/MT_${sample_id}.bam ${bamfile_list} > \ cpus=`expr ${N_proc} / 2`
CMD="${samtools} merge -n -r -@ ${cpus} ${output_dir}/MT_${sample_id}.bam ${bamfile_list} > \
${log_dir}/2_2_merge.stdout 2> \ ${log_dir}/2_2_merge.stdout 2> \
${log_dir}/2_2_merge.stderr" ${log_dir}/2_2_merge.stderr"
echo -e ${CMD} > ${log_dir}/2_2_merge.cmd echo -e ${CMD} > ${log_dir}/2_2_merge.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/2_2_merge.cmd" --log ${log_dir}/2_2_merge.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/2_2_merge.cmd" --log ${log_dir}/2_2_merge.resources --interval 60 --include-children
else else
bash ${log_dir}/2_2_merge.cmd bash ${log_dir}/2_2_merge.cmd
...@@ -369,7 +395,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \ ...@@ -369,7 +395,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
${log_dir}/2_3_dedup.stderr" ${log_dir}/2_3_dedup.stderr"
echo -e ${CMD} > ${log_dir}/2_3_dedup.cmd echo -e ${CMD} > ${log_dir}/2_3_dedup.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/2_3_dedup.cmd" --log ${log_dir}/2_3_dedup.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/2_3_dedup.cmd" --log ${log_dir}/2_3_dedup.resources --interval 60 --include-children
else else
bash ${log_dir}/2_3_dedup.cmd bash ${log_dir}/2_3_dedup.cmd
...@@ -388,15 +414,16 @@ fi ...@@ -388,15 +414,16 @@ fi
echo -ne " 2.4 Extracting methylation... " echo -ne " 2.4 Extracting methylation... "
cpus=`expr ${N_proc} / 3`
CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \ CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
--gzip --multicore 16 \ --gzip --multicore ${cpus} \
--samtools_path ${samtools_path} \ --samtools_path ${samtools_path} \
${output_dir}/MT_${sample_id}.dedup.bam > \ ${output_dir}/MT_${sample_id}.dedup.bam > \
${log_dir}/2_4_meth.stdout 2> \ ${log_dir}/2_4_meth.stdout 2> \
${log_dir}/2_4_meth.stderr" ${log_dir}/2_4_meth.stderr"
echo -e ${CMD} > ${log_dir}/2_4_meth.cmd echo -e ${CMD} > ${log_dir}/2_4_meth.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/2_4_meth.cmd" --log ${log_dir}/2_4_meth.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/2_4_meth.cmd" --log ${log_dir}/2_4_meth.resources --interval 60 --include-children
else else
bash ${log_dir}/2_4_meth.cmd bash ${log_dir}/2_4_meth.cmd
...@@ -429,14 +456,15 @@ echo "##################################################" ...@@ -429,14 +456,15 @@ echo "##################################################"
echo -ne " 3.1 Aligning vs nuclear DNA... " echo -ne " 3.1 Aligning vs nuclear DNA... "
#CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore 2 -u 100000 \ cpus=`expr ${N_proc} / 4`
CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore 8 \ #CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} -u 100000 \
CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS} \
--un --ambiguous -o ${output_dir} --prefix b38_noMT --path_to_bowtie ${bowtie_path} \ --un --ambiguous -o ${output_dir} --prefix b38_noMT --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \ --samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \
${log_dir}/3_1_aln.stdout 2> ${log_dir}/3_1_aln.stderr" ${log_dir}/3_1_aln.stdout 2> ${log_dir}/3_1_aln.stderr"
echo -e ${CMD} > ${log_dir}/3_1_aln.cmd echo -e ${CMD} > ${log_dir}/3_1_aln.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/3_1_aln.cmd" --log ${log_dir}/3_1_aln.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/3_1_aln.cmd" --log ${log_dir}/3_1_aln.resources --interval 60 --include-children
else else
bash ${log_dir}/3_1_aln.cmd bash ${log_dir}/3_1_aln.cmd
...@@ -473,7 +501,7 @@ CMD="cat ${unmapped_list_r1} > ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.f ...@@ -473,7 +501,7 @@ CMD="cat ${unmapped_list_r1} > ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.f
rm ${output_dir}/b38_noMT.*_ambiguous_reads_?.fq.gz" rm ${output_dir}/b38_noMT.*_ambiguous_reads_?.fq.gz"
echo -e ${CMD} > ${log_dir}/3_2_merge_unmapped.cmd echo -e ${CMD} > ${log_dir}/3_2_merge_unmapped.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/3_2_merge.cmd" --log ${log_dir}/3_2_merge.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/3_2_merge.cmd" --log ${log_dir}/3_2_merge.resources --interval 60 --include-children
else else
bash ${log_dir}/3_2_merge_unmapped.cmd bash ${log_dir}/3_2_merge_unmapped.cmd
...@@ -502,7 +530,7 @@ done ...@@ -502,7 +530,7 @@ done
CMD="rm ${bamfile_list}" CMD="rm ${bamfile_list}"
echo ${CMD} > ${log_dir}/3_3_delete.cmd echo ${CMD} > ${log_dir}/3_3_delete.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/3_3_delete.cmd" --log ${log_dir}/3_3_delete.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/3_3_delete.cmd" --log ${log_dir}/3_3_delete.resources --interval 60 --include-children
else else
bash ${log_dir}/3_3_delete.cmd bash ${log_dir}/3_3_delete.cmd
...@@ -520,8 +548,9 @@ fi ...@@ -520,8 +548,9 @@ fi
echo -ne " 3.4 Re-aligning unmapped reads to mtDNA... " echo -ne " 3.4 Re-aligning unmapped reads to mtDNA... "
cpus=`expr ${N_proc} / 4`
CMD="(${bismark} ${mt_ref} -1 ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.fastq.gz \ CMD="(${bismark} ${mt_ref} -1 ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.fastq.gz \
-2 ${output_dir}/b38_noMT_${sample_id}_unmapped.R2.fastq.gz --multicore 8 \ -2 ${output_dir}/b38_noMT_${sample_id}_unmapped.R2.fastq.gz --multicore ${cpus} \
-o ${output_dir} --prefix umap_to_MT --path_to_bowtie ${bowtie_path} \ -o ${output_dir} --prefix umap_to_MT --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} && \ --samtools_path ${samtools_path} --temp_dir ${tmp_dir} && \
mv ${output_dir}/umap_to_MT.b38_noMT_${sample_id}_unmapped.R1_bismark_bt2_pe.bam ${output_dir}/umap_to_MT_${sample_id}.bam && \ mv ${output_dir}/umap_to_MT.b38_noMT_${sample_id}_unmapped.R1_bismark_bt2_pe.bam ${output_dir}/umap_to_MT_${sample_id}.bam && \
...@@ -530,7 +559,7 @@ CMD="(${bismark} ${mt_ref} -1 ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.fa ...@@ -530,7 +559,7 @@ CMD="(${bismark} ${mt_ref} -1 ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.fa
${log_dir}/3_4_realign_to_mt.stderr" ${log_dir}/3_4_realign_to_mt.stderr"
echo ${CMD} > ${log_dir}/3_4_realign_to_mt.cmd echo ${CMD} > ${log_dir}/3_4_realign_to_mt.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/3_4_realign_to_mt.cmd" --log ${log_dir}/3_4_realign_to_mt.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/3_4_realign_to_mt.cmd" --log ${log_dir}/3_4_realign_to_mt.resources --interval 60 --include-children
else else
bash ${log_dir}/3_4_realign_to_mt.cmd bash ${log_dir}/3_4_realign_to_mt.cmd
...@@ -555,7 +584,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \ ...@@ -555,7 +584,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
${log_dir}/3_5_dedup.stderr" ${log_dir}/3_5_dedup.stderr"
echo -e ${CMD} > ${log_dir}/3_5_dedup.cmd echo -e ${CMD} > ${log_dir}/3_5_dedup.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/3_5_dedup.cmd" --log ${log_dir}/3_5_dedup.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/3_5_dedup.cmd" --log ${log_dir}/3_5_dedup.resources --interval 60 --include-children
else else
bash ${log_dir}/3_5_dedup.cmd bash ${log_dir}/3_5_dedup.cmd
...@@ -576,15 +605,16 @@ fi ...@@ -576,15 +605,16 @@ fi
echo -ne " 3.6 Extracting methylation in MT..." echo -ne " 3.6 Extracting methylation in MT..."
cpus=`expr ${N_proc} / 3`
CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \ CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
--gzip --multicore 16 \ --gzip --multicore ${cpus} \
--samtools_path ${samtools_path} \ --samtools_path ${samtools_path} \
${output_dir}/umap_to_MT_${sample_id}.dedup.bam > \ ${output_dir}/umap_to_MT_${sample_id}.dedup.bam > \
${log_dir}/3_7_meth.stdout 2> \ ${log_dir}/3_7_meth.stdout 2> \
${log_dir}/3_7_meth.stderr" ${log_dir}/3_7_meth.stderr"
echo -e ${CMD} > ${log_dir}/3_6_meth.cmd echo -e ${CMD} > ${log_dir}/3_6_meth.cmd
if $MONITOR; then if $MONITOR; then
echo -e "\n " echo -ne "\n "
${psrecord} "bash ${log_dir}/3_6_meth.cmd" --log ${log_dir}/3_6_meth.resources --interval 60 --include-children ${psrecord} "bash ${log_dir}/3_6_meth.cmd" --log ${log_dir}/3_6_meth.resources --interval 60 --include-children
else else
bash ${log_dir}/3_6_meth.cmd bash ${log_dir}/3_6_meth.cmd
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment