Commit f107c8e9 authored by Gonzalo S Nido's avatar Gonzalo S Nido
Browse files

cat instead of merge

parent abca2299
......@@ -29,9 +29,21 @@ generate_bs_indices="${PIPELINEPATH}/apps/generate_bs_indices.sh"
psrecord="psrecord"
# TO ACTIVATE MONITORING SET TO true
MONITOR=false
#MONITOR=true
########################################
#>>>> OPTIONS <<<<<#
########################################
# To activate CPU and RAM monitoring
#MONITOR=false
MONITOR=true
# To subset reads to 10,000 (debug)
#SUBSET_READS=false
SUBSET_READS=true
########################################
# exit when any command fails
......@@ -39,6 +51,8 @@ set -e
# trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
# trap 'echo "\"${last_command}\" command filed with exit code $?"' EXIT
# Get number of processors in the machine
N_proc=$( grep -c processor /proc/cpuinfo )
# Ensure reference files/indices are there, download and index otherwise
if [[ ! -s ${b38_ref}/Bisulfite_Genome/GA_conversion/genome_mfa.GA_conversion.fa ]]; then
......@@ -144,6 +158,11 @@ if $MONITOR; then
echo
fi
if $SUBSET_READS; then
SUBSET_READS="-u 10000"
echo "SUBSETTING TO 10000 READS ONLY!!!!"
echo
fi
echo "##################################################"
......@@ -157,14 +176,15 @@ echo "##################################################"
echo -ne " 1.1 Aligning... "
#CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore 2 -u 100000 \
CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore 8 \
cpus=`expr ${N_proc} / 4`
#CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} -u 100000 \
CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS} \
-o ${output_dir} --prefix b38 --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \
${log_dir}/1_1_aln.stdout 2> ${log_dir}/1_1_aln.stderr"
echo -e ${CMD} > ${log_dir}/1_1_aln.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_1_aln.cmd" --log ${log_dir}/1_1_aln.resources --interval 60 --include-children
else
bash ${log_dir}/1_1_aln.cmd
......@@ -187,13 +207,15 @@ for element in "${array[@]}"; do
bamfile_list="${bamfile_list} ${bamfile}"
done
CMD="${samtools} merge -n -r -@ 32 ${output_dir}/b38_${sample_id}.bam ${bamfile_list} > \
cpus=`expr ${N_proc} / 2`
#CMD="${samtools} merge -n -r -@ ${cpus} ${output_dir}/b38_${sample_id}.bam ${bamfile_list} > \
CMD="${samtools} cat -@ ${cpus} -o ${output_dir}/b38_${sample_id}.bam ${bamfile_list} > \
${log_dir}/1_2_merge.stdout 2> \
${log_dir}/1_2_merge.stderr"
echo -e ${CMD} > ${log_dir}/1_2_merge.cmd
if $MONITOR; then
echo -e "\n "
${psrecord} "bash ${log_dir}/1_2_merge.cmd" --log ${log_dir}/1_2_merge.resources --interval 60 --include-children
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_2_merge.cmd" --log ${log_dir}/1_2_merge.resources --interval 1 --include-children
else
bash ${log_dir}/1_2_merge.cmd
fi
......@@ -218,7 +240,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
${log_dir}/1_3_dedup.stderr"
echo -e ${CMD} > ${log_dir}/1_3_dedup.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_3_dedup.cmd" --log ${log_dir}/1_3_dedup.resources --interval 60 --include-children
else
bash ${log_dir}/1_3_dedup.cmd
......@@ -235,17 +257,19 @@ if [ -s "${output_dir}/b38_${sample_id}.deduplicated.bam" ]; then
mv ${output_dir}/b38_${sample_id}.deduplicated.bam ${output_dir}/b38_${sample_id}.dedup.bam
fi
exit 1
echo -ne " 1.4 Extracting methylation... "
cpus=`expr ${N_proc} / 3`
CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
--multicore 16 \
--multicore ${cpus} \
--samtools_path ${samtools_path} \
${output_dir}/b38_${sample_id}.dedup.bam > \
${log_dir}/1_4_meth.stdout 2> \
${log_dir}/1_4_meth.stderr"
echo -e ${CMD} > ${log_dir}/1_4_meth.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_4_meth.cmd" --log ${log_dir}/1_4_meth.resources --interval 60 --include-children
else
bash ${log_dir}/1_4_meth.cmd
......@@ -278,7 +302,7 @@ cat ${output_dir}/CHG_context_b38_${sample_id}.dedup.txt | python3 ${ext_chr_srt
${output_dir}/Lambda_meth_prop_${sample_id}.txt"
echo -e ${CMD} > ${log_dir}/1_5_extract_MT.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/1_5_extract_MT.cmd" --log ${log_dir}/1_5_extract_MT.resources --interval 60 --include-children
else
bash ${log_dir}/1_5_extract_MT.cmd
......@@ -308,14 +332,15 @@ echo "##################################################"
echo -ne " 2.1 Aligning... "
#CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore 2 -u 100000 \
CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore 8 \
cpus=`expr ${N_proc} / 4`
#CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} -u 100000 \
CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS} \
-o ${output_dir} --prefix MT --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \
${log_dir}/2_1_aln.stdout 2> ${log_dir}/2_1_aln.stderr"
echo -e ${CMD} > ${log_dir}/2_1_aln.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/2_1_aln.cmd" --log ${log_dir}/2_1_aln.resources --interval 60 --include-children
else
bash ${log_dir}/2_1_aln.cmd
......@@ -338,12 +363,13 @@ for element in "${array[@]}"; do
bamfile_list="${bamfile_list} ${bamfile}"
done
CMD="${samtools} merge -n -r -@ 32 ${output_dir}/MT_${sample_id}.bam ${bamfile_list} > \
cpus=`expr ${N_proc} / 2`
CMD="${samtools} merge -n -r -@ ${cpus} ${output_dir}/MT_${sample_id}.bam ${bamfile_list} > \
${log_dir}/2_2_merge.stdout 2> \
${log_dir}/2_2_merge.stderr"
echo -e ${CMD} > ${log_dir}/2_2_merge.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/2_2_merge.cmd" --log ${log_dir}/2_2_merge.resources --interval 60 --include-children
else
bash ${log_dir}/2_2_merge.cmd
......@@ -369,7 +395,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
${log_dir}/2_3_dedup.stderr"
echo -e ${CMD} > ${log_dir}/2_3_dedup.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/2_3_dedup.cmd" --log ${log_dir}/2_3_dedup.resources --interval 60 --include-children
else
bash ${log_dir}/2_3_dedup.cmd
......@@ -388,15 +414,16 @@ fi
echo -ne " 2.4 Extracting methylation... "
cpus=`expr ${N_proc} / 3`
CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
--gzip --multicore 16 \
--gzip --multicore ${cpus} \
--samtools_path ${samtools_path} \
${output_dir}/MT_${sample_id}.dedup.bam > \
${log_dir}/2_4_meth.stdout 2> \
${log_dir}/2_4_meth.stderr"
echo -e ${CMD} > ${log_dir}/2_4_meth.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/2_4_meth.cmd" --log ${log_dir}/2_4_meth.resources --interval 60 --include-children
else
bash ${log_dir}/2_4_meth.cmd
......@@ -429,14 +456,15 @@ echo "##################################################"
echo -ne " 3.1 Aligning vs nuclear DNA... "
#CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore 2 -u 100000 \
CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore 8 \
cpus=`expr ${N_proc} / 4`
#CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} -u 100000 \
CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS} \
--un --ambiguous -o ${output_dir} --prefix b38_noMT --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} > \
${log_dir}/3_1_aln.stdout 2> ${log_dir}/3_1_aln.stderr"
echo -e ${CMD} > ${log_dir}/3_1_aln.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_1_aln.cmd" --log ${log_dir}/3_1_aln.resources --interval 60 --include-children
else
bash ${log_dir}/3_1_aln.cmd
......@@ -473,7 +501,7 @@ CMD="cat ${unmapped_list_r1} > ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.f
rm ${output_dir}/b38_noMT.*_ambiguous_reads_?.fq.gz"
echo -e ${CMD} > ${log_dir}/3_2_merge_unmapped.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_2_merge.cmd" --log ${log_dir}/3_2_merge.resources --interval 60 --include-children
else
bash ${log_dir}/3_2_merge_unmapped.cmd
......@@ -502,7 +530,7 @@ done
CMD="rm ${bamfile_list}"
echo ${CMD} > ${log_dir}/3_3_delete.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_3_delete.cmd" --log ${log_dir}/3_3_delete.resources --interval 60 --include-children
else
bash ${log_dir}/3_3_delete.cmd
......@@ -520,8 +548,9 @@ fi
echo -ne " 3.4 Re-aligning unmapped reads to mtDNA... "
cpus=`expr ${N_proc} / 4`
CMD="(${bismark} ${mt_ref} -1 ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.fastq.gz \
-2 ${output_dir}/b38_noMT_${sample_id}_unmapped.R2.fastq.gz --multicore 8 \
-2 ${output_dir}/b38_noMT_${sample_id}_unmapped.R2.fastq.gz --multicore ${cpus} \
-o ${output_dir} --prefix umap_to_MT --path_to_bowtie ${bowtie_path} \
--samtools_path ${samtools_path} --temp_dir ${tmp_dir} && \
mv ${output_dir}/umap_to_MT.b38_noMT_${sample_id}_unmapped.R1_bismark_bt2_pe.bam ${output_dir}/umap_to_MT_${sample_id}.bam && \
......@@ -530,7 +559,7 @@ CMD="(${bismark} ${mt_ref} -1 ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.fa
${log_dir}/3_4_realign_to_mt.stderr"
echo ${CMD} > ${log_dir}/3_4_realign_to_mt.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_4_realign_to_mt.cmd" --log ${log_dir}/3_4_realign_to_mt.resources --interval 60 --include-children
else
bash ${log_dir}/3_4_realign_to_mt.cmd
......@@ -555,7 +584,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
${log_dir}/3_5_dedup.stderr"
echo -e ${CMD} > ${log_dir}/3_5_dedup.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_5_dedup.cmd" --log ${log_dir}/3_5_dedup.resources --interval 60 --include-children
else
bash ${log_dir}/3_5_dedup.cmd
......@@ -576,15 +605,16 @@ fi
echo -ne " 3.6 Extracting methylation in MT..."
cpus=`expr ${N_proc} / 3`
CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
--gzip --multicore 16 \
--gzip --multicore ${cpus} \
--samtools_path ${samtools_path} \
${output_dir}/umap_to_MT_${sample_id}.dedup.bam > \
${log_dir}/3_7_meth.stdout 2> \
${log_dir}/3_7_meth.stderr"
echo -e ${CMD} > ${log_dir}/3_6_meth.cmd
if $MONITOR; then
echo -e "\n "
echo -ne "\n "
${psrecord} "bash ${log_dir}/3_6_meth.cmd" --log ${log_dir}/3_6_meth.resources --interval 60 --include-children
else
bash ${log_dir}/3_6_meth.cmd
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment