Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Gonzalo S Nido
wgbs-pipeline
Commits
cae1d186
Commit
cae1d186
authored
Aug 19, 2020
by
Gonzalo S Nido
Browse files
Fixed some bugs
parent
350d3590
Changes
1
Hide whitespace changes
Inline
Side-by-side
wgbs-pipeline.sh
View file @
cae1d186
...
...
@@ -34,12 +34,12 @@ psrecord="psrecord"
########################################
# To activate CPU and RAM monitoring
#
MONITOR=false
MONITOR
=
true
MONITOR
=
false
#
MONITOR=true
# To subset reads to 10,000 (debug)
#
SUBSET_READS=false
SUBSET_READS
=
true
SUBSET_READS
=
false
#
SUBSET_READS=true
########################################
...
...
@@ -161,8 +161,8 @@ if $MONITOR; then
fi
if
$SUBSET_READS
;
then
SUBSET_READS
=
"-u 100000"
echo
"SUBSETTING TO 100000 READS ONLY!!!!"
SUBSET_READS
=
"-u 100000
0
"
echo
"SUBSETTING TO 100000
0
READS ONLY!!!!"
echo
fi
...
...
@@ -189,7 +189,7 @@ CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS}
echo
-e
${
CMD
}
>
${
log_dir
}
/1_1_aln.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_1_aln.cmd"
--log
${
log_dir
}
/1_1_aln.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/1_1_aln.cmd"
--log
${
log_dir
}
/1_1_aln.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/1_1_aln.cmd
fi
...
...
@@ -246,7 +246,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
echo
-e
${
CMD
}
>
${
log_dir
}
/1_3_dedup.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_3_dedup.cmd"
--log
${
log_dir
}
/1_3_dedup.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/1_3_dedup.cmd"
--log
${
log_dir
}
/1_3_dedup.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/1_3_dedup.cmd
fi
...
...
@@ -265,7 +265,7 @@ fi
echo
-ne
" 1.4 Extracting methylation... "
cpus
=
`
expr
${
N_proc
}
/ 3
`
cpus
=
${
N_proc
}
CMD
=
"
${
bismark_meth_extract
}
-p --comprehensive --output
${
output_dir
}
\
--multicore
${
cpus
}
\
...
...
@@ -276,7 +276,7 @@ CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
echo
-e
${
CMD
}
>
${
log_dir
}
/1_4_meth.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_4_meth.cmd"
--log
${
log_dir
}
/1_4_meth.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/1_4_meth.cmd"
--log
${
log_dir
}
/1_4_meth.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/1_4_meth.cmd
fi
...
...
@@ -310,7 +310,7 @@ cat ${output_dir}/CHG_context_b38_${sample_id}.dedup.txt | python3 ${ext_chr_srt
echo
-e
${
CMD
}
>
${
log_dir
}
/1_5_extract_MT.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_5_extract_MT.cmd"
--log
${
log_dir
}
/1_5_extract_MT.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/1_5_extract_MT.cmd"
--log
${
log_dir
}
/1_5_extract_MT.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/1_5_extract_MT.cmd
fi
...
...
@@ -324,7 +324,6 @@ fi
echo
exit
1
...
...
@@ -352,7 +351,7 @@ CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS}
echo
-e
${
CMD
}
>
${
log_dir
}
/2_1_aln.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/2_1_aln.cmd"
--log
${
log_dir
}
/2_1_aln.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/2_1_aln.cmd"
--log
${
log_dir
}
/2_1_aln.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/2_1_aln.cmd
fi
...
...
@@ -383,7 +382,7 @@ CMD="${samtools} cat -o ${output_dir}/MT_${sample_id}.bam ${bamfile_list} > \
echo
-e
${
CMD
}
>
${
log_dir
}
/2_2_merge.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/2_2_merge.cmd"
--log
${
log_dir
}
/2_2_merge.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/2_2_merge.cmd"
--log
${
log_dir
}
/2_2_merge.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/2_2_merge.cmd
fi
...
...
@@ -402,6 +401,7 @@ fi
echo
-ne
" 2.3 Dedupping alignment... "
CMD
=
"
${
deduplicate_bismark
}
--paired --samtools_path
${
samtools_path
}
\
--output_dir
${
output_dir
}
${
output_dir
}
/MT_
${
sample_id
}
.bam >
\
${
log_dir
}
/2_3_dedup.stdout 2>
\
...
...
@@ -409,7 +409,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
echo
-e
${
CMD
}
>
${
log_dir
}
/2_3_dedup.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/2_3_dedup.cmd"
--log
${
log_dir
}
/2_3_dedup.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/2_3_dedup.cmd"
--log
${
log_dir
}
/2_3_dedup.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/2_3_dedup.cmd
fi
...
...
@@ -428,7 +428,7 @@ fi
echo
-ne
" 2.4 Extracting methylation... "
cpus
=
`
expr
${
N_proc
}
/ 3
`
cpus
=
${
N_proc
}
CMD
=
"
${
bismark_meth_extract
}
-p --comprehensive --output
${
output_dir
}
\
--gzip --multicore
${
cpus
}
\
...
...
@@ -439,7 +439,7 @@ CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
echo
-e
${
CMD
}
>
${
log_dir
}
/2_4_meth.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/2_4_meth.cmd"
--log
${
log_dir
}
/2_4_meth.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/2_4_meth.cmd"
--log
${
log_dir
}
/2_4_meth.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/2_4_meth.cmd
fi
...
...
@@ -482,7 +482,7 @@ CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} ${SUBSET_READS}
echo
-e
${
CMD
}
>
${
log_dir
}
/3_1_aln.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_1_aln.cmd"
--log
${
log_dir
}
/3_1_aln.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/3_1_aln.cmd"
--log
${
log_dir
}
/3_1_aln.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/3_1_aln.cmd
fi
...
...
@@ -520,7 +520,7 @@ CMD="cat ${unmapped_list_r1} > ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.f
echo
-e
${
CMD
}
>
${
log_dir
}
/3_2_merge_unmapped.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_2_merge.cmd"
--log
${
log_dir
}
/3_2_merge.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/3_2_merge
_unmapped
.cmd"
--log
${
log_dir
}
/3_2_merge
_unmapped
.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/3_2_merge_unmapped.cmd
fi
...
...
@@ -550,7 +550,7 @@ CMD="rm ${bamfile_list}"
echo
${
CMD
}
>
${
log_dir
}
/3_3_delete.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_3_delete.cmd"
--log
${
log_dir
}
/3_3_delete.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/3_3_delete.cmd"
--log
${
log_dir
}
/3_3_delete.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/3_3_delete.cmd
fi
...
...
@@ -568,21 +568,22 @@ fi
echo
-ne
" 3.4 Re-aligning unmapped reads to mtDNA... "
cpus
=
`
expr
${
RAM
}
/ 16
`
if
[[
$cpus
-eq
0
]]
;
then
cpus
=
1
;
fi
if
[
"
${
RAM
}
"
-lt
"16"
]
;
then
cpus
=
1
;
else
cpus
=
$(
expr
${
RAM
}
/ 16
)
;
fi
echo
-ne
"(
${
cpus
}
cores) "
CMD
=
"(
${
bismark
}
${
mt_ref
}
-1
${
output_dir
}
/b38_noMT_
${
sample_id
}
_unmapped.R1.fastq.gz
\
-2
${
output_dir
}
/b38_noMT_
${
sample_id
}
_unmapped.R2.fastq.gz --multicore
${
cpus
}
\
-o
${
output_dir
}
--prefix umap_to_MT --path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
--temp_dir
${
tmp_dir
}
&&
\
mv
${
output_dir
}
/umap_to_MT.b38_noMT_
${
sample_id
}
_unmapped.R1_bismark_bt2_pe.bam
${
output_dir
}
/umap_to_MT_
${
sample_id
}
.bam &&
\
mv
${
output_dir
}
/umap_to_MT.b38_noMT_
${
sample_id
}
_unmapped.R1_bismark_bt2_PE_report.txt
${
output_dir
}
/umap_to_MT_
${
sample_id
}
_report.txt) >
\
mv
${
output_dir
}
/umap_to_MT.b38_noMT_
${
sample_id
}
_unmapped.R1_bismark_bt2_PE_report.txt
${
output_dir
}
/umap_to_MT_
${
sample_id
}
_report.txt &&
\
rm
${
output_dir
}
/b38_noMT_
${
sample_id
}
_unmapped.R?.fastq.gz) >
\
${
log_dir
}
/3_4_realign_to_mt.stdout 2>
\
${
log_dir
}
/3_4_realign_to_mt.stderr"
echo
${
CMD
}
>
${
log_dir
}
/3_4_realign_to_mt.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_4_realign_to_mt.cmd"
--log
${
log_dir
}
/3_4_realign_to_mt.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/3_4_realign_to_mt.cmd"
--log
${
log_dir
}
/3_4_realign_to_mt.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/3_4_realign_to_mt.cmd
fi
...
...
@@ -595,9 +596,6 @@ else
fi
rm
${
output_dir
}
/b38_noMT_
${
sample_id
}
_unmapped.R?.fastq.gz
echo
-ne
" 3.5 Dedupping alignments... "
...
...
@@ -608,7 +606,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
echo
-e
${
CMD
}
>
${
log_dir
}
/3_5_dedup.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_5_dedup.cmd"
--log
${
log_dir
}
/3_5_dedup.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/3_5_dedup.cmd"
--log
${
log_dir
}
/3_5_dedup.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/3_5_dedup.cmd
fi
...
...
@@ -629,7 +627,7 @@ fi
echo
-ne
" 3.6 Extracting methylation in MT..."
cpus
=
`
expr
${
N_proc
}
/ 3
`
cpus
=
${
N_proc
}
CMD
=
"
${
bismark_meth_extract
}
-p --comprehensive --output
${
output_dir
}
\
--gzip --multicore
${
cpus
}
\
...
...
@@ -640,7 +638,7 @@ CMD="${bismark_meth_extract} -p --comprehensive --output ${output_dir} \
echo
-e
${
CMD
}
>
${
log_dir
}
/3_6_meth.cmd
if
$MONITOR
;
then
echo
-ne
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_6_meth.cmd"
--log
${
log_dir
}
/3_6_meth.resources
--interval
60
--include-children
${
psrecord
}
"bash
${
log_dir
}
/3_6_meth.cmd"
--log
${
log_dir
}
/3_6_meth.resources
--interval
5
--include-children
else
bash
${
log_dir
}
/3_6_meth.cmd
fi
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment