Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Gonzalo S Nido
wgbs-pipeline
Commits
f107c8e9
Commit
f107c8e9
authored
Aug 17, 2020
by
Gonzalo S Nido
Browse files
cat instead of merge
parent
abca2299
Changes
1
Hide whitespace changes
Inline
Side-by-side
wgbs-pipeline.sh
View file @
f107c8e9
...
...
@@ -29,9 +29,21 @@ generate_bs_indices="${PIPELINEPATH}/apps/generate_bs_indices.sh"
psrecord
=
"psrecord"
# TO ACTIVATE MONITORING SET TO true
MONITOR
=
false
#MONITOR=true
########################################
#>>>> OPTIONS <<<<<#
########################################
# To activate CPU and RAM monitoring
#MONITOR=false
MONITOR
=
true
# To subset reads to 10,000 (debug)
#SUBSET_READS=false
SUBSET_READS
=
true
########################################
# exit when any command fails
...
...
@@ -39,6 +51,8 @@ set -e
# trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
# trap 'echo "\"${last_command}\" command filed with exit code $?"' EXIT
# Get number of processors in the machine
N_proc
=
$(
grep
-c
processor /proc/cpuinfo
)
# Ensure reference files/indices are there, download and index otherwise
if
[[
!
-s
${
b38_ref
}
/Bisulfite_Genome/GA_conversion/genome_mfa.GA_conversion.fa
]]
;
then
...
...
@@ -144,6 +158,11 @@ if $MONITOR; then
echo
fi
if
$SUBSET_READS
;
then
SUBSET_READS
=
"-u 10000"
echo
"SUBSETTING TO 10000 READS ONLY!!!!"
echo
fi
echo
"##################################################"
...
...
@@ -157,14 +176,15 @@ echo "##################################################"
echo
-ne
" 1.1 Aligning... "
#CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore 2 -u 100000 \
CMD
=
"
${
bismark
}
${
b38_ref
}
-1
${
R1
}
-2
${
R2
}
--multicore 8
\
cpus
=
`
expr
${
N_proc
}
/ 4
`
#CMD="${bismark} ${b38_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} -u 100000 \
CMD
=
"
${
bismark
}
${
b38_ref
}
-1
${
R1
}
-2
${
R2
}
--multicore
${
cpus
}
${
SUBSET_READS
}
\
-o
${
output_dir
}
--prefix b38 --path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
--temp_dir
${
tmp_dir
}
>
\
${
log_dir
}
/1_1_aln.stdout 2>
${
log_dir
}
/1_1_aln.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/1_1_aln.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_1_aln.cmd"
--log
${
log_dir
}
/1_1_aln.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/1_1_aln.cmd
...
...
@@ -187,13 +207,15 @@ for element in "${array[@]}"; do
bamfile_list
=
"
${
bamfile_list
}
${
bamfile
}
"
done
CMD
=
"
${
samtools
}
merge -n -r -@ 32
${
output_dir
}
/b38_
${
sample_id
}
.bam
${
bamfile_list
}
>
\
cpus
=
`
expr
${
N_proc
}
/ 2
`
#CMD="${samtools} merge -n -r -@ ${cpus} ${output_dir}/b38_${sample_id}.bam ${bamfile_list} > \
CMD
=
"
${
samtools
}
cat -@
${
cpus
}
-o
${
output_dir
}
/b38_
${
sample_id
}
.bam
${
bamfile_list
}
>
\
${
log_dir
}
/1_2_merge.stdout 2>
\
${
log_dir
}
/1_2_merge.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/1_2_merge.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_2_merge.cmd"
--log
${
log_dir
}
/1_2_merge.resources
--interval
60
--include-children
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_2_merge.cmd"
--log
${
log_dir
}
/1_2_merge.resources
--interval
1
--include-children
else
bash
${
log_dir
}
/1_2_merge.cmd
fi
...
...
@@ -218,7 +240,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
${
log_dir
}
/1_3_dedup.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/1_3_dedup.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_3_dedup.cmd"
--log
${
log_dir
}
/1_3_dedup.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/1_3_dedup.cmd
...
...
@@ -235,17 +257,19 @@ if [ -s "${output_dir}/b38_${sample_id}.deduplicated.bam" ]; then
mv
${
output_dir
}
/b38_
${
sample_id
}
.deduplicated.bam
${
output_dir
}
/b38_
${
sample_id
}
.dedup.bam
fi
exit
1
echo
-ne
" 1.4 Extracting methylation... "
cpus
=
`
expr
${
N_proc
}
/ 3
`
CMD
=
"
${
bismark_meth_extract
}
-p --comprehensive --output
${
output_dir
}
\
--multicore
16
\
--multicore
${
cpus
}
\
--samtools_path
${
samtools_path
}
\
${
output_dir
}
/b38_
${
sample_id
}
.dedup.bam >
\
${
log_dir
}
/1_4_meth.stdout 2>
\
${
log_dir
}
/1_4_meth.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/1_4_meth.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_4_meth.cmd"
--log
${
log_dir
}
/1_4_meth.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/1_4_meth.cmd
...
...
@@ -278,7 +302,7 @@ cat ${output_dir}/CHG_context_b38_${sample_id}.dedup.txt | python3 ${ext_chr_srt
${
output_dir
}
/Lambda_meth_prop_
${
sample_id
}
.txt"
echo
-e
${
CMD
}
>
${
log_dir
}
/1_5_extract_MT.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/1_5_extract_MT.cmd"
--log
${
log_dir
}
/1_5_extract_MT.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/1_5_extract_MT.cmd
...
...
@@ -308,14 +332,15 @@ echo "##################################################"
echo
-ne
" 2.1 Aligning... "
#CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore 2 -u 100000 \
CMD
=
"
${
bismark
}
${
mt_ref
}
-1
${
R1
}
-2
${
R2
}
--multicore 8
\
cpus
=
`
expr
${
N_proc
}
/ 4
`
#CMD="${bismark} ${mt_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} -u 100000 \
CMD
=
"
${
bismark
}
${
mt_ref
}
-1
${
R1
}
-2
${
R2
}
--multicore
${
cpus
}
${
SUBSET_READS
}
\
-o
${
output_dir
}
--prefix MT --path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
--temp_dir
${
tmp_dir
}
>
\
${
log_dir
}
/2_1_aln.stdout 2>
${
log_dir
}
/2_1_aln.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/2_1_aln.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/2_1_aln.cmd"
--log
${
log_dir
}
/2_1_aln.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/2_1_aln.cmd
...
...
@@ -338,12 +363,13 @@ for element in "${array[@]}"; do
bamfile_list
=
"
${
bamfile_list
}
${
bamfile
}
"
done
CMD
=
"
${
samtools
}
merge -n -r -@ 32
${
output_dir
}
/MT_
${
sample_id
}
.bam
${
bamfile_list
}
>
\
cpus
=
`
expr
${
N_proc
}
/ 2
`
CMD
=
"
${
samtools
}
merge -n -r -@
${
cpus
}
${
output_dir
}
/MT_
${
sample_id
}
.bam
${
bamfile_list
}
>
\
${
log_dir
}
/2_2_merge.stdout 2>
\
${
log_dir
}
/2_2_merge.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/2_2_merge.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/2_2_merge.cmd"
--log
${
log_dir
}
/2_2_merge.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/2_2_merge.cmd
...
...
@@ -369,7 +395,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
${
log_dir
}
/2_3_dedup.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/2_3_dedup.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/2_3_dedup.cmd"
--log
${
log_dir
}
/2_3_dedup.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/2_3_dedup.cmd
...
...
@@ -388,15 +414,16 @@ fi
echo
-ne
" 2.4 Extracting methylation... "
cpus
=
`
expr
${
N_proc
}
/ 3
`
CMD
=
"
${
bismark_meth_extract
}
-p --comprehensive --output
${
output_dir
}
\
--gzip --multicore
16
\
--gzip --multicore
${
cpus
}
\
--samtools_path
${
samtools_path
}
\
${
output_dir
}
/MT_
${
sample_id
}
.dedup.bam >
\
${
log_dir
}
/2_4_meth.stdout 2>
\
${
log_dir
}
/2_4_meth.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/2_4_meth.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/2_4_meth.cmd"
--log
${
log_dir
}
/2_4_meth.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/2_4_meth.cmd
...
...
@@ -429,14 +456,15 @@ echo "##################################################"
echo
-ne
" 3.1 Aligning vs nuclear DNA... "
#CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore 2 -u 100000 \
CMD
=
"
${
bismark
}
${
nuc_ref
}
-1
${
R1
}
-2
${
R2
}
--multicore 8
\
cpus
=
`
expr
${
N_proc
}
/ 4
`
#CMD="${bismark} ${nuc_ref} -1 ${R1} -2 ${R2} --multicore ${cpus} -u 100000 \
CMD
=
"
${
bismark
}
${
nuc_ref
}
-1
${
R1
}
-2
${
R2
}
--multicore
${
cpus
}
${
SUBSET_READS
}
\
--un --ambiguous -o
${
output_dir
}
--prefix b38_noMT --path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
--temp_dir
${
tmp_dir
}
>
\
${
log_dir
}
/3_1_aln.stdout 2>
${
log_dir
}
/3_1_aln.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/3_1_aln.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_1_aln.cmd"
--log
${
log_dir
}
/3_1_aln.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/3_1_aln.cmd
...
...
@@ -473,7 +501,7 @@ CMD="cat ${unmapped_list_r1} > ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.f
rm
${
output_dir
}
/b38_noMT.*_ambiguous_reads_?.fq.gz"
echo
-e
${
CMD
}
>
${
log_dir
}
/3_2_merge_unmapped.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_2_merge.cmd"
--log
${
log_dir
}
/3_2_merge.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/3_2_merge_unmapped.cmd
...
...
@@ -502,7 +530,7 @@ done
CMD
=
"rm
${
bamfile_list
}
"
echo
${
CMD
}
>
${
log_dir
}
/3_3_delete.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_3_delete.cmd"
--log
${
log_dir
}
/3_3_delete.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/3_3_delete.cmd
...
...
@@ -520,8 +548,9 @@ fi
echo
-ne
" 3.4 Re-aligning unmapped reads to mtDNA... "
cpus
=
`
expr
${
N_proc
}
/ 4
`
CMD
=
"(
${
bismark
}
${
mt_ref
}
-1
${
output_dir
}
/b38_noMT_
${
sample_id
}
_unmapped.R1.fastq.gz
\
-2
${
output_dir
}
/b38_noMT_
${
sample_id
}
_unmapped.R2.fastq.gz --multicore
8
\
-2
${
output_dir
}
/b38_noMT_
${
sample_id
}
_unmapped.R2.fastq.gz --multicore
${
cpus
}
\
-o
${
output_dir
}
--prefix umap_to_MT --path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
--temp_dir
${
tmp_dir
}
&&
\
mv
${
output_dir
}
/umap_to_MT.b38_noMT_
${
sample_id
}
_unmapped.R1_bismark_bt2_pe.bam
${
output_dir
}
/umap_to_MT_
${
sample_id
}
.bam &&
\
...
...
@@ -530,7 +559,7 @@ CMD="(${bismark} ${mt_ref} -1 ${output_dir}/b38_noMT_${sample_id}_unmapped.R1.fa
${
log_dir
}
/3_4_realign_to_mt.stderr"
echo
${
CMD
}
>
${
log_dir
}
/3_4_realign_to_mt.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_4_realign_to_mt.cmd"
--log
${
log_dir
}
/3_4_realign_to_mt.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/3_4_realign_to_mt.cmd
...
...
@@ -555,7 +584,7 @@ CMD="${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
${
log_dir
}
/3_5_dedup.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/3_5_dedup.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_5_dedup.cmd"
--log
${
log_dir
}
/3_5_dedup.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/3_5_dedup.cmd
...
...
@@ -576,15 +605,16 @@ fi
echo
-ne
" 3.6 Extracting methylation in MT..."
cpus
=
`
expr
${
N_proc
}
/ 3
`
CMD
=
"
${
bismark_meth_extract
}
-p --comprehensive --output
${
output_dir
}
\
--gzip --multicore
16
\
--gzip --multicore
${
cpus
}
\
--samtools_path
${
samtools_path
}
\
${
output_dir
}
/umap_to_MT_
${
sample_id
}
.dedup.bam >
\
${
log_dir
}
/3_7_meth.stdout 2>
\
${
log_dir
}
/3_7_meth.stderr"
echo
-e
${
CMD
}
>
${
log_dir
}
/3_6_meth.cmd
if
$MONITOR
;
then
echo
-e
"
\n
"
echo
-
n
e
"
\n
"
${
psrecord
}
"bash
${
log_dir
}
/3_6_meth.cmd"
--log
${
log_dir
}
/3_6_meth.resources
--interval
60
--include-children
else
bash
${
log_dir
}
/3_6_meth.cmd
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment