Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Gonzalo S Nido
wgbs-pipeline
Commits
8297588c
Commit
8297588c
authored
Aug 04, 2020
by
Gonzalo S Nido
Browse files
First commit
parents
Changes
3
Hide whitespace changes
Inline
Side-by-side
pipeline_1.sh
0 → 100644
View file @
8297588c
#!/bin/bash
# exit when any command fails
set
-e
trap
'last_command=$current_command; current_command=$BASH_COMMAND'
DEBUG
trap
'echo "\"${last_command}\" command filed with exit code $?"'
EXIT
if
[
"
$1
"
==
""
]
then
echo
"Provide ID!!"
exit
1
fi
wd
=
"/data/bs"
id
=
$1
ref_dir
=
"/data/ref"
fastq_dir
=
"/data/bs/trimmed_fq"
# Programs
bismark
=
"/usr/local/bin/bismark-0.22.3"
deduplicate_bismark
=
"/usr/local/bin/deduplicate_bismark-0.22.3"
bismark_meth_extract
=
"/usr/local/bin/bismark_methylation_extractor-0.22.3"
bowtie2
=
"/usr/local/bin/bowtie2-2.4.41"
bowtie_path
=
"/opt/bowtie2-2.4.41/"
samtools
=
"/usr/local/bin/samtools-1.10"
samtools_path
=
"/opt/samtools-1.10"
fetch_mt_reads
=
"/data/bin/fetch_mt_and_mate/fetch_mt_and_mate.py"
#samviewwithmate="/data/bin/jvarkit/dist/samviewwithmate.jar"
#methylKit_extract="~/scripts/extractMethylation.R"
# References
b37_ref
=
${
ref_dir
}
/b37_decoy/
#nuc_ref=${ref_dir}/b37_decoy_no_mt/
mt_ref
=
${
ref_dir
}
/b37_decoy_only_mt/
# Output folders
bamfiles_1
=
${
wd
}
/bamfiles_1
mkdir
-p
${
bamfiles_1
}
meth_1
=
${
wd
}
/meth_1
mkdir
-p
${
meth_1
}
tmp_dir
=
${
wd
}
/tmp_1
mkdir
-p
${
tmp_dir
}
log_dir
=
${
wd
}
/logs_1
mkdir
-p
${
log_dir
}
# Get fastq files
r1_files
=
$(
find
${
fastq_dir
}
-name
*${
id
}*
R1.fastq.gz |
paste
-sd
","
-
)
r2_files
=
$(
find
${
fastq_dir
}
-name
*${
id
}*
R2
*
fastq.gz |
paste
-sd
","
-
)
echo
"R1 FILES:
${
r1_files
}
"
echo
"R2 FILES:
${
r2_files
}
"
echo
"#### STRATEGY 1: Align vs the whole genome (nc + mt), dedup, and extract
\
methylation of MT"
echo
" 1.1 Aligning..."
${
bismark
}
${
b37_ref
}
-1
${
r1_files
}
-2
${
r2_files
}
--un
--ambiguous
--multicore
12
\
-o
${
bamfiles_1
}
--temp_dir
${
tmp_dir
}
--path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
>
${
log_dir
}
/1_1_aln.stdout 2>
${
log_dir
}
/1_1_aln.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 1.1"
exit
1
fi
echo
" 1.2 Merging unaligned and ambiguos reads..."
unaligned_list_r1
=
""
ambig_list_r1
=
""
unaligned_list_r2
=
""
ambig_list_r2
=
""
IFS
=
','
read
-r
-a
array
<<<
"
${
r1_files
}
"
for
element
in
"
${
array
[@]
}
"
;
do
uns
=
$(
basename
${
element
}
)
uns
=
"
${
bamfiles_1
}
/
${
uns
}
_unmapped_reads_1.fq.gz"
unaligned_list_r1
=
"
${
unaligned_list_r1
}
${
uns
}
"
amb
=
$(
basename
${
element
}
)
amb
=
"
${
bamfiles_1
}
/
${
amb
}
_ambiguous_reads_1.fq.gz"
ambig_list_r1
=
"
${
ambig_list_r1
}
${
amb
}
"
done
IFS
=
','
read
-r
-a
array
<<<
"
${
r2_files
}
"
for
element
in
"
${
array
[@]
}
"
;
do
uns
=
$(
basename
${
element
}
)
uns
=
"
${
bamfiles_1
}
/
${
uns
}
_unmapped_reads_2.fq.gz"
unaligned_list_r2
=
"
${
unaligned_list_r2
}
${
uns
}
"
amb
=
$(
basename
${
element
}
)
amb
=
"
${
bamfiles_1
}
/
${
amb
}
_ambiguous_reads_2.fq.gz"
ambig_list_r2
=
"
${
ambig_list_r2
}
${
amb
}
"
done
cat
${
unaligned_list_r1
}
>
${
bamfiles_1
}
/
${
id
}
_unmapped.R1.fastq.gz
cat
${
ambig_list_r1
}
>
${
bamfiles_1
}
/
${
id
}
_ambiguous.R1.fastq.gz
cat
${
unaligned_list_r2
}
>
${
bamfiles_1
}
/
${
id
}
_unmapped.R2.fastq.gz
cat
${
ambig_list_r2
}
>
${
bamfiles_1
}
/
${
id
}
_ambiguous.R2.fastq.gz
if
[
-s
"
${
bamfiles_1
}
/
${
id
}
_unmapped.R1.fastq.gz"
]
;
then
rm
${
unaligned_list_r1
}
fi
if
[
-s
"
${
bamfiles_1
}
/
${
id
}
_unmapped.R2.fastq.gz"
]
;
then
rm
${
unaligned_list_r2
}
fi
if
[
-s
"
${
bamfiles_1
}
/
${
id
}
_ambiguous.R1.fastq.gz"
]
;
then
rm
${
ambig_list_r1
}
fi
if
[
-s
"
${
bamfiles_1
}
/
${
id
}
_ambiguous.R2.fastq.gz"
]
;
then
rm
${
ambig_list_r2
}
fi
echo
" 1.3 Merging bamfiles..."
bamfile_list
=
""
IFS
=
','
read
-r
-a
array
<<<
"
${
r1_files
}
"
for
element
in
"
${
array
[@]
}
"
;
do
bamfile
=
$(
basename
${
element
}
)
bamfile
=
"
${
bamfiles_1
}
/
${
bamfile
%.fastq.gz
}
_bismark_bt2_pe.bam"
bamfile_list
=
"
${
bamfile_list
}
${
bamfile
}
"
done
${
samtools
}
merge
-n
-r
-@ 64
${
bamfiles_1
}
/
${
id
}
_b37.bam
${
bamfile_list
}
>
\
${
log_dir
}
/1_3_merge.stdout 2>
\
${
log_dir
}
/1_3_merge.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 1.3"
exit
1
fi
if
[
-s
"
${
bamfiles_1
}
/
${
id
}
_b37.bam"
]
;
then
echo
" Removing lane bams..."
rm
${
bamfile_list
}
fi
echo
" 1.4 Dedupping alignment..."
${
deduplicate_bismark
}
--paired
--samtools_path
${
samtools_path
}
\
--output_dir
${
bamfiles_1
}
${
bamfiles_1
}
/
${
id
}
_b37.bam
>
\
${
log_dir
}
/1_4_dedup.stdout 2>
\
${
log_dir
}
/1_4_dedup.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 1.4"
exit
1
fi
if
[
-s
"
${
bamfiles_1
}
/
${
id
}
_b37.deduplicated.bam"
]
;
then
mv
${
bamfiles_1
}
/
${
id
}
_b37.deduplicated.bam
${
bamfiles_1
}
/
${
id
}
_b37.dedup.bam
fi
echo
" 1.5 Extracting methylation in MT..."
${
bismark_meth_extract
}
-p
--comprehensive
--output
${
meth_1
}
\
--gzip
--multicore
16
--bedGraph
--genome_folder
${
b37_ref
}
\
--CX
--samtools_path
${
samtools_path
}
--ample_memory
\
${
bamfiles_1
}
/
${
id
}
_b37.dedup.bam
>
\
${
log_dir
}
/1_5_meth.stdout 2>
\
${
log_dir
}
/1_5_meth.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 1.5"
exit
1
fi
# echo " 1.6 Fetching MT reads (and their pairs)..."
# python3 ${fetch_mt_reads} ${bamfiles_1}/${id}_b37.dedup.bam | \
# ${samtools} view -@ 16 -b -o ${bamfiles_1}/${id}_mt_reads_and_mates.bam -
pipeline_2.sh
0 → 100644
View file @
8297588c
#!/bin/bash
# exit when any command fails
set
-e
trap
'last_command=$current_command; current_command=$BASH_COMMAND'
DEBUG
trap
'echo "\"${last_command}\" command filed with exit code $?"'
EXIT
if
[
"
$1
"
==
""
]
then
echo
"Provide ID!!"
exit
1
fi
wd
=
"/data/bs"
id
=
$1
ref_dir
=
"/data/ref"
fastq_dir
=
"/data/bs/trimmed_fq"
# Programs
bismark
=
"/usr/local/bin/bismark-0.22.3"
deduplicate_bismark
=
"/usr/local/bin/deduplicate_bismark-0.22.3"
bismark_meth_extract
=
"/usr/local/bin/bismark_methylation_extractor-0.22.3"
bowtie2
=
"/usr/local/bin/bowtie2-2.4.41"
bowtie_path
=
"/opt/bowtie2-2.4.41/"
samtools
=
"/usr/local/bin/samtools-1.10"
samtools_path
=
"/opt/samtools-1.10"
fetch_mt_reads
=
"/data/bin/fetch_mt_and_mate/fetch_mt_and_mate.py"
#samviewwithmate="/data/bin/jvarkit/dist/samviewwithmate.jar"
#methylKit_extract="~/scripts/extractMethylation.R"
# References
#b37_ref=${ref_dir}/b37_decoy/
#nuc_ref=${ref_dir}/b37_decoy_no_mt/
mt_ref
=
${
ref_dir
}
/b37_decoy_only_mt/
# Output folders
bam_dir
=
${
wd
}
/bamfiles_2
mkdir
-p
${
bam_dir
}
meth_dir
=
${
wd
}
/meth_2
mkdir
-p
${
meth_dir
}
tmp_dir
=
${
wd
}
/tmp_2
mkdir
-p
${
tmp_dir
}
log_dir
=
${
wd
}
/logs_2
mkdir
-p
${
log_dir
}
# Get fastq files
r1_files
=
$(
find
${
fastq_dir
}
-name
*${
id
}*
R1.fastq.gz |
paste
-sd
","
-
)
r2_files
=
$(
find
${
fastq_dir
}
-name
*${
id
}*
R2
*
fastq.gz |
paste
-sd
","
-
)
echo
"R1 FILES:
${
r1_files
}
"
echo
"R2 FILES:
${
r2_files
}
"
echo
"#### STRATEGY 2: Align vs mitochondrial genome only (mt), dedup, and extract
\
methylation"
echo
" 2.1 Aligning..."
${
bismark
}
${
mt_ref
}
-1
${
r1_files
}
-2
${
r2_files
}
--un
--ambiguous
--multicore
12
\
-o
${
bam_dir
}
--temp_dir
${
tmp_dir
}
--path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
>
${
log_dir
}
/2_1_aln.stdout 2>
${
log_dir
}
/2_1_aln.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 2.1"
exit
1
fi
echo
" 2.2 Merging unaligned and ambiguos reads..."
unaligned_list_r1
=
""
ambig_list_r1
=
""
unaligned_list_r2
=
""
ambig_list_r2
=
""
IFS
=
','
read
-r
-a
array
<<<
"
${
r1_files
}
"
for
element
in
"
${
array
[@]
}
"
;
do
uns
=
$(
basename
${
element
}
)
uns
=
"
${
bam_dir
}
/
${
uns
}
_unmapped_reads_1.fq.gz"
unaligned_list_r1
=
"
${
unaligned_list_r1
}
${
uns
}
"
amb
=
$(
basename
${
element
}
)
amb
=
"
${
bam_dir
}
/
${
amb
}
_ambiguous_reads_1.fq.gz"
ambig_list_r1
=
"
${
ambig_list_r1
}
${
amb
}
"
done
IFS
=
','
read
-r
-a
array
<<<
"
${
r2_files
}
"
for
element
in
"
${
array
[@]
}
"
;
do
uns
=
$(
basename
${
element
}
)
uns
=
"
${
bam_dir
}
/
${
uns
}
_unmapped_reads_2.fq.gz"
unaligned_list_r2
=
"
${
unaligned_list_r2
}
${
uns
}
"
amb
=
$(
basename
${
element
}
)
amb
=
"
${
bam_dir
}
/
${
amb
}
_ambiguous_reads_2.fq.gz"
ambig_list_r2
=
"
${
ambig_list_r2
}
${
amb
}
"
done
cat
${
unaligned_list_r1
}
>
${
bam_dir
}
/
${
id
}
_unmapped.R1.fastq.gz
cat
${
ambig_list_r1
}
>
${
bam_dir
}
/
${
id
}
_ambiguous.R1.fastq.gz
cat
${
unaligned_list_r2
}
>
${
bam_dir
}
/
${
id
}
_unmapped.R2.fastq.gz
cat
${
ambig_list_r2
}
>
${
bam_dir
}
/
${
id
}
_ambiguous.R2.fastq.gz
if
[
-s
"
${
bam_dir
}
/
${
id
}
_unmapped.R1.fastq.gz"
]
;
then
rm
${
unaligned_list_r1
}
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_unmapped.R2.fastq.gz"
]
;
then
rm
${
unaligned_list_r2
}
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_ambiguous.R1.fastq.gz"
]
;
then
rm
${
ambig_list_r1
}
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_ambiguous.R2.fastq.gz"
]
;
then
rm
${
ambig_list_r2
}
fi
echo
" 2.3 Merging bamfiles..."
bamfile_list
=
""
IFS
=
','
read
-r
-a
array
<<<
"
${
r1_files
}
"
for
element
in
"
${
array
[@]
}
"
;
do
bamfile
=
$(
basename
${
element
}
)
bamfile
=
"
${
bam_dir
}
/
${
bamfile
%.fastq.gz
}
_bismark_bt2_pe.bam"
bamfile_list
=
"
${
bamfile_list
}
${
bamfile
}
"
done
${
samtools
}
merge
-n
-r
-@ 64
${
bam_dir
}
/
${
id
}
_mt.bam
${
bamfile_list
}
>
\
${
log_dir
}
/2_3_merge.stdout 2>
\
${
log_dir
}
/2_3_merge.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 2.3"
exit
1
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_mt.bam"
]
;
then
echo
" Removing lane bams..."
rm
${
bamfile_list
}
fi
echo
" 2.4 Dedupping alignment..."
${
deduplicate_bismark
}
--paired
--samtools_path
${
samtools_path
}
\
--output_dir
${
bam_dir
}
${
bam_dir
}
/
${
id
}
_mt.bam
>
\
${
log_dir
}
/2_4_dedup.stdout 2>
\
${
log_dir
}
/2_4_dedup.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 2.4"
exit
1
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_mt.deduplicated.bam"
]
;
then
mv
${
bam_dir
}
/
${
id
}
_mt.deduplicated.bam
${
bam_dir
}
/
${
id
}
_mt.dedup.bam
fi
echo
" 2.5 Extracting methylation in MT..."
${
bismark_meth_extract
}
-p
--comprehensive
--output
${
meth_dir
}
\
--gzip
--multicore
16
--cytosine_report
--genome_folder
${
mt_ref
}
\
--CX
--samtools_path
${
samtools_path
}
\
${
bam_dir
}
/
${
id
}
_mt.dedup.bam
>
\
${
log_dir
}
/2_5_meth.stdout 2>
\
${
log_dir
}
/2_5_meth.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 2.5"
exit
1
fi
echo
" 2.6 Fetching MT reads (and their pairs)..."
python3
${
fetch_mt_reads
}
${
bam_dir
}
/
${
id
}
_mt.dedup.bam |
\
${
samtools
}
view -@ 16
-b
-o
${
bam_dir
}
/
${
id
}
_mt_reads_and_mates.bam -
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 2.6"
exit
1
fi
pipeline_3.sh
0 → 100644
View file @
8297588c
#!/bin/bash
# exit when any command fails
set
-e
trap
'last_command=$current_command; current_command=$BASH_COMMAND'
DEBUG
trap
'echo "\"${last_command}\" command filed with exit code $?"'
EXIT
if
[
"
$1
"
==
""
]
then
echo
"Provide ID!!"
exit
1
fi
wd
=
"/data/bs"
id
=
$1
ref_dir
=
"/data/ref"
fastq_dir
=
"/data/bs/trimmed_fq"
# Programs
bismark
=
"/usr/local/bin/bismark-0.22.3"
deduplicate_bismark
=
"/usr/local/bin/deduplicate_bismark-0.22.3"
bismark_meth_extract
=
"/usr/local/bin/bismark_methylation_extractor-0.22.3"
bismark_bedGraph
=
"/usr/local/bin/bismark2bedGraph-0.22.3"
bowtie2
=
"/usr/local/bin/bowtie2-2.4.41"
bowtie_path
=
"/opt/bowtie2-2.4.41/"
samtools
=
"/usr/local/bin/samtools-1.10"
samtools_path
=
"/opt/samtools-1.10"
fetch_mt_reads
=
"/data/bin/fetch_mt_and_mate/fetch_mt_and_mate.py"
ext_mt_srt
=
"/data/bs/ext_mt_srt.py"
#samviewwithmate="/data/bin/jvarkit/dist/samviewwithmate.jar"
#methylKit_extract="~/scripts/extractMethylation.R"
# References
b37_ref
=
${
ref_dir
}
/b37_decoy/
nuc_ref
=
${
ref_dir
}
/b37_decoy_no_mt/
mt_ref
=
${
ref_dir
}
/b37_decoy_only_mt/
# Output folders
bam_dir
=
${
wd
}
/bamfiles_3
mkdir
-p
${
bam_dir
}
meth_dir
=
${
wd
}
/meth_3
mkdir
-p
${
meth_dir
}
tmp_dir
=
${
wd
}
/tmp_3
mkdir
-p
${
tmp_dir
}
log_dir
=
${
wd
}
/logs_3
mkdir
-p
${
log_dir
}
# Get fastq files
r1_files
=
$(
find
${
fastq_dir
}
-name
*${
id
}*
R1.fastq.gz |
paste
-sd
","
-
)
r2_files
=
$(
find
${
fastq_dir
}
-name
*${
id
}*
R2
*
fastq.gz |
paste
-sd
","
-
)
echo
"R1 FILES:
${
r1_files
}
"
echo
"R2 FILES:
${
r2_files
}
"
echo
"#### STRATEGY 3: Align vs nuclear DNA, collect unmapped and align them to mt DNA
\
methylation"
echo
" 3.1 Aligning vs nuclear DNA..."
${
bismark
}
${
nuc_ref
}
-1
${
r1_files
}
-2
${
r2_files
}
--un
--ambiguous
--multicore
12
\
-o
${
bam_dir
}
--temp_dir
${
tmp_dir
}
--path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
>
${
log_dir
}
/3_1_aln.stdout 2>
${
log_dir
}
/3_1_aln.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.1"
exit
1
fi
echo
" 3.2 Merging unaligned and ambiguos reads..."
unaligned_list_r1
=
""
ambig_list_r1
=
""
unaligned_list_r2
=
""
ambig_list_r2
=
""
IFS
=
','
read
-r
-a
array
<<<
"
${
r1_files
}
"
for
element
in
"
${
array
[@]
}
"
;
do
uns
=
$(
basename
${
element
}
)
uns
=
"
${
bam_dir
}
/
${
uns
}
_unmapped_reads_1.fq.gz"
unaligned_list_r1
=
"
${
unaligned_list_r1
}
${
uns
}
"
amb
=
$(
basename
${
element
}
)
amb
=
"
${
bam_dir
}
/
${
amb
}
_ambiguous_reads_1.fq.gz"
ambig_list_r1
=
"
${
ambig_list_r1
}
${
amb
}
"
done
IFS
=
','
read
-r
-a
array
<<<
"
${
r2_files
}
"
for
element
in
"
${
array
[@]
}
"
;
do
uns
=
$(
basename
${
element
}
)
uns
=
"
${
bam_dir
}
/
${
uns
}
_unmapped_reads_2.fq.gz"
unaligned_list_r2
=
"
${
unaligned_list_r2
}
${
uns
}
"
amb
=
$(
basename
${
element
}
)
amb
=
"
${
bam_dir
}
/
${
amb
}
_ambiguous_reads_2.fq.gz"
ambig_list_r2
=
"
${
ambig_list_r2
}
${
amb
}
"
done
cat
${
unaligned_list_r1
}
>
${
bam_dir
}
/
${
id
}
_unmapped.R1.fastq.gz
cat
${
ambig_list_r1
}
>
${
bam_dir
}
/
${
id
}
_ambiguous.R1.fastq.gz
cat
${
unaligned_list_r2
}
>
${
bam_dir
}
/
${
id
}
_unmapped.R2.fastq.gz
cat
${
ambig_list_r2
}
>
${
bam_dir
}
/
${
id
}
_ambiguous.R2.fastq.gz
if
[
-s
"
${
bam_dir
}
/
${
id
}
_unmapped.R1.fastq.gz"
]
;
then
rm
${
unaligned_list_r1
}
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_unmapped.R2.fastq.gz"
]
;
then
rm
${
unaligned_list_r2
}
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_ambiguous.R1.fastq.gz"
]
;
then
rm
${
ambig_list_r1
}
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_ambiguous.R2.fastq.gz"
]
;
then
rm
${
ambig_list_r2
}
fi
echo
" 3.3 Merging bamfiles..."
bamfile_list
=
""
IFS
=
','
read
-r
-a
array
<<<
"
${
r1_files
}
"
for
element
in
"
${
array
[@]
}
"
;
do
bamfile
=
$(
basename
${
element
}
)
bamfile
=
"
${
bam_dir
}
/
${
bamfile
%.fastq.gz
}
_bismark_bt2_pe.bam"
bamfile_list
=
"
${
bamfile_list
}
${
bamfile
}
"
done
${
samtools
}
merge
-n
-r
-@ 64
${
bam_dir
}
/
${
id
}
_nuc.bam
${
bamfile_list
}
>
\
${
log_dir
}
/3_3_merge.stdout 2>
\
${
log_dir
}
/3_3_merge.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.3"
exit
1
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_nuc.bam"
]
;
then
echo
" Removing lane bams..."
rm
${
bamfile_list
}
fi
## Deduplication not really needed
## echo " 3.4 Dedupping alignment..."
## ${deduplicate_bismark} --paired --samtools_path ${samtools_path} \
## --output_dir ${bam_dir} ${bam_dir}/${id}_nuc.bam > \
## ${log_dir}/3_4_dedup.stdout 2> \
## ${log_dir}/3_4_dedup.stderr
## if [ "$?" != "0" ]; then
## echo "ERROR IN STEP 3.4"
## exit 1
## fi
##
## if [ -s "${bam_dir}/${id}_nuc.deduplicated.bam" ]; then
## mv ${bam_dir}/${id}_nuc.deduplicated.bam ${bam_dir}/${id}_nuc.dedup.bam
## fi
echo
" 3.4 Re-aligning unmapped reads to mtDNA..."
${
bismark
}
${
mt_ref
}
-1
${
bam_dir
}
/
${
id
}
_unmapped.R1.fastq.gz
\
-2
${
bam_dir
}
/
${
id
}
_unmapped.R2.fastq.gz
--multicore
12
\
-o
${
bam_dir
}
--temp_dir
${
tmp_dir
}
--path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
>
\
${
log_dir
}
/3_4_mt_map.stdout 2>
\
${
log_dir
}
/3_4_mt_map.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.4"
exit
1
fi
mv
${
bam_dir
}
/
${
id
}
_unmapped.R1_bismark_bt2_pe.bam
${
bam_dir
}
/
${
id
}
_unmapped_to_mt.bam
echo
" 3.5 Re-aligning unmapped reads to nuc+mt..."
${
bismark
}
${
b37_ref
}
-1
${
bam_dir
}
/
${
id
}
_unmapped.R1.fastq.gz
\
-2
${
bam_dir
}
/
${
id
}
_unmapped.R2.fastq.gz
--multicore
12
\
-o
${
bam_dir
}
--temp_dir
${
tmp_dir
}
--path_to_bowtie
${
bowtie_path
}
\
--samtools_path
${
samtools_path
}
>
\
${
log_dir
}
/3_5_b37_map.stdout 2>
\
${
log_dir
}
/3_5_b37_map.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.4"
exit
1
fi
mv
${
bam_dir
}
/
${
id
}
_unmapped.R1_bismark_bt2_pe.bam
${
bam_dir
}
/
${
id
}
_unmapped_to_b37.bam
echo
" 3.6 Dedupping alignments..."
${
deduplicate_bismark
}
--paired
--samtools_path
${
samtools_path
}
\
--output_dir
${
bam_dir
}
${
bam_dir
}
/
${
id
}
_unmapped_to_mt.bam
>
\
${
log_dir
}
/3_6_a_dedup.stdout 2>
\
${
log_dir
}
/3_6_a_dedup.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.6.a"
exit
1
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_unmapped_to_mt.deduplicated.bam"
]
;
then
mv
${
bam_dir
}
/
${
id
}
_unmapped_to_mt.deduplicated.bam
${
bam_dir
}
/
${
id
}
_unmapped_to_mt.dedup.bam
fi
${
deduplicate_bismark
}
--paired
--samtools_path
${
samtools_path
}
\
--output_dir
${
bam_dir
}
${
bam_dir
}
/
${
id
}
_unmapped_to_b37.bam
>
\
${
log_dir
}
/3_6_b_dedup.stdout 2>
\
${
log_dir
}
/3_6_b_dedup.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.6.b"
exit
1
fi
if
[
-s
"
${
bam_dir
}
/
${
id
}
_unmapped_to_b37.deduplicated.bam"
]
;
then
mv
${
bam_dir
}
/
${
id
}
_unmapped_to_b37.deduplicated.bam
${
bam_dir
}
/
${
id
}
_unmapped_to_b37.dedup.bam
fi
echo
" 3.7 Extracting methylation in MT..."
${
bismark_meth_extract
}
-p
--comprehensive
--output
${
meth_dir
}
\
--gzip
--multicore
16
\
--samtools_path
${
samtools_path
}
\
${
bam_dir
}
/
${
id
}
_unmapped_to_mt.dedup.bam
>
\
${
log_dir
}
/3_7_a_meth.stdout 2>
\
${
log_dir
}
/3_7_a_meth.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.7.a"
exit
1
fi
${
bismark_meth_extract
}
-p
--comprehensive
--output
${
meth_dir
}
\
--gzip
--multicore
16
\
--samtools_path
${
samtools_path
}
\
${
bam_dir
}
/
${
id
}
_unmapped_to_b37.dedup.bam
>
\
${
log_dir
}
/3_7_a_meth.stdout 2>
\
${
log_dir
}
/3_7_b_meth.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.7.b"
exit
1
fi
echo
" 3.8 Subsetting MT reads from methylation table..."
pigz
-dc
${
meth_dir
}
/CpG_context_
${
id
}
_unmapped_to_b37.dedup.txt.gz |
\
python3
${
ext_mt_srt
}
| pigz
-nc
>
\
${
meth_dir
}
/CpG_context_
${
id
}
_unmapped_to_b37_MT.dedup.txt.gz
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.8"
exit
1
fi
pigz
-dc
${
meth_dir
}
/CHH_context_
${
id
}
_unmapped_to_b37.dedup.txt.gz |
\
python3
${
ext_mt_srt
}
| pigz
-nc
>
\
${
meth_dir
}
/CHH_context_
${
id
}
_unmapped_to_b37_MT.dedup.txt.gz
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.8"
exit
1
fi
pigz
-dc
${
meth_dir
}
/CHG_context_
${
id
}
_unmapped_to_b37.dedup.txt.gz |
\
python3
${
ext_mt_srt
}
| pigz
-nc
>
\
${
meth_dir
}
/CHG_context_
${
id
}
_unmapped_to_b37_MT.dedup.txt.gz
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.8"
exit
1
fi
## PROBABLY WORTH REMOVING NON-MT FILES
#if [ -s "${meth_dir}/CpG_context_${id}_unmapped_to_b37_MT.dedup.bam" ]; then
# rm ${meth_dir}/CpG_context_${id}_unmapped_to_b37.dedup.bam
#fi
#if [ -s "${meth_dir}/CHH_context_${id}_unmapped_to_b37_MT.dedup.bam" ]; then
# rm ${meth_dir}/CHH_context_${id}_unmapped_to_b37.dedup.bam
#fi
#if [ -s "${meth_dir}/CHG_context_${id}_unmapped_to_b37_MT.dedup.bam" ]; then
# rm ${meth_dir}/CHG_context_${id}_unmapped_to_b37.dedup.bam
#fi
#
#
#
echo
" 3.9 Generating bedGraph for MT..."
${
bismark_bedGraph
}
--output
${
id
}
_MT.bedGraph
\
--dir
${
meth_dir
}
--CX
--buffer_size
100G
\
${
meth_dir
}
/CpG_context_
${
id
}
_unmapped_to_mt.dedup.txt.gz
\
${
meth_dir
}
/CHH_context_
${
id
}
_unmapped_to_mt.dedup.txt.gz
\
${
meth_dir
}
/CHG_context_
${
id
}
_unmapped_to_mt.dedup.txt.gz
>
\
${
log_dir
}
/3_9_a_bedgraph.stdout 2>
\
${
log_dir
}
/3_9_a_bedgraph.stderr
if
[
"
$?
"
!=
"0"
]
;
then
echo
"ERROR IN STEP 3.9a"
exit
1
fi
${
bismark_bedGraph
}
--output
${
id
}
_b37_then_MT.bedGraph
\
--dir
${
meth_dir
}
--CX
--buffer_size
100G
\
${
meth_dir
}
/CpG_context_
${
id
}
_unmapped_to_b37_MT.dedup.txt.gz
\
${
meth_dir
}
/CHH_context_
${
id
}
_unmapped_to_b37_MT.dedup.txt.gz
\