Commit d22e6824 authored by Gonzalo S Nido
Browse files

Merge branch 'scp-sample' into 'master'

First ready-to-use Azure version

See merge request !1
parents 10abca86 84b7d4a3
......@@ -5,6 +5,7 @@ sudo apt update && sudo apt -y upgrade
# Install the base toolchain and compression utilities, then the editor.
printf '%s\n' "#### Installing some Ubuntu packages..."
sudo apt -y install \
  unzip build-essential libncurses5-dev zlib1g-dev libbz2-dev liblzma-dev \
  p7zip-full pigz
sudo apt -y install neovim
# The samtools steps that follow start from the home directory.
printf '%s\n' "#### Installing samtools 1.10..."
cd ~
......@@ -53,6 +54,13 @@ pip install psrecord
# Single quotes keep $PATH literal in ~/.bashrc so it is expanded every time
# the file is sourced, instead of freezing the installer's PATH into it.
echo 'export PATH=~/.local/bin/:$PATH' >> ~/.bashrc
# Clone the personal configuration repository into the home directory and run
# its installer.
echo "Fetching config files"
cd ~
git clone https://git.app.uib.no/gni042/configs.git
cd configs
bash INSTALL.sh
#R packages
#----------
#
......
#!/bin/bash
# Abort on the first command that exits with a non-zero status.
set -e

# EXIT handler: reports the command that stopped the script when the exit
# status is non-zero.
# Globals: last_command (read) - maintained by the DEBUG trap below.
# Outputs: one line on stdout when the script exits with a non-zero status.
report_failed_command() {
  # On entry $? is still the status the script is exiting with.
  local status=$?
  if [ "${status}" -ne 0 ]; then
    echo "\"${last_command}\" command failed with exit code different from 0"
  fi
}

# Before every simple command, remember the command about to run and the one
# that ran before it.
trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
# The handler is a single function call: the DEBUG trap fires once for that
# call (moving the failing command into last_command) and, because functions do
# not inherit the DEBUG trap unless functrace is enabled, not again for the
# commands inside it. A multi-command handler would overwrite last_command with
# its own commands before the message is printed.
trap report_failed_command EXIT
# Exactly three positional arguments are required; otherwise print the usage
# text and exit with status 1.
if [[ $# -ne 3 ]]; then
  # Quoted delimiter: the text is printed literally, nothing is expanded.
  cat <<'USAGE_TEXT'

USAGE:
process_by_sampleid.sh <SAMPLE_ID> <STORAGE_ACCT_FASTQ>/<FILESHARE_FASTQ> <STORAGE_ACCT_RESULTS>/<FILESHARE_RESULTS_PREFIX>
 - SAMPLE_ID: e.g. SL284409
 - STORAGE_ACCT_FASTQ: e.g. forneurosysmedutvcompute
 - FILESHARE_FASTQ: Name of the fileshare, e.g. wgbsfqdata
 - STORAGE_ACCT_RESULTS: e.g. forneurosysmedutvcompute
 - FILESHARE_RESULTS_PREFIX: Prefix of the name of the results fileshare, e.g. prefix

There should be files in the /etc/smbcredentials/ folder named <STORAGE_ACCT_FASTQ>.cred and <STORAGE_ACCT_RESULTS>.cred (the storage account can be the same for both input fastqs and results, then only one file is needed). This/these file/s contain the username=... and the password=... lines with the storage account name and the key.

For the results, a fileshare will be created for the sample analysed, whose name will be the lower-cased concatenation <FILESHARE_RESULTS_PREFIX>-<SAMPLE_ID>. If a fileshare with that name already exists, a warning is shown and, after a 30-second countdown, the run continues and may overwrite its files.

EXAMPLE: process_by_sampleid.sh SL284405 forneurosysmedutvcompute/wgbsfqdata forneurosysmedutvcompute/prefix
USAGE_TEXT
  exit 1
fi
# Derives the script's working variables from the three positional arguments.
# Arguments:
#   $1 - sample id
#   $2 - <storage account>/<file share> that holds the FASTQ files
#   $3 - <storage account>/<prefix of the results file share>
# Globals (written): sample_id, storageAccountFastq, shareNameFastq,
#   storageAccountResults, shareNameResults, smbCredFastq, smbCredResults
parse_sample_args() {
  sample_id=$1
  # Text before the last "/" is the storage account, text after the first "/"
  # is the share name.
  storageAccountFastq=${2%/*}
  shareNameFastq=${2#*/}
  storageAccountResults=${3%/*}
  shareNameResults=${3#*/}-${sample_id}
  # Lower-case with parameter expansion: unlike an unquoted `echo | tr`, the
  # value is never subject to word splitting or pathname expansion.
  shareNameResults=${shareNameResults,,}
  smbCredFastq=/etc/smbcredentials/${storageAccountFastq}.cred
  smbCredResults=/etc/smbcredentials/${storageAccountResults}.cred
}
parse_sample_args "$@"
# Show the values derived from the command line, then the heading of the
# section where the actual work starts. One printf argument per output line.
printf '%s\n' \
  "" \
  "INPUT / VARIABLES" \
  "=================" \
  "- sample_id: ${sample_id}" \
  "- storageAccountFastq: ${storageAccountFastq}" \
  "- shareNameFastq: ${shareNameFastq}" \
  "- storageAccountResults: ${storageAccountResults}" \
  "- shareNameResults: ${shareNameResults}" \
  "- smbCredFastq: ${smbCredFastq}" \
  "- smbCredResults: ${smbCredResults}" \
  "" \
  "ACTUALLY DOING STUFF" \
  "===================="
# Check credential files exist
echo ">> Getting keys (needs sudo)..."
# -s is false both for a missing file and for an empty one, so the message
# mentions both cases.
for cred_file in "${smbCredFastq}" "${smbCredResults}"; do
  if [[ ! -s "${cred_file}" ]]; then
    echo " [ERROR] File \"${cred_file}\" does not exist or is empty"
    exit 2
  fi
done
# Read the storage account key from the password= line of each credentials
# file. The pipeline's status is that of sed, so a file without a password=
# line yields an empty key (reported below) rather than aborting under set -e.
storageAccountKeyFastq=$( sudo grep '^password=' "${smbCredFastq}" | sed 's/^password=//' )
storageAccountKeyResults=$( sudo grep '^password=' "${smbCredResults}" | sed 's/^password=//' )
# Only the password field is verified here; the username is not read.
if [[ -z "${storageAccountKeyFastq}" || -z "${storageAccountKeyResults}" ]]; then
  echo " [ERROR] Samba credentials need to have \"username\" and \"password\" fields"
  exit 2
fi
echo " [DONE]"
# Mounts an Azure file share over CIFS, unmounting first if the directory is
# already a mount point.
# Arguments:
#   $1 - storage account name
#   $2 - file share name
#   $3 - path of the SMB credentials file
#   $4 - local mount point (created if missing)
#   $5 - label used in the error message
# Exits the script with status 2 if the mount fails.
mount_azure_share() {
  local account=$1 share=$2 cred_file=$3 mount_dir=$4 label=$5
  sudo mkdir -p "${mount_dir}"
  # mountpoint tests this exact directory; a substring search of /proc/mounts
  # would also match other mounts whose path merely starts with the same text.
  if mountpoint -q "${mount_dir}"; then
    echo " *Already mounted, remounting..."
    sudo umount "${mount_dir}"
  fi
  # The command is tested directly: under `set -e` a separate `$?` check after
  # a failed mount would never run.
  if ! sudo mount -t cifs "//${account}.file.core.windows.net/${share}" \
    "${mount_dir}" \
    -o "vers=3.0,credentials=${cred_file},dir_mode=0755,file_mode=0644,serverino,gid=1000,uid=1000,forceuid,forcegid"; then
    echo " [ERROR] Could not mount file share for ${label}"
    exit 2
  fi
}

echo ">> Preparing input file share... "
# Check that fastq file share exists
exists=$( az storage share exists --account-name "${storageAccountFastq}" --account-key "${storageAccountKeyFastq}" --name "${shareNameFastq}" --output tsv )
if [[ "${exists}" != "True" ]]; then
  echo " [ERROR] File share \"${shareNameFastq}\" in storage account \"${storageAccountFastq}\" does not exist"
  exit 2
fi
fastq_folder="/fileshares/${storageAccountFastq}/${shareNameFastq}"
# Mount it
mount_azure_share "${storageAccountFastq}" "${shareNameFastq}" "${smbCredFastq}" "${fastq_folder}" "FASTQ files"
echo " [DONE]"
echo " *Input FASTQ file share mounted in \"${fastq_folder}\""

echo ">> Preparing output file share... "
# If the results file share already exists, warn and give the user 30 seconds
# to interrupt before continuing.
exists=$( az storage share exists --account-name "${storageAccountResults}" --account-key "${storageAccountKeyResults}" --name "${shareNameResults}" --output tsv )
if [[ "${exists}" == "True" ]]; then
  echo " [WARNING] File share \"${shareNameResults}\" in \"${storageAccountResults}\" ALREADY EXISTS!"
  echo -n " OVERWRITING FILES IN 30 SECONDS..."
  # The backspaces move the cursor back so the countdown is redrawn in place.
  for i in {30..01}; do
    echo -ne "\b\b\b\b\b\b\b\b\b\b\b\b\b${i} SECONDS..."
    sleep 1
  done
  echo -e "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b! "
fi
results_folder="/fileshares/${storageAccountResults}/${shareNameResults}"
# Create it
az storage share create \
  --account-name "${storageAccountResults}" \
  --account-key "${storageAccountKeyResults}" \
  --name "${shareNameResults}" \
  --quota 3072 \
  --output none
mount_azure_share "${storageAccountResults}" "${shareNameResults}" "${smbCredResults}" "${results_folder}" "RESULTS"
echo " [DONE]"
echo " *Results file share mounted in \"${results_folder}\""
# Get fastq file names...
echo ">> Getting FASTQ filenames... "
# Let the shell expand the pattern straight into the array: every match stays
# one element and no external command has to cope with an unmatched pattern.
all_files=("${fastq_folder}"/*"${sample_id}"*.fastq.gz)
# Without nullglob an unmatched pattern is kept as literal text, so an entry
# that does not exist means nothing matched.
if [[ ! -e "${all_files[0]}" ]]; then
  all_files=()
fi
num_files=${#all_files[@]}
if [[ ${num_files} -eq 0 ]]; then
  echo " [ERROR] No fastq files retrieved"
  exit 3
fi
echo " [DONE]"
echo " *Number of FASTQ files retrieved: ${num_files}"
declare -a R1=()
declare -a R2=()
# Split the files into the R1 and R2 lists. Pathname expansion already returns
# the names sorted, so each list keeps that order.
for file in "${all_files[@]}"; do
  # Split only the file name on "." so a dot in a directory name cannot shift
  # the fields; the second field tells R1 apart, anything else counts as R2.
  IFS='.' read -r -a array <<< "${file##*/}"
  if [[ "${array[1]}" == "R1" ]]; then
    echo " - R1: ${file}"
    R1+=("${file}")
  else
    echo " - R2: ${file}"
    R2+=("${file}")
  fi
done
# RUNNIN' THE PIPELINE
echo ">> RUNNING THE PIPELINE..."
echo
# Arguments: every R1 file, then every R2 file, then the results folder.
# Quoted expansions pass each path as exactly one argument.
bash /home/gsnido/wgbs-pipeline/wgbs-pipeline.sh "${R1[@]}" "${R2[@]}" "${results_folder}"
echo
echo "**************FINISHED**************"
echo
# Get space used by files in share in GB
# NOTE(review): presumably the pause lets the share statistics catch up with
# the files just written - confirm.
sleep 60
# --output tsv, as in the other az queries of this script, so the value does
# not depend on the output format configured for the az CLI.
size1=$( az storage share stats --name "${shareNameResults}" --account-name "${storageAccountResults}" --account-key "${storageAccountKeyResults}" --output tsv )
# Size of the mounted folder in whole GiB; tr drops the G suffix added by -BG.
size2=$( du -s -BG "${results_folder}" | cut -f 1 | tr -d 'G' )
# New quota: the larger of the two measurements plus 10.
size=$(( size1 > size2 ? size1 + 10 : size2 + 10))
echo " Output files use around $(( size - 10 )) G"
echo " Resizing fileshare quota to $size..."
az storage share update --name "${shareNameResults}" --account-name "${storageAccountResults}" --account-key "${storageAccountKeyResults}" --quota "${size}"
echo
echo " [DONE]"
......@@ -177,8 +177,8 @@ if $MONITOR; then
fi
# $SUBSET_READS is executed as a command here, so it is expected to hold
# `true` or `false` at this point.
if $SUBSET_READS; then
# NOTE(review): the first assignment/echo pair and the second one look like the
# removed and added sides of a diff hunk - confirm. As written, the second
# assignment overrides the first and both messages are printed.
SUBSET_READS="-u 1000000"
echo "SUBSETTING TO 1000000 READS ONLY!!!!"
SUBSET_READS="-u 100000"
echo "SUBSETTING TO 100000 READS ONLY!!!!"
echo
fi
......@@ -445,6 +445,9 @@ if [ -s "${output_dir}/MT_${sample_id}.deduplicated.bam" ]; then
mv ${output_dir}/MT_${sample_id}.deduplicated.bam ${output_dir}/MT_${sample_id}.dedup.bam
fi
# Once the deduplicated BAM exists and is not empty, delete
# MT_<sample_id>.bam (presumably the BAM it was produced from - confirm).
# The path is quoted like the one in the test, so it stays a single argument.
if [ -s "${output_dir}/MT_${sample_id}.dedup.bam" ]; then
  rm -- "${output_dir}/MT_${sample_id}.bam"
fi
echo -ne " 2.4 Extracting methylation... "
......@@ -642,6 +645,9 @@ if [ -s "${output_dir}/umap_to_MT_${sample_id}.deduplicated.bam" ]; then
mv ${output_dir}/umap_to_MT_${sample_id}.deduplicated.bam ${output_dir}/umap_to_MT_${sample_id}.dedup.bam
fi
# Once the deduplicated BAM exists and is not empty, delete
# umap_to_MT_<sample_id>.bam (presumably the BAM it was produced from -
# confirm). The path is quoted like the one in the test, so it stays a single
# argument.
if [ -s "${output_dir}/umap_to_MT_${sample_id}.dedup.bam" ]; then
  rm -- "${output_dir}/umap_to_MT_${sample_id}.bam"
fi
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment