Commit f1f7fb04 authored by Gonzalo S Nido
Browse files

Important updates (Closes #1)

parent 60eb9f6f
#!/bin/bash
# Abort on the first command that returns a non-zero status.
set -e
# Before every command, shift the previously recorded command text into
# last_command so that the EXIT trap below can name it.
trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
# When the script ends with a non-zero status, report the recorded command.
trap 'if [ "$?" -ne "0" ]; then echo "\"${last_command}\" command failed with exit code different from 0"; fi' EXIT
# set -e
# trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
# trap 'if [ "$?" -ne "0" ]; then echo "\"${last_command}\" command filed with exit code different from 0"; fi' EXIT
###### FUNCTIONS

#######################################
# Print the command-line help and terminate the script.
# Arguments: $1 - optional exit status; defaults to 1, the value this
#                 function has always exited with.
# Outputs:   the help text on stdout.
#######################################
usage() {
  # Quoted delimiter: the text is printed literally, so the double quotes
  # around the strategy values reach the user.
  cat <<'EOF'

USAGE:
process_by_sampleid.sh [OPTIONS] -s <SAMPLE_ID> -i <STORAGE_ACCT_FASTQ>/<FILESHARE_FASTQ> -o <STORAGE_ACCT_RESULTS>/<FILESHARE_RESULTS_PREFIX>
 - SAMPLE_ID: e.g. SL284409
 - STORAGE_ACCT_FASTQ: e.g. forneurosysmedutvcompute
 - FILESHARE_FASTQ: Name of the fileshare, e.g. wgbsfqdata
 - STORAGE_ACCT_RESULTS: e.g. forneurosysmedutvcompute

 EXTRA OPTIONS
 -f --force
 Overwrite previous results, if any.
 -m --monitor
 Monitor RAM and CPU usage.
 -u --subset INTEGER
 Subset reads to test pipeline (to INTEGER).
 --strategies STRATEGIES
 Only run selected strategies. Possible choices of STRATEGIES: "1", "2", "3"
 or any combination of them, comma-separated (e.g. "1,2,3", "2,3").
 --input-samba-credentials FILE
 File with the samba credentials to mount input fileshare (fastqs).
 --output-samba-credentials FILE
 File with the samba credentials to mount output fileshare (results).
There should be files in the /etc/smbcredentials/ folder named <STORAGE_ACCT_FASTQ>.cred and <STORAGE_ACCT_RESULTS>.cred (the storage account can be the same for both input fastqs and results, then only one file is needed). This/these file/s contain the username=... and the password=... lines with the storage account name and the key.

For the results, a new fileshare will be created for the sample analysed, whose name will be a concatenation of the form <FILESHARE_RESULTS_PREFIX>-<SAMPLE_ID>. If the fileshare name exists and is not empty, the program will fail.

EXAMPLE: process_by_sampleid.sh -s SL284405 -i forneurosysmedutvcompute/wgbsfqdata -o forneurosysmedutvcompute/prefix
EOF
  exit "${1:-1}"
}
###### PARSE CMDLINE
# Defaults; every one of these is a global that parse_cmdline may overwrite.
sample_id=
storageAccountFastq=
shareNameFastq=
storageAccountResults=
shareNameResults=
FORCE=0
MONITOR=0
SUBSET=0
STRATEGIES="1,2,3"
smbCredFastq=
smbCredResults=

#######################################
# Abort with status 2 when an option is the last word on the command line.
# Arguments: $1 - option as typed
#            $2 - number of words left, the option itself included
#######################################
require_option_value() {
  if (( $2 < 2 )); then
    echo "[ERROR] $1 option needs a value."
    exit 2
  fi
}

#######################################
# Abort with status 2 unless a value has the <ACCOUNT>/<SHARE> form.
# Arguments: $1 - option as typed
#            $2 - value given for it
#######################################
require_account_share() {
  if [[ "$2" != */* ]]; then
    echo "[ERROR] $1 option needs <STORAGE_ACCT>/<FILESHARE>, e.g. forneurosysmedutvcompute/wgbsfqdata"
    exit 2
  fi
}

#######################################
# Parse the command line into the globals initialised above.
# Arguments: the script's command-line words
# Exits:     2 on a missing or malformed option value; unknown options
#            print the help through usage and exit 1.
#######################################
parse_cmdline() {
  # Loop on the argument count rather than on "$1" being non-empty, so an
  # empty word is reported as an unknown argument instead of silently
  # ending the parse.
  while (( $# > 0 )); do
    case "$1" in
      -s | --sample-id)
        require_option_value "$1" "$#"
        sample_id=$2
        shift
        ;;
      -i | --input-storage-account)
        require_option_value "$1" "$#"
        require_account_share "$1" "$2"
        # Split at the first slash: account before it, share after it.
        storageAccountFastq=${2%%/*}
        shareNameFastq=${2#*/}
        shift
        ;;
      -o | --output-storage-account)
        require_option_value "$1" "$#"
        require_account_share "$1" "$2"
        storageAccountResults=${2%%/*}
        shareNameResults=${2#*/}
        shift
        ;;
      --input-samba-credentials)
        require_option_value "$1" "$#"
        smbCredFastq=$2
        shift
        ;;
      --output-samba-credentials)
        require_option_value "$1" "$#"
        smbCredResults=$2
        shift
        ;;
      -f | --force)
        FORCE=1
        ;;
      -m | --monitor)
        MONITOR=1
        ;;
      -u | --subset)
        require_option_value "$1" "$#"
        SUBSET=$2
        shift
        ;;
      --strategies)
        require_option_value "$1" "$#"
        STRATEGIES=$2
        shift
        ;;
      -h | --help)
        usage
        exit
        ;;
      *)
        usage
        exit 1
        ;;
    esac
    shift
  done
}

parse_cmdline "$@"
# The original top-level loop consumed every positional parameter; clear them
# so that behaviour is unchanged for whatever follows.
set --
# Validate the parsed command line. Each failure prints one [ERROR] line and
# exits with status 2.
if [[ -z "$sample_id" ]]; then
  echo "[ERROR] -s option is mandatory."
  exit 2
fi
# -i must have produced both a storage account and a share name.
if [[ -z "$storageAccountFastq" || -z "$shareNameFastq" ]]; then
  echo "[ERROR] -i option is mandatory, e.g. forneurosysmedutvcompute/wgbsfqdata"
  exit 2
fi
# -o must have produced both a storage account and a share-name prefix.
if [[ -z "$storageAccountResults" || -z "$shareNameResults" ]]; then
  echo "[ERROR] -o option is mandatory, e.g. forneurosysmedutvcompute/prefix"
  exit 2
fi
# SUBSET must be a non-negative integer; an empty value fails the same regex,
# so no separate emptiness check is needed.
re='^[0-9]+$'
if ! [[ $SUBSET =~ $re ]]; then
  echo "[ERROR] -u option needs an integer, e.g. -u 10000"
  exit 2
fi
#######################################
# Derive the settings that depend on the parsed options.
# Globals (read):    sample_id, storageAccountFastq, storageAccountResults
# Globals (written): shareNameResults, smbCredFastq, smbCredResults
# Outputs:           a [WARNING] line on stdout for every credentials file
#                    that falls back to its default path.
#######################################
finalize_settings() {
  # The results share is named <prefix>-<sample_id>, appended exactly once and
  # lower-cased (presumably because Azure share names must be lower case).
  shareNameResults=$(printf '%s' "${shareNameResults}-${sample_id}" | tr '[:upper:]' '[:lower:]')

  # Only fall back to the default credentials path when none was given, so
  # --input-samba-credentials / --output-samba-credentials are honoured.
  if [[ -z "$smbCredFastq" ]]; then
    smbCredFastq=/etc/smbcredentials/${storageAccountFastq}.cred
    echo "[WARNING] Using default samba credentials file path: $smbCredFastq"
  fi
  if [[ -z "$smbCredResults" ]]; then
    smbCredResults=/etc/smbcredentials/${storageAccountResults}.cred
    echo "[WARNING] Using default samba credentials file path: $smbCredResults"
  fi
}

finalize_settings
# Blank separator line followed by the heading of the settings report.
printf '\n%s\n' "INPUT / VARIABLES"
......@@ -41,6 +143,10 @@ echo "- storageAccountResults: ${storageAccountResults}"
# Report the resolved settings, one per line, then a blank line and the
# heading of the working phase.
printf '%s\n' \
  "- shareNameResults: ${shareNameResults}" \
  "- smbCredFastq: ${smbCredFastq}" \
  "- smbCredResults: ${smbCredResults}" \
  "- FORCE: $FORCE" \
  "- MONITOR: $MONITOR" \
  "- SUBSET: $SUBSET" \
  "- STRATEGIES: ${STRATEGIES}" \
  "" \
  "ACTUALLY DOING STUFF"
......@@ -66,6 +172,7 @@ fi
echo " [DONE]"
echo ">> Preparing input file share... "
# Check that fastq file share exists. Every expansion is quoted so that an
# account name, key or share name is always passed as exactly one argument.
exists=$( az storage share exists --account-name "${storageAccountFastq}" --account-key "${storageAccountKeyFastq}" --name "${shareNameFastq}" --output tsv )
......@@ -102,14 +209,9 @@ echo " *Input FASTQ file share mounted in \"${fastq_folder}\""
echo ">> Preparing output file share... "
# Ask Azure whether the results file share already exists.
exists=$( az storage share exists --account-name "${storageAccountResults}" --account-key "${storageAccountKeyResults}" --name "${shareNameResults}" --output tsv )
# An existing share is only acceptable when the user asked to overwrite (-f).
# NOTE(review): this branch reports the error but does not exit; confirm
# whether a later step is expected to stop the run.
if [[ $exists == "True" && $FORCE == 0 ]]; then
  echo " [ERROR] File share \"${shareNameResults}\" in \"${storageAccountResults}\" ALREADY EXISTS!"
  echo " To overwrite, use -f option"
fi
# Local path under which the results share is used from here on.
results_folder="/fileshares/${storageAccountResults}/${shareNameResults}"
......@@ -141,10 +243,13 @@ sudo mount -t cifs //${storageAccountResults}.file.core.windows.net/${shareNameR
# Inspect the status of the command that ran immediately before this line;
# nothing may be inserted in between, or $? would be overwritten.
# NOTE(review): with `set -e` active a failing command would normally end the
# script before this test is reached - confirm how the preceding command is run.
if [[ $? != 0 ]]; then
  echo " [ERROR] Could not mount file share for RESULTS"
  exit 2
else
  # Work from inside the results folder from now on.
  cd "${results_folder}" || exit 2
fi
echo " [DONE]"
echo " *Results file share mounted in \"${results_folder}\""
......@@ -159,7 +264,7 @@ if [[ $num_files -eq 0 ]]; then
fi
echo " [DONE]"
# Report the FASTQ file count once.
echo " *Number of FASTQ files found: ${num_files}"
# Indexed arrays that are expanded later as the pipeline's R1 and R2 arguments.
declare -a R1
declare -a R2
......@@ -178,15 +283,28 @@ done
# RUNNIN' THE PIPELINE

#######################################
# Assemble the pipeline command line as an array, so that paths containing
# spaces or glob characters stay intact when the command is executed.
# Globals (read):    results_folder, FORCE, MONITOR, SUBSET, STRATEGIES, R1, R2
# Globals (written): pipeline_cmd
#######################################
build_pipeline_cmd() {
  pipeline_cmd=(bash /home/gsnido/wgbs-pipeline/wgbs-pipeline.sh -o "${results_folder}")
  if [[ $FORCE != 0 ]]; then
    pipeline_cmd+=(-f)
  fi
  if [[ $MONITOR != 0 ]]; then
    pipeline_cmd+=(-m)
  fi
  if [[ $SUBSET != 0 ]]; then
    pipeline_cmd+=(-u "${SUBSET}")
  fi
  pipeline_cmd+=(-s "$STRATEGIES" "${R1[@]}" "${R2[@]}")
}

build_pipeline_cmd
# Space-joined copy of the command, used only for the log line below.
PIPELINE_CMD="${pipeline_cmd[*]}"
echo ">> RUNNING THE PIPELINE..."
echo
echo " CMD = \"${PIPELINE_CMD}\""
# Run the pipeline exactly once, through the option-based interface.
"${pipeline_cmd[@]}"
echo
echo "**************FINISHED**************"
echo
# Get space used by files in share in GB
# Wait once before querying the share statistics.
sleep 30
# size1: whatever `az storage share stats` prints for the results share.
size1=$( az storage share stats --name "${shareNameResults}" --account-name "${storageAccountResults}" --account-key "${storageAccountKeyResults}" )
# size2: `du` total for the results folder in G blocks, with the unit stripped.
size2=$( du -sh -BG "${results_folder}" | cut -f 1 | tr -d 'G' )
# New quota: the larger of the two figures plus 10 of headroom.
size=$(( size1 > size2 ? size1 + 10 : size2 + 10 ))
......@@ -197,6 +315,11 @@ echo " Resizing fileshare quota to $size..."
# Set the results share quota to the computed size; every expansion is quoted
# so each value is passed as exactly one argument.
az storage share update --name "${shareNameResults}" --account-name "${storageAccountResults}" --account-key "${storageAccountKeyResults}" --quota "${size}"
echo
# Move to the home directory before unmounting (presumably so the working
# directory is not inside a share that is about to be unmounted).
cd ~
echo " Unmounting FASTQ folder..."
sudo umount "${fastq_folder}"
echo " Unmounting results folder..."
sudo umount "${results_folder}"
echo " [DONE]"
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment