From 7d8930801eb7542052d5c0481d2ef813643e4d17 Mon Sep 17 00:00:00 2001 From: khalid belkhir <khalid.belkhir@univ-montp2.fr> Date: Mon, 9 Dec 2019 15:33:07 +0100 Subject: [PATCH] first commit --- Dockerfile | 59 +++ README.md | 1 + deploy.sh | 118 ++++++ deployBigMem.sh | 67 ++++ files/Snakefile | 481 ++++++++++++++++++++++++ files/generate_multiqc_config.py | 43 +++ files/get_samples.py | 43 +++ files/params.total.yml | 360 ++++++++++++++++++ files/scripts/deseq2.script.R | 132 +++++++ files/scripts/edger.script.R | 111 ++++++ files/singularity.def | 122 ++++++ files/tools.py | 26 ++ install.sh | 2 + sagApp/R/chooser.R | 40 ++ sagApp/R/menugauche.R | 41 ++ sagApp/app.R | 238 ++++++++++++ sagApp/pages/pages_def_de.R | 77 ++++ sagApp/pages/pages_def_global_params.R | 41 ++ sagApp/pages/pages_def_quality_check.R | 22 ++ sagApp/pages/pages_def_quantification.R | 70 ++++ sagApp/pages/pages_def_trimming.R | 26 ++ sagApp/sagApp.Rproj | 15 + sagApp/server/opt_global.R | 315 ++++++++++++++++ sagApp/www/added_styles.css | 0 sagApp/www/chooser-binding.js | 84 +++++ waw_workflow.qsub | 28 ++ 26 files changed, 2562 insertions(+) create mode 100644 Dockerfile create mode 100644 deploy.sh create mode 100644 deployBigMem.sh create mode 100644 files/Snakefile create mode 100644 files/generate_multiqc_config.py create mode 100755 files/get_samples.py create mode 100644 files/params.total.yml create mode 100644 files/scripts/deseq2.script.R create mode 100644 files/scripts/edger.script.R create mode 100644 files/singularity.def create mode 100644 files/tools.py create mode 100644 install.sh create mode 100644 sagApp/R/chooser.R create mode 100644 sagApp/R/menugauche.R create mode 100644 sagApp/app.R create mode 100644 sagApp/pages/pages_def_de.R create mode 100644 sagApp/pages/pages_def_global_params.R create mode 100644 sagApp/pages/pages_def_quality_check.R create mode 100644 sagApp/pages/pages_def_quantification.R create mode 100644 sagApp/pages/pages_def_trimming.R create mode 100644 sagApp/sagApp.Rproj create mode 100644 sagApp/server/opt_global.R create mode 100644 sagApp/www/added_styles.css create mode 100644 sagApp/www/chooser-binding.js create mode 100644 waw_workflow.qsub diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8bad83c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,59 @@ +FROM mmassaviol/mbb_workflows_base:latest + +COPY files /workflow +COPY sagApp /sagApp + +RUN cd /opt/biotools \ + && wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-0.38.zip \ + && unzip Trimmomatic-0.38.zip \ + && echo -e '#!/bin/bash java -jar /opt/biotools/Trimmomatic-0.38/trimmomatic-0.38.jar' > bin/trimmomatic \ + && chmod 777 bin/trimmomatic \ + && rm Trimmomatic-0.38.zip + +RUN apt install -y fastqc=0.11.5+dfsg-6 + +RUN cd /opt/biotools \ + && wget https://github.com/pachterlab/kallisto/releases/download/v0.45.0/kallisto_linux-v0.45.0.tar.gz \ + && tar -zxvf kallisto_linux-v0.45.0.tar.gz \ + && mv kallisto_linux-v0.45.0/kallisto bin \ + && rm -r kallisto_linux-v0.45.0.tar.gz kallisto_linux-v0.45.0 + +RUN cd /opt/biotools \ + && wget https://github.com/COMBINE-lab/salmon/releases/download/v1.0.0/salmon-1.0.0_linux_x86_64.tar.gz \ + && tar -zxvf salmon-1.0.0_linux_x86_64.tar.gz \ + && cd bin && ln -s ../salmon-latest_linux_x86_64/bin/salmon salmon \ + && cd .. && rm -r salmon-1.0.0_linux_x86_64.tar.gz salmon-latest_linux_x86_64/sample_data.tgz + +RUN Rscript -e 'BiocManager::install("limma", version = "3.8",Ncpus=8, clean=TRUE)' + +RUN Rscript -e 'BiocManager::install("edgeR", version = "3.8",Ncpus=8, clean=TRUE)' + +RUN Rscript -e 'BiocManager::install("tximport", version = "3.8",Ncpus=8, clean=TRUE)' + +RUN Rscript -e 'BiocManager::install("rhdf5", version = "3.8",Ncpus=8, clean=TRUE)' + +RUN Rscript -e 'BiocManager::install("GenomicFeatures", version = "3.8",Ncpus=8, clean=TRUE)' + +RUN Rscript -e 'install.packages("pheatmap",Ncpus=8, clean=TRUE)' + +RUN Rscript -e 'BiocManager::install("DESeq2", version = "3.8",Ncpus=8, clean=TRUE)' + +RUN Rscript -e 'BiocManager::install("apeglm", version = "3.8",Ncpus=8, clean=TRUE)' + +RUN Rscript -e 'BiocManager::install("BiocParallel", version = "3.8",Ncpus=8, clean=TRUE)' + + +#This part is necessary to run on ISEM cluster +RUN mkdir -p /share/apps/lib \ + && mkdir -p /share/apps/gridengine \ + && mkdir -p /share/bio \ + && mkdir -p /opt/gridengine \ + && mkdir -p /export/scrach \ + && mkdir -p /usr/lib64 \ + && ln -s /bin/bash /bin/mbb_bash \ + && ln -s /bin/bash /bin/isem_bash \ + && /usr/sbin/groupadd --system --gid 400 sge \ + && /usr/sbin/useradd --system --uid 400 --gid 400 -c GridEngine --shell /bin/true --home /opt/gridengine sge + +EXPOSE 3838 +CMD ["Rscript", "-e", "setwd('/sagApp/'); shiny::runApp('/sagApp/app.R',port=3838 , host='0.0.0.0')"] diff --git a/README.md b/README.md index e69de29..419bf4b 100644 --- a/README.md +++ b/README.md @@ -0,0 +1 @@ +Rnaseq reads can be cleaned and/or trimmed before quantifying abundances of transcripts (with Kallisto or Salmon). Differentially expressed genes can then be checked with edgeR or deseq2. \ No newline at end of file diff --git a/deploy.sh b/deploy.sh new file mode 100644 index 0000000..fdd1536 --- /dev/null +++ b/deploy.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +# This script is executed on the virtual machine during the *Deployment* phase. +# It is used to apply parameters specific to the current deployment. +# It is executed secondly during a cloud deployement in IFB-Biosphere, after the *Installation* phase. +if [ $# -lt 1 ] +then + APP_IMG="mbbteam/rnaseqde:latest" +else + IMG_SRC=$1 + case $IMG_SRC in + ifb) + APP_IMG="gitlab-registry.in2p3.fr/ifb-biosphere/apps/rnaseqde:master" ;; + docker ) + APP_IMG="mbbteam/rnaseqde:latest" ;; + local) + docker build . -t rnaseqde:latest + APP_IMG="rnaseqde:latest" ;; + mbb) + #APP_IMG="X.X.X.X:5000/rnaseqde:latest" ;; + esac +fi + + +# Tuning if site proxy or not +#CLOUD_SERVICE = $(ss-get cloudservice) +#CLOUD_SERVICE="ifb-genouest-genostack" + +#HOST_NAME=$( ss-get --timeout=3 hostname ) +HOST_NAME="192.168.100.49" +#if [ "$CLOUD_SERVICE" == "ifb-genouest-genostack" ]; then + # Cloud site WITH a site proxy +# APP_PORT=80 +# PROXIED_IP=$( echo $HOST_NAME | sed "s|\.|-|g") +# HOST_NAME="openstack-${PROXIED_IP}.genouest.org" +# HTTP_ENDP="https://$HOST_NAME" + +# systemctl stop nginx +#else + # Cloud site WOUT a site proxy + APP_PORT=8787 + HTTP_ENDP="https://$HOST_NAME" + + openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout /etc/ssl/private/nginx-selfsigned.key -out /etc/ssl/certs/nginx-selfsigned.crt -subj "/C=FR/ST=AURA/L=Lyon/O=IFB/OU=IFB-biosphere/CN=myrstudio.biosphere.france-bioinformatique.fr" + openssl dhparam -out /etc/ssl/certs/dhparam.pem 2048 + + mkdir -p /etc/nginx/snippets + echo "ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;" > /etc/nginx/snippets/self-signed.conf + echo "ssl_certificate_key /etc/ssl/private/nginx-selfsigned.key;" >> /etc/nginx/snippets/self-signed.conf + + cp system/nginx_snippets_ssl-params.conf /etc/nginx/snippets/ssl-params.conf + + cp /etc/nginx/sites-available/default /etc/nginx/sites-available/default.bak + + cp system/nginx_sites-available_default /etc/nginx/sites-available/default + sed -i "s|server_domain_or_IP|$HOST_NAME|" /etc/nginx/sites-available/default + + useradd nginx + cp system/nginx_nginx.conf /etc/nginx/nginx.conf + + cp system/nginx_conf.d_10-rstudio.conf /etc/nginx/conf.d/10-rstudio.conf + sed -i "s|example.com|$HOST_NAME|" /etc/nginx/conf.d/10-rstudio.conf + + systemctl restart nginx + systemctl enable nginx +#fi + +# Docker volumes + +# mydatalocal: from the system disk or ephemeral one +IFB_DATADIR="/ifb/data/" +source /etc/profile.d/ifb.sh +VOL_NAME="mydatalocal" +VOL_DEV=$(readlink -f -n $IFB_DATADIR/$VOL_NAME ) +DOCK_VOL=" --mount type=bind,src=$VOL_DEV,dst=$IFB_DATADIR/$VOL_NAME" + +# MBB Workflows reads data from /Data and write results to /Results +mkdir ${VOL_DEV}/Data +mkdir ${VOL_DEV}/Results +DOCK_VOL+=" --mount type=bind,src=$VOL_DEV/Data,dst=/Data" +DOCK_VOL+=" --mount type=bind,src=$VOL_DEV/Results,dst=/Results" + +# NFS mounts: from ifb_share configuration in autofs +IFS_ORI=$IFS +while IFS=" :" read VOL_NAME VOL_TYPE VOL_IP VOL_DEV ; do + DOCK_VOL+=" --mount type=volume,volume-driver=local,volume-opt=type=nfs,src=$VOL_NAME,dst=$IFB_DATADIR/$VOL_NAME,volume-opt=device=:$VOL_DEV,volume-opt=o=addr=$VOL_IP" +done < /etc/auto.ifb_share +IFS=$IFS_ORI + + + +CONTAINER_ID=$( docker run -d -p $APP_PORT:3838 $DOCK_VOL $APP_IMG ) + +VM_IP=$(curl bot.whatismyipaddress.com) + +if [ $CONTAINER_ID ] +then + echo " " + echo You have to put your Data on : ${VOL_DEV}/Data + echo " " + echo Results will be written to : ${VOL_DEV}/Results + echo " " + echo You can access the workflow interface at : https://${VM_IP} + echo " " + echo To start a Bash session inside the container : docker exec -it $CONTAINER_ID /bin/bash + echo " " + echo To run the workflow without the interface : docker exec -it $CONTAINER_ID snakemake -s /workflow/Snakefile all --configfile config --cores XX + echo " " + echo config est un fichier de configuration qui doit être dans un sous dossier de ${VOL_DEV}/Data ou ${VOL_DEV}/Results + echo " " + echo ex. si fichier dans ${VOL_DEV}/Data/run1/maconfig1.yml : docker exec -it $CONTAINER_ID snakemake -s /workflow/Snakefile all --configfile /Data/run1/maconfig1.yml --cores XX + echo " " + echo Vous pouvez utiliser l''interface graphique pour générer un fichier de configuration. + echo " " + echo XX étant le nombre de coeurs qui seront utilisés par le workflow. +else + echo Failed to run the docker container !! +fi diff --git a/deployBigMem.sh b/deployBigMem.sh new file mode 100644 index 0000000..ce74e5a --- /dev/null +++ b/deployBigMem.sh @@ -0,0 +1,67 @@ +#!/bin/bash +#This script will help a deployment of a docker image on an MBB bigmem machine +if [ $# -lt 1 ] +then + APP_IMG="mbbteam/rnaseqde:latest" +else + IMG_SRC=$1 + case $IMG_SRC in + docker ) + APP_IMG="mbbteam/rnaseqde:latest" ;; + local) + docker build . -t rnaseqde:latest + APP_IMG="rnaseqde:latest" ;; + mbb) + #APP_IMG="X.X.X.X:5000/rnaseqde:latest" ;; + esac +fi + +#essayer une plage de ports entre 8787 et 8800 +#APP_PORT=$2 +APP_PORT=8787 +while [[ $(ss -tulw | grep $APP_PORT) != "" && $APP_PORT < 8800 ]] +do + APP_PORT=$(( $APP_PORT + 1)) +done + +if [[ $(ss -tulw | grep $APP_PORT) != "" ]] +then + echo "No tcp port available !!" + exit -1 +fi + +# Docker volumes +#realUSER=$(who am i | awk '{print $1}') +if [ $SUDO_USER ]; then realUSER=$SUDO_USER; else realUSER=`whoami`; fi +VOL_DEV=/media/bigvol/$realUSER +# MBB Workflows reads data from /Data and write results to /Results +mkdir -p ${VOL_DEV}/Data +mkdir -p ${VOL_DEV}/Results +DOCK_VOL+=" --mount type=bind,src=$VOL_DEV/Data,dst=/Data" +DOCK_VOL+=" --mount type=bind,src=$VOL_DEV/Results,dst=/Results" + + +CONTAINER_ID=$( docker run --rm -d -p $APP_PORT:3838 $DOCK_VOL $APP_IMG ) +if [ $CONTAINER_ID ] +then + echo " " + echo You have to put your Data on : ${VOL_DEV}/Data + echo " " + echo Results will be written to : ${VOL_DEV}/Results + echo " " + hostname -I | awk -v port=$APP_PORT '{print "You can access the workflow interface at : http://"$1":"port}' + echo " " + echo To start a Bash session inside the container : docker exec -it $CONTAINER_ID /bin/bash + echo " " + echo To run the workflow without the interface : docker exec -it $CONTAINER_ID snakemake -s /workflow/Snakefile all --configfile config --cores XX + echo " " + echo config est un fichier de configuration qui doit être dans un sous dossier de ${VOL_DEV}/Data ou ${VOL_DEV}/Results + echo " " + echo ex. si fichier dans ${VOL_DEV}/Data/run1/maconfig1.yml : docker exec -it $CONTAINER_ID snakemake -s /workflow/Snakefile all --configfile /Data/run1/maconfig1.yml --cores XX + echo " " + echo Vous pouvez utiliser l''interface graphique pour générer un fichier de configuration. + echo " " + echo XX étant le nombre de coeurs qui seront utilisés par le workflow. +else + echo Failed to run the docker container !! +fi diff --git a/files/Snakefile b/files/Snakefile new file mode 100644 index 0000000..2997d61 --- /dev/null +++ b/files/Snakefile @@ -0,0 +1,481 @@ +import os +import re +import csv +import snakemake.utils + +SAMPLES = config["samples"] +STEPS = config["steps"] +PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"] +PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"] +OUTPUTS = config["outputs"] +PARAMS_INFO = config["params_info"] +config = config["params"] + +########## +# Inputs # +########## + +# Generic input functions +## get raw_reads +def raw_reads(): + inputs = dict() + if (config["SeOrPe"] == "PE"): + inputs["read"] = config['sample_dir']+'/{sample}_R1'+config["sample_suffix"], + inputs["read2"] = config['sample_dir']+'/{sample}_R2'+config["sample_suffix"], + elif (config["SeOrPe"] == "SE"): + inputs["read"] = config['sample_dir']+'/{sample}'+config["sample_suffix"], + else: + sys.exit("SeOrPe should be SE or PE") + return inputs + +## get reads (trimmed or raw) +def reads(): + inputs = dict() + if (config["trimming"] == "null"): + return raw_reads() + elif (config["trimming"] == "trimmomatic"): + if (config["SeOrPe"] == "SE"): + inputs["read"] = rules.trimmomatic_SE.output.read + elif (config["SeOrPe"] == "PE"): + inputs["read"] = rules.trimmomatic_PE.output.readFP + inputs["read2"] = rules.trimmomatic_PE.output.readRP + return inputs + +def get_groups(): + with open(config["group_file"], mode="r") as infile: + reader = csv.reader(infile,delimiter='\t') + return {row[0]:row[1] for row in reader} + +config["groups"] = get_groups() + +# Tools inputs functions +def fastqc_inputs(): + return raw_reads() + +def trimmomatic_inputs(): + return raw_reads() + +def kallisto_inputs(): + return reads() + +def salmon_inputs(): + return reads() + +def edger_inputs(): + inputs = dict() + if (config["quantification"] == 'kallisto'): + if (config["SeOrPe"] == "PE"): + inputs["counts"] = expand(rules.kallisto_quant_PE.output.counts,sample=SAMPLES) + else: + inputs["counts"] = expand(rules.kallisto_quant_SE.output.counts,sample=SAMPLES) + + elif (config["quantification"] == 'salmon'): + if (config["SeOrPe"] == "PE"): + inputs["counts"] = expand(rules.salmon_quant_PE.output.counts,sample=SAMPLES) + else: + inputs["counts"] = expand(rules.salmon_quant_SE.output.counts,sample=SAMPLES) + + return inputs + +def deseq2_inputs(): + return edger_inputs() + +def prepare_report_inputs(): + inputs = list() + for step in STEPS: + inputs.extend(step_outputs(step["name"])) + return inputs + +def prepare_report_scripts(): + scripts = list() + for step in STEPS: + tool = config[step["name"]] + script = tool+".prepare.report.R" + if (script in PREPARE_REPORT_SCRIPTS): + scripts.append("/workflow/scripts/"+script) + return scripts + +def prepare_report_outputs(): + outputs = list() + outputs.append(config["results_dir"] + "/outputs_mqc.csv") + for step in STEPS: + tool = config[step["name"]] + if (tool in PREPARE_REPORT_OUTPUTS.keys()): + for output in PREPARE_REPORT_OUTPUTS[tool]: + outputs.append(config["results_dir"]+"/"+tool+"/"+output) + return outputs + +def multiqc_inputs(): + # Need prepare_report inputs and outputs in case prepare_reports has no outputs + return prepare_report_outputs() + +########### +# Outputs # +########### + +def step_outputs(step): + outputs = list() + if (step == "trimming"): + if (config[step] == "trimmomatic"): + if(config["SeOrPe"] == "PE"): + outputs = expand(rules.trimmomatic_PE.output,sample=SAMPLES) + else: + outputs = expand(rules.trimmomatic_SE.output,sample=SAMPLES) + + elif (step == "quality_check"): + if (config[step] == "fastqc"): + if(config["SeOrPe"] == "PE"): + outputs = expand(rules.fastqc_PE.output,sample=SAMPLES) + else: + outputs = expand(rules.fastqc_SE.output,sample=SAMPLES) + + elif (step == "quantification"): + if (config[step] == "kallisto"): + if(config["SeOrPe"] == "PE"): + outputs = expand(rules.kallisto_quant_PE.output,sample=SAMPLES) + else: + outputs = expand(rules.kallisto_quant_SE.output,sample=SAMPLES) + elif (config[step] == "salmon"): + if(config["SeOrPe"] == "PE"): + outputs = expand(rules.salmon_quant_PE.output,sample=SAMPLES) + else: + outputs = expand(rules.salmon_quant_SE.output,sample=SAMPLES) + + elif (step == "DE"): + if (config[step] == "deseq2"): + outputs = rules.deseq2.output + elif (config[step] == "edger"): + outputs = rules.edger.output + + elif (step == "all"): + outputs = list(rules.multiqc.output) + + return outputs + +# get outputs for each choosen tools +def workflow_outputs(step): + outputs = list() + outputs.extend(step_outputs(step)) + return outputs + +######### +# Rules # +######### + +rule trimmomatic_PE: + input: + **trimmomatic_inputs() + output: + readFP = config["results_dir"]+"/"+config["trimmomatic_PE_output_dir"]+"/{sample}_forward_paired.fq.gz", + readFU = config["results_dir"]+"/"+config["trimmomatic_PE_output_dir"]+"/{sample}_forward_unpaired.fq.gz", + readRP = config["results_dir"]+"/"+config["trimmomatic_PE_output_dir"]+"/{sample}_reverse_paired.fq.gz", + readRU = config["results_dir"]+"/"+config["trimmomatic_PE_output_dir"]+"/{sample}_reverse_unpaired.fq.gz", + log: + config["results_dir"]+'/logs/trimmomatic/{sample}_trimmomatic_log.txt' + params: + trimmomatic_qc_score = config["trimmomatic_qc_score"] + threads: + config["trimmomatic_threads"] + shell: + "trimmomatic PE " + "{params.trimmomatic_qc_score} " + "-threads {threads} " + "{input} " + "{output} " + "|& tee {log}" + +rule trimmomatic_SE: + input: + **trimmomatic_inputs() + output: + read = config["results_dir"]+"/"+config["trimmomatic_SE_output_dir"]+"/{sample}_trimmed.fq.gz", + log: + config["results_dir"]+'/logs/trimmomatic/{sample}_trimmomatic_log.txt' + params: + trimmomatic_qc_score = config["trimmomatic_qc_score"] + threads: + config["trimmomatic_threads"] + shell: + "trimmomatic SE " + "{params.trimmomatic_qc_score} " + "-threads {threads} " + "{input} " + "{output} " + "|& tee {log}" + +ruleorder: trimmomatic_PE > trimmomatic_SE + + + +rule fastqc_SE: + input: + **fastqc_inputs() + output: + html = config["results_dir"]+'/'+config["fastqc_SE_output_dir"]+'/{sample}'+config["sample_suffix"].split(".")[0]+'_fastqc.html', + zip = config["results_dir"]+'/'+config["fastqc_SE_output_dir"]+'/{sample}'+config["sample_suffix"].split(".")[0]+'_fastqc.zip', + log: config["results_dir"]+'/logs/fastqc/{sample}_fastqc_log.txt' + threads: 1 + params: + output_dir = config["results_dir"]+'/'+config["fastqc_SE_output_dir"] + shell: + "fastqc {input.read} -t {threads} -o {params.output_dir} |& tee {log}" + +rule fastqc_PE: + input: + **fastqc_inputs() + output: + html1 = config["results_dir"]+'/'+config["fastqc_PE_output_dir"]+'/{sample}_R1'+config["sample_suffix"].split(".")[0]+'_fastqc.html', + zip1 = config["results_dir"]+'/'+config["fastqc_PE_output_dir"]+'/{sample}_R1'+config["sample_suffix"].split(".")[0]+'_fastqc.zip', + html2 = config["results_dir"]+'/'+config["fastqc_PE_output_dir"]+'/{sample}_R2'+config["sample_suffix"].split(".")[0]+'_fastqc.html', + zip2 = config["results_dir"]+'/'+config["fastqc_PE_output_dir"]+'/{sample}_R2'+config["sample_suffix"].split(".")[0]+'_fastqc.zip', + log: config["results_dir"]+'/logs/fastqc/{sample}_fastqc_log.txt' + threads: 2 + params: + output_dir = config["results_dir"]+'/'+config["fastqc_PE_output_dir"] + shell: + "fastqc {input.read} {input.read2} -t {threads} -o {params.output_dir} |& tee {log}" + +ruleorder: fastqc_PE > fastqc_SE + + + +rule kallisto_index: + input: + fasta = config["kallisto_index_input"] + output: + index = config["results_dir"]+"/"+config["kallisto_index_output_dir"]+"/index.kidx" + log: + config["results_dir"]+'/logs/kallisto/index/kallisto_index_log.txt' + shell: + "kallisto index -i {output} {input} |& tee {log}" + +rule kallisto_quant_SE: + input: + **kallisto_inputs(), + index = config["results_dir"]+"/"+config["kallisto_index_output_dir"]+"/index.kidx" + output: + h5 = config["results_dir"]+"/"+config["kallisto_quant_SE_output_dir"]+"/{sample}/abundance.h5", + counts = config["results_dir"]+"/"+config["kallisto_quant_SE_output_dir"]+"/{sample}/abundance.tsv", + run_info = config["results_dir"]+"/"+config["kallisto_quant_SE_output_dir"]+"/{sample}/run_info.json" + log: + config["results_dir"]+'/logs/kallisto/quant/{sample}_kallisto_quant_log.txt' + threads: + config["kallisto_quant_threads"] + params: + fragment_length = config["kallisto_quant_fragment_length_SE"], + standard_deviation = config["kallisto_quant_standard_deviation_SE"], + output_dir = config["results_dir"]+"/"+config["kallisto_quant_SE_output_dir"]+"/{sample}" + shell: + "kallisto quant " + "-t {threads} " + "-i {input.index} " + "-o {params.output_dir} " + "--single " + "-l {params.fragment_length} " + "-s {params.standard_deviation} " + "{input.read} |& tee {log}" + +rule kallisto_quant_PE: + input: + **kallisto_inputs(), + index = config["results_dir"]+"/"+config["kallisto_index_output_dir"]+"/index.kidx" + output: + h5 = config["results_dir"]+"/"+config["kallisto_quant_PE_output_dir"]+"/{sample}/abundance.h5", + counts = config["results_dir"]+"/"+config["kallisto_quant_PE_output_dir"]+"/{sample}/abundance.tsv", + run_info = config["results_dir"]+"/"+config["kallisto_quant_PE_output_dir"]+"/{sample}/run_info.json" + log: + config["results_dir"]+'/logs/kallisto/quant/{sample}_kallisto_quant_log.txt' + threads: + config["kallisto_quant_threads"] + params: + stranded = config["kallisto_quant_stranded_PE"], + output_dir = config["results_dir"]+"/"+config["kallisto_quant_PE_output_dir"]+"/{sample}" + shell: + "kallisto quant " + "-t {threads} " + "-i {input.index} " + "-o {params.output_dir} " + "{input.read} {input.read2} |& tee {log}" + +ruleorder: kallisto_quant_PE > kallisto_quant_SE + +rule salmon_index: + input: config["salmon_index_input"] + output: directory(config["results_dir"]+"/"+config["salmon_index_output_dir"]+"/indexDir") + log: config["results_dir"]+'/logs/salmon/index/salmon_index_log.txt' + shell: + "salmon index -t {input} -i {output} |& tee {log}" + +rule salmon_quant_PE: + input: + **salmon_inputs(), + index = config["results_dir"]+"/"+config["salmon_index_output_dir"]+"/indexDir" + output: + counts = config["results_dir"]+"/"+config["salmon_quant_SE_output_dir"]+"/{sample}/quant.sf", + lib_counts = config["results_dir"]+"/"+config["salmon_quant_SE_output_dir"]+"/{sample}/lib_format_counts.json", + run_info = config["results_dir"]+"/"+config["salmon_quant_SE_output_dir"]+"/{sample}/cmd_info.json" + log: config["results_dir"]+'/logs/salmon/quant/{sample}_salmon_quant_log.txt' + params: + output_dir = config["results_dir"]+"/"+config["salmon_quant_PE_output_dir"]+'/{sample}' + threads: config["salmon_quant_threads"] + shell: + "salmon quant " + "-i {input.index} " + "-l A " + "-1 {input.read} " + "-2 {input.read2} " + "-o {params.output_dir} " + "-p {threads} |& tee {log}" + +rule salmon_quant_SE: + input: + **salmon_inputs(), + index = config["results_dir"]+"/"+config["salmon_index_output_dir"]+"/indexDir" + output: + counts = config["results_dir"]+"/"+config["salmon_quant_SE_output_dir"]+"/{sample}/quant.sf", + lib_counts = config["results_dir"]+"/"+config["salmon_quant_SE_output_dir"]+"/{sample}/lib_format_counts.json", + run_info = config["results_dir"]+"/"+config["salmon_quant_SE_output_dir"]+"/{sample}/cmd_info.json" + log: + config["results_dir"]+'/logs/salmon/quant/{sample}_salmon_quant_log.txt' + params: + output_dir = config["results_dir"]+"/"+config["salmon_quant_SE_output_dir"]+'/{sample}' + threads: + config["salmon_quant_threads"] + shell: + "salmon quant " + "-i {input.index} " + "-l A " + "-r {input.read} " + "-o {params.output_dir} " + "-p {threads} |& tee {log}" + +ruleorder: salmon_quant_PE > salmon_quant_SE + +rule edger: + input: + **edger_inputs() + output: + de_table = config["results_dir"]+'/'+config["edger_output_dir"]+'/de_table.csv', + r_data = config["results_dir"]+'/'+config["edger_output_dir"]+'/data.RData', + PCA = config["results_dir"]+'/'+config["edger_output_dir"]+'/PCA_mqc.png', + Top_genes = config["results_dir"]+'/'+config["edger_output_dir"]+'/Top_genes_mqc.tsv', + Heatmap = config["results_dir"]+'/'+config["edger_output_dir"]+'/Heatmap_mqc.png', + log: config["results_dir"]+'/logs/edger/edger_log.txt' + script: + "scripts/edger.script.R" + +rule deseq2: + input: + **deseq2_inputs() + output: + de_table = config["results_dir"]+'/'+config["deseq2_output_dir"]+'/de_table.csv', + r_data = config["results_dir"]+'/'+config["deseq2_output_dir"]+'/data.RData', + PCA = config["results_dir"]+'/'+config["deseq2_output_dir"]+'/PCA_mqc.png', + DE_Table = config["results_dir"]+'/'+config["deseq2_output_dir"]+'/DE_Table_mqc.tsv', + MA_plot = config["results_dir"]+'/'+config["deseq2_output_dir"]+'/MA_plot_mqc.png', + Heatmap = config["results_dir"]+'/'+config["deseq2_output_dir"]+'/Heatmap_mqc.png', + log: config["results_dir"]+'/logs/deseq2/deseq2_log.txt' + script: + "scripts/deseq2.script.R" + + + +rule prepare_report: + input: + *prepare_report_inputs(), + output: + *prepare_report_outputs(), + config_multiqc = config["results_dir"] + "/config_multiqc.yaml", + params_tab = config["results_dir"] + "/params_tab_mqc.csv" + params: + params_file = config["results_dir"]+"/params.yml", + results_dir = config["results_dir"] + log: + config["results_dir"]+"/logs/prepare_report_log.txt" + run: + # Specific scripts for each tool + for script in prepare_report_scripts(): + shell("Rscript "+script+" {params.params_file} |& tee {log}") + # Outputs files for Multiqc report + outfile = config["results_dir"] + "/outputs_mqc.csv" + head = """ +# description: 'This is the list of the files generated by each step of the workflow' +# section_name: 'Workflow outputs' +""" + with open(outfile,"w") as out: + out.write(head) + out.write("step\ttool\tfile\tdescription\n")#\tname + for step in STEPS: + tool = config[step["name"]] + i=1 + for command in OUTPUTS[tool]: + if ((config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))): + outputs = OUTPUTS[tool][command] + for files in outputs: + name = files["file"] if 'file' in files.keys() else files["directory"] + path = config[command+"_output_dir"] + "/" + name #config["results_dir"] +"/"+ + out.write(str(i)+"-"+step["title"]+"\t"+tool+"\t"+path+"\t"+files["description"]+"\n")#"\t"+files["name"]+ + i+=1 + + # Params list for Multiqc report + params_list = "params_name\tdescription\tvalue\n" + head = """# description: 'This is the list of the parameters for each rule' +# section_name: 'Workflow parameters' +""" + for step in STEPS: + tool = config[step["name"]] + for key, value in config.items(): + if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]) and ((config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))): + if (key in PARAMS_INFO.keys() and "label" in PARAMS_INFO[key].keys()): + description = PARAMS_INFO[key]["label"] + else: + description = '' + params_list += key + "\t'" + description + "'\t'" + str(value) + "'\n" + + with open(output.params_tab,"w") as out: + out.write(head) + out.write(params_list) + + # Config for Multiqc report + shell("python3 /workflow/generate_multiqc_config.py {params.params_file} {output.config_multiqc}") + +rule multiqc: + input: + multiqc_inputs(), + config_multiqc = config["results_dir"] + "/config_multiqc.yaml" + output: + multiqc_dir = directory(config["results_dir"]+"/multiqc_data") + params: + output_dir = config["results_dir"] + log: + config["results_dir"]+'/logs/multiqc/multiqc_log.txt' + shell: + "multiqc --config {input.config_multiqc} -f {params.output_dir} " + "-o {params.output_dir} |& tee {log}" + +# Final Snakemake rule waiting for outputs of the final step choosen by user (default all steps) +rule all: + input: + workflow_outputs("all") + output: + Snakefile = config["results_dir"]+"/workflow/Snakefile", + get_samples = config["results_dir"]+"/workflow/get_samples.py", + scripts = directory(config["results_dir"]+"/workflow/scripts"), + params = config["results_dir"]+"/workflow/params.yml" + params: + params_file = config["results_dir"]+"/params.yml", + shell: + "cp /workflow/Snakefile {output.Snakefile} && " + "cp /workflow/get_samples.py {output.get_samples} && " + "cp -r /workflow/scripts {output.scripts} && " + "cp {params.params_file} {output.params}" + +onsuccess: + print("Workflow finished, no error") + shell("touch "+config["results_dir"]+"/logs/workflow_end.ok") + +onerror: + print("An error occurred") + shell("cat {log} > "+config["results_dir"]+"/logs/workflow_end.error") + #shell("mail -s "an error occurred" youremail@provider.com < {log}") + diff --git a/files/generate_multiqc_config.py b/files/generate_multiqc_config.py new file mode 100644 index 0000000..dcf28a6 --- /dev/null +++ b/files/generate_multiqc_config.py @@ -0,0 +1,43 @@ +import re +import sys +from tools import * + +config = read_yaml(sys.argv[1]) + +def report_section_order(): + res = "skip_generalstats: true\n\n" + res += "report_section_order:\n" + res += " Rule_graph:\n" + res += " order: 990\n" + res += " params_tab:\n" + res += " order: 980\n" + res += " outputs:\n" + res += " order: 970\n" + cpt = 960 + for step in config["steps"]: + tool = config["params"][step["name"]] + if (config["multiqc"][tool] != "custom"): + res += " " + config["multiqc"][tool] + ":\n" + res += " " + "order: " + str(cpt) + "\n" + cpt += -10 + for rule in config["outputs"][tool]: + if ((config["params"]["SeOrPe"] == "SE" and not("_PE" in rule)) or (config["params"]["SeOrPe"] == "PE" and not("_SE" in rule))): + for output in config["outputs"][tool][rule]: + if("file" in output.keys() and "mqc" in output["file"] and '{' not in output["file"]): # case of dynamic files ({wildcard}_mqc.png) to deal with + section = re.sub('\_mqc.*$', '', output["file"]) + res += " " + section + ":\n" + res += " " + "order: " + str(cpt) + "\n" + cpt += -10 + + return res + +def main(): + res = "" + res += report_section_order() + + with open(sys.argv[2],"w") as out: + out.write(res) + +if __name__ == "__main__": + # execute only if run as a script + main() \ No newline at end of file diff --git a/files/get_samples.py b/files/get_samples.py new file mode 100755 index 0000000..a2eb44b --- /dev/null +++ b/files/get_samples.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +# This script will take a directory and a parameter to tell if the reads are paired end or single end and return the sample list and the suffix +# Needs 2 arguments: reads_directory, SeOrPe +# SeOrPe is SE for single end reads and PE for paired end reads +# Usage: ./get_samples.py reads_directory SeOrPe +import os +import re +import csv +import sys + +def sample_list(dir, SeOrPe): + samples = list() + suffixes = list() + files = os.listdir(dir) + if SeOrPe == "PE": + regex = re.compile(r"^(.+?)(_R1|_R2)(.+)") + else: + regex = re.compile(r"^(.+?)(\..*)") + for file in files: + res = re.match(regex, file) + if res: + if res.group(1) not in samples: + samples.append(res.group(1)) + if SeOrPe == "PE": + suffixes.append(res.group(3)) + else: + suffixes.append(res.group(2)) + + if (len(set(suffixes)) == 1 ): + return {'samples': sorted(samples), 'suffix': list(set(suffixes))[0]} + else: + exit("Files have different suffixes:" + ','.join(suffixes)) + +def main(): + if len(sys.argv) == 3: + print(sample_list(sys.argv[1],sys.argv[2])) + else: + exit("""Needs 2 arguments: reads_directory, SeOrPe +Usage: ./get_samples.py reads_directory SeOrPe""") + +if __name__ == "__main__": + # execute only if run as a script + main() diff --git a/files/params.total.yml b/files/params.total.yml new file mode 100644 index 0000000..93aa4f8 --- /dev/null +++ b/files/params.total.yml @@ -0,0 +1,360 @@ +pipeline: RNAseq +params: + results_dir: /Results + sample_dir: /Data + group_file_select: server + group_file: '' + SeOrPe: PE + trimming: 'null' + trimmomatic_PE_output_dir: trimmomatic PE + trimmomatic_threads: 4 + trimmomatic_qc_score: -phred64 + trimmomatic_SE_output_dir: trimmomatic + null_output_dir: '' + quality_check: fastqc + fastqc_SE_output_dir: fastqc_SE + fastqc_PE_output_dir: fastqc_PE + fastqc_threads: 4 + quantification: kallisto + kallisto_index_output_dir: kallisto/index + kallisto_index_input: '' + kallisto_index_input_select: server + kallisto_quant_SE_output_dir: kallisto/quant + kallisto_quant_index: kallisto/index/index.kidx + kallisto_quant_threads: 4 + kallisto_quant_fragment_length_SE: 300 + kallisto_quant_standard_deviation_SE: 2 + kallisto_quant_PE_output_dir: kallisto/quant + kallisto_quant_stranded_PE: '' + salmon_index_output_dir: salmon/index + salmon_index_input_select: server + salmon_index_input: '' + salmon_index_threads: 4 + salmon_quant_SE_output_dir: salmon/quant + index: salmon/index/indexDir + salmon_quant_threads: 4 + salmon_quant_PE_output_dir: salmon/quant + DE: edger + edger_output_dir: edger + edger_threads: 4 + edger_tx2gene: false + edger_annotations_select: server + edger_annotations: '' + edger_normfact: TMM + edger_dispersion: '0' + edger_testType: exactTest + deseq2_output_dir: deseq2 + deseq2_threads: 4 + deseq2_tx2gene: false + deseq2_annotations_select: server + deseq2_annotations: '' + deseq2_fitType: parametric + deseq2_betaPrior: false +samples: [] +groups: [] +steps: +- title: Trimming + name: trimming + tools: + - trimmomatic + - 'null' + default: 'null' +- title: Quality check + name: quality_check + tools: + - fastqc + - 'null' + default: fastqc +- title: Quantification + name: quantification + tools: + - kallisto + - salmon + default: kallisto +- title: Differential expression + name: DE + tools: + - edger + - deseq2 + default: edger +params_info: + results_dir: + type: output_dir + sample_dir: + type: input_dir + group_file_select: + type: select + group_file: + type: input_file + SeOrPe: + type: radio + trimmomatic_threads: + tool: trimmomatic + rule: trimmomatic_SE + type: numeric + label: Number of threads to use + trimmomatic_qc_score: + tool: trimmomatic + rule: trimmomatic_SE + type: radio + label: Quality score encoding + fastqc_threads: + tool: fastqc + rule: fastqc_PE + type: numeric + label: Number of threads to use + kallisto_index_input_select: + tool: kallisto + rule: kallisto_index + type: select + kallisto_index_input: + tool: kallisto + rule: kallisto_index + type: input_file + label: Reference fasta file + kallisto_quant_threads: + tool: kallisto + rule: kallisto_quant_PE + type: numeric + label: Number of threads to use + kallisto_quant_fragment_length_SE: + tool: kallisto + rule: kallisto_quant_SE + type: numeric + label: Estimated average fragment length + kallisto_quant_standard_deviation_SE: + tool: kallisto + rule: kallisto_quant_SE + type: numeric + label: Estimated standard deviation of fragment length + kallisto_quant_stranded_PE: + tool: kallisto + rule: kallisto_quant_PE + type: radio + label: For strand specific mode choose --fr-stranded if the first read is forward + and choose --rf-stranded if the first read is reverse + salmon_index_input_select: + tool: salmon + rule: salmon_index + type: select + salmon_index_input: + tool: salmon + rule: salmon_index + type: input_file + label: Reference fasta file + salmon_index_threads: + tool: salmon + rule: salmon_index + type: numeric + label: Number of threads to use for index creation + salmon_quant_threads: + tool: salmon + rule: salmon_quant_PE + type: numeric + label: Number of threads to use + edger_threads: + tool: edger + rule: edger + type: numeric + label: Number of threads to use + edger_tx2gene: + tool: edger + rule: edger + type: checkbox + label: 'Aggregate transcripts counts to gene counts : ' + edger_annotations_select: + tool: edger + rule: edger + type: select + edger_annotations: + tool: edger + rule: edger + type: input_file + label: 'Annotation file (gtf or gff) : ' + edger_normfact: + tool: edger + rule: edger + type: radio + label: Calculate normalization factors to scale the raw library sizes. + edger_dispersion: + tool: edger + rule: edger + type: text + label: 'Dispersion: either a numeric vector of dispersions or a character string + indicating that dispersions should be taken from the data.' + edger_testType: + tool: edger + rule: edger + type: radio + label: 'Test type: exactTest: Computes p-values for differential abundance for + each gene between two samples, conditioning on the total count for each gene. + The counts in each group are assumed to follow a binomial distribution + + glmLRT: Fits a negative binomial generalized log-linear model to the read counts + for each gene and conducts genewise statistical tests.' + deseq2_threads: + tool: deseq2 + rule: deseq2 + type: numeric + label: Number of threads to use + deseq2_tx2gene: + tool: deseq2 + rule: deseq2 + type: checkbox + label: 'Aggregate transcripts counts to gene counts : ' + deseq2_annotations_select: + tool: deseq2 + rule: deseq2 + type: select + deseq2_annotations: + tool: deseq2 + rule: deseq2 + type: input_file + label: 'Annotation file (gtf or gff) : ' + deseq2_fitType: + tool: deseq2 + rule: deseq2 + type: radio + label: Type of fitting of dispersions to the mean intensity + deseq2_betaPrior: + tool: deseq2 + rule: deseq2 + type: checkbox + label: 'betaPrior: whether or not to put a zero-mean normal prior on the non-intercept + coefficients.' +prepare_report_scripts: [] +prepare_report_outputs: {} +outputs: + trimmomatic: + trimmomatic_PE: + - name: readFP + file: '{sample}_forward_paired.fq.gz' + - name: readFU + file: '{sample}_forward_unpaired.fq.gz' + - name: readRP + file: '{sample}_reverse_paired.fq.gz' + - name: readRU + file: '{sample}_reverse_unpaired.fq.gz' + trimmomatic_SE: + - name: read + file: '{sample}_trimmed.fq.gz' + 'null': + 'null': [] + fastqc: + fastqc_SE: + - name: html + file: '{sample}_fastqc.html' + description: Rapport html fastqc + - name: zip + file: '{sample}_fastqc.zip' + description: Dossier zip fastqc + fastqc_PE: + - name: html1 + file: '{sample}_R1_fastqc.html' + description: Rapport html fastqc + - name: zip1 + file: '{sample}_R1_fastqc.zip' + description: Dossier zip fastqc + - name: html2 + file: '{sample}_R2_fastqc.html' + description: Rapport html fastqc + - name: zip2 + file: '{sample}_R2_fastqc.zip' + description: Dossier zip fastqc + kallisto: + kallisto_index: + - name: index + file: index.kidx + description: Index kallisto + kallisto_quant_SE: + - name: h5 + file: '{sample}/abundance.h5' + description: Abondances au format h5 + - name: counts + file: '{sample}/abundance.tsv' + description: Table d'abondance + - name: run_info + file: '{sample}/run_info.json' + description: "Informations sur l'ex\xE9cution" + kallisto_quant_PE: + - name: h5 + file: '{sample}/abundance.h5' + description: Abondances au format h5 + - name: counts + file: '{sample}/abundance.tsv' + description: Table d'abondance + - name: run_info + file: '{sample}/run_info.json' + description: "Informations sur l'ex\xE9cution" + salmon: + salmon_index: + - name: index + directory: indexDir + description: Index Salmon + salmon_quant_SE: + - name: lib_counts + file: salmon/quant/{sample}/lib_format_counts.json + description: Comptages au format JSON + - name: counts + file: salmon/quant/{sample}/quant.sf + description: Fichier de compte Salmon + - name: run_info + file: salmon/quant/{sample}/cmd_info.json + description: "Informations sur l'ex\xE9cution" + salmon_quant_PE: + - name: lib_counts + file: salmon/quant/{sample}/lib_format_counts.json + description: Comptages au format JSON + - name: counts + file: salmon/quant/{sample}/quant.sf + description: Fichier de compte Salmon + - name: run_info + file: salmon/quant/{sample}/cmd_info.json + description: "Informations sur l'ex\xE9cution" + edger: + edger: + - name: de_table + file: de_table.csv + description: "Tableau d'expression diff\xE9rentielle" + - name: RData + file: data.RData + description: Sauvegarde de la session R + - name: PCA + file: PCA_mqc.png + description: PCA plot + - name: Top_genes + file: Top_genes_mqc.tsv + description: "Tableau des top g\xE8nes" + - name: Heatmap + file: Heatmap_mqc.png + description: Heatmap + deseq2: + deseq2: + - name: de_table + file: de_table.csv + description: "Tableau d'expression diff\xE9rentielle" + - name: RData + file: data.RData + description: Sauvegarde de la session R + - name: PCA + file: PCA_mqc.png + description: PCA plot + - name: DE_Table + file: DE_Table_mqc.tsv + description: "Tableau d'expression diff\xE9rentielle avec p-value ajust\xE9\ + e < 0.1" + - name: MA_plot + file: MA_plot_mqc.png + description: MA-plot log2 fold changes (on the y-axis) versus the mean of normalized + counts (on the x-axis) + - name: Heatmap + file: Heatmap_mqc.png + description: Heatmap +multiqc: + trimmomatic: trimmomatic + 'null': custom + fastqc: fastqc + kallisto: kallisto + salmon: salmon + edger: custom + deseq2: custom diff --git a/files/scripts/deseq2.script.R b/files/scripts/deseq2.script.R new file mode 100644 index 0000000..bb507ba --- /dev/null +++ b/files/scripts/deseq2.script.R @@ -0,0 +1,132 @@ +#log <- file(snakemake@log[[1]], open="wt") +#sink(log) +#sink(log, type="message") +library(DESeq2) +library(edgeR) +library(apeglm) +library(BiocParallel) +library(tximport) +library(rhdf5) +library(GenomicFeatures) +library(ggplot2) + +register(MulticoreParam(snakemake@config[["deseq2_threads"]])) + +samples = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V1 +conditions = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V2 +#samples = c("DRR016125","DRR016126","DRR016127","DRR016128","DRR016129","DRR016130","DRR016131","DRR016132") +#files = file.path("/home/mbb/Documents/results2/kallisto/quant",samples,"abundance.h5") +files = sort(snakemake@input[["counts"]]) +names(files) = samples + +if (snakemake@config[["deseq2_tx2gene"]] == TRUE){ + # conversion transcrits vers gènes + TxDb <- makeTxDbFromGFF(file = snakemake@config[["deseq2_annotations"]]) + k <- keys(TxDb, keytype = "TXNAME") + tx2gene <- select(TxDb, k, "GENEID", "TXNAME") + txi <- tximport(files, type = snakemake@config[["quantification"]], tx2gene = tx2gene) +} else { + txi <- tximport(files, type = snakemake@config[["quantification"]], txOut = TRUE) +} + +designMat <- model.matrix(~conditions) +colnames(designMat)[2] = "condition" + +sampleTable <- data.frame(condition = factor(conditions)) +rownames(sampleTable) <- colnames(txi$counts) + +dds <- DESeqDataSetFromTximport(txi, sampleTable, designMat) + +keep <- rowSums(counts(dds)) >= 10 +dds <- dds[keep,] + +dds$condition <- factor(dds$condition, levels = unique(conditions)) + +dds <- DESeq(dds, fitType = snakemake@config[["deseq2_fitType"]], betaPrior = as.logical(snakemake@config[["deseq2_betaPrior"]]), test="Wald") + +res <- results(dds) + +save.image(snakemake@output[["r_data"]]) +write.csv(res, snakemake@output[["de_table"]]) + +###### PREPARE REPORT ###### + +cpm_counts = cpm(txi$counts) +pca = prcomp(t(cpm_counts)) + +png(filename = snakemake@output[["PCA"]], height=800, width=800) +ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point() +dev.off() + +options(digit=4) + +resSig <- subset(res, padj < 0.1) +resSigdf = data.frame(resSig@listData,row.names = resSig@rownames) + +#datatable(resSigdf,options = list(scrollX = '300px')) + +cat("# id: DE_Table +# section_name: 'DE Table' +# description: 'Tableau d'expression différentielle avec p-value ajustée < 0.1' +# format: 'tsv' +# plot_type: 'table'\ngene\t", file=snakemake@output[["DE_Table"]]) + +write.table(resSigdf,snakemake@output[["DE_Table"]],append=TRUE,sep="\t") + +png(filename = snakemake@output[["MA_plot"]], height=800, width=800) +plotMA(res, ylim=c(-2,2)) +dev.off() + +with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-2.5,2))) +with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red")) +with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange")) +with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green")) + +cpmTop = cpm_counts[rownames(resSigdf),] + +# Heatmap +library(pheatmap) +select <- order(rowMeans(counts(dds,normalized=TRUE)), decreasing=TRUE)[1:20] +df <- as.data.frame(colData(dds)[,c("condition")]) +ntd <- normTransform(dds) +rownames(df) = samples +colnames(df) = "Conditions" +assayNTD = assay(ntd) + +dev.off() +png(filename = snakemake@output[["Heatmap"]], height=800, width=800) +pheatmap(assayNTD[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df) +#heatmap(cpmTop,main = "Heatmap",margins = c(10,1)) +dev.off() + +#resLFC <- lfcShrink(dds, coef="groups", type="apeglm") +#resOrdered <- res[order(res$pvalue),] + +#plotMA(res, ylim=c(-2,2)) +#plotMA(resLFC, ylim=c(-2,2)) +# +# ntd <- normTransform(dds) +# +# library("vsn") +# meanSdPlot(assay(ntd)) +# +# library("pheatmap") +# select <- order(rowMeans(counts(dds,normalized=TRUE)), decreasing=TRUE)[1:20] +# df <- as.data.frame(colData(dds)[,c("condition")]) +# pheatmap(assay(ntd)[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df) +# +# +# sampleDists <- dist(t(assay(vsd))) +# +# library("RColorBrewer") +# sampleDistMatrix <- as.matrix(sampleDists) +# rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep="-") +# colnames(sampleDistMatrix) <- NULL +# colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) +# pheatmap(sampleDistMatrix, +# clustering_distance_rows=sampleDists, +# clustering_distance_cols=sampleDists, +# col=colors) +# +# plotPCA(vsd, intgroup=c("condition")) +#sink(NULL, type = "message") diff --git a/files/scripts/edger.script.R b/files/scripts/edger.script.R new file mode 100644 index 0000000..26720b3 --- /dev/null +++ b/files/scripts/edger.script.R @@ -0,0 +1,111 @@ +#log <- file(snakemake@log[[1]], open="wt") +#sink(log) +#sink(log, type="message") +library(edgeR) +library(tximport) +library(rhdf5) +library(GenomicFeatures) +library(ggplot2) + +samples = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V1 +conditions = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V2 +files = snakemake@input[["counts"]] +names(files) = samples + +if (snakemake@config[["edger_tx2gene"]] == TRUE){ + TxDb <- makeTxDbFromGFF(file = snakemake@config[["edger_annotations"]]) + k <- keys(TxDb, keytype = "TXNAME") + tx2gene <- select(TxDb, k, "GENEID", "TXNAME") + txi <- tximport(files, type = snakemake@config[["quantification"]], tx2gene = tx2gene) +} else { + txi <- tximport(files, type = snakemake@config[["quantification"]], txOut = TRUE) +} + +cts <- txi$counts +dgelist <- DGEList(cts) + +#Filtering +countsPerMillion <- cpm(dgelist) +countCheck <- countsPerMillion > 1 +keep <- which(rowSums(countCheck) >= 2) +dgelist <- dgelist[keep,] + +dgelist <- calcNormFactors(dgelist, method=snakemake@config[["edger_normfact"]]) +#plotMDS(dgelist) + +#'grep' is a string matching function. +designMat <- model.matrix(~conditions) +colnames(designMat)[2] = "Conditions" +de.com <- c() +dgelist$samples$group = designMat[,2] + +if (snakemake@config[["edger_testType"]] == "exactTest"){ + if (snakemake@config[["edger_dispersion"]] == 0){ + dgelist <- edgeR::estimateDisp(dgelist, designMat) + de.com <- edgeR::exactTest(dgelist) + }else{ + de.com <- edgeR::exactTest(dgelist, dispersion=snakemake@config[["edger_dispersion"]]) + } +} else if (snakemake@config[["edger_testType"]] == "glmLRT"){ + if (snakemake@config[["edger_dispersion"]] == 0){ + dgelist <- edgeR::estimateDisp(dgelist, designMat) + fit <- edgeR::glmFit(dgelist, designMat) + } else { + fit <- edgeR::glmFit(dgelist, designMat, dispersion=snakemake@config[["edger_dispersion"]]) + } + de.com <- edgeR::glmLRT(fit,coef=2) +} + +options(digits=4) + +padj<- p.adjust(de.com$table$PValue, method="bonferroni") +res <-data.frame(cbind(de.com$table$logFC/log(2),de.com$table$PValue, padj)) +colnames(res) <- c("log2FoldChange", "pvalue", "padj") +rownames(res) <- names(keep) + +rm(txi,tx2gene) + +save.image(snakemake@output[["r_data"]]) +write.csv(res, snakemake@output[["de_table"]]) + +###### PREPARE REPORT ###### + +cpm_counts = cpm(cts) +pca = prcomp(t(cpm_counts)) + +png(filename = paste(snakemake@output[["PCA"]],sep = "/"), height=800, width=800) +ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point() +dev.off() + +#plotMDS(dgelist, main = "Distances between gene expression profiles") +#plotSmear(lrt, de.tags=tt$table$gene.id) + + +topGenes = topTags(de.com,n = 40) + +cat("# id: Top_genes +# section_name: 'Top genes' +# description: 'Tableau des top genes' +# format: 'tsv' +# plot_type: 'table'\ngene\t", file=snakemake@output[["Top_genes"]]) + +write.table(as.data.frame(topGenes),snakemake@output[["Top_genes"]],append=TRUE,sep="\t") + +with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-2.5,2))) +with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red")) +with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange")) +with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green")) + +cpmTop = cpm_counts[rownames(topGenes$table),] + +library(pheatmap) +df <- as.data.frame(conditions) +rownames(df) = samples +colnames(df) = "Conditions" + +dev.off() +png(filename = paste(snakemake@output[["Heatmap"]],sep = "/"), height=800, width=800) +pheatmap(cpmTop, cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df) +#heatmap(cpmTop) +dev.off() +#plot_ly(x = colnames(cpmTop),y = rownames(cpmTop),z=cpmTop,type = "heatmap") \ No newline at end of file diff --git a/files/singularity.def b/files/singularity.def new file mode 100644 index 0000000..b5c1338 --- /dev/null +++ b/files/singularity.def @@ -0,0 +1,122 @@ +Bootstrap: localimage +From: ../base.sif + +%environment + export PATH=/opt/biotools/bin:$PATH + export ROOTSYS=/opt/biotools/root + export LD_LIBRARY_PATH='$LD_LIBRARY_PATH:$ROOTSYS/lib' + + + +%labels + Author YourName + Version v0.0.1 + build_date 2018 déc. 07 + +%runscript +echo "This container contains two apps (UI and Snakemake)." +echo "UI is a user interface to set up the workflow and launch it." +echo "Snakemake let you provide your configfile and other parameters to the snakemake command and launch it." +echo "To get help for an app :\nsingularity help --app appName this_container.sif" +echo "To run an app :\nsingularity run --app appName this_container.sif" + + +%apprun UI + exec Rscript -e "shiny::runApp('/sagApp/app.R',host='$1',port=$2)" + +%apphelp UI +To run the UI app you should bind data and results directories like in the following example. +You must also provide the host address and port where the shiny app will be launched +exemple : singularity run --app UI -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif 127.0.0.1 1234 + + +%apprun Snakemake + configfile=$1 + cores=$2 + shift + shift + exec snakemake -s /workflow/Snakefile all --configfile $configfile --cores $cores $@ + +%apphelp Snakemake +To run the Snakemake app you should bind data and results directories like in the following example. +You must also provide the configfile and the number of cores provided to snakemake command (you can add other parameters after these two) +exemple : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif myconfig.yml 16 otherparams + + +%apprun getConfigfile + exec cp /workflow/params.total.yml ./params.yml + +%apphelp getConfigfile +To run the getConfigfile app you dont need to bind directories. This app will only copy the default parameters file from the container to your local disk. +exemple : singularity run --app getConfigfile this_container.sif + + +%apprun getSamples + exec python3 /workflow/get_samples.py $1 $2 + +%apphelp getSamples +To run the getSamples app you need to bind the data directory. This app will give you the list of samples detected in a given directory and their file suffix. +exemple : singularity run --app getSamples -B /path/to/data/directory:/Data this_container.sif /Data PE + + +%help +This container contains four apps (UI, Snakemake, getConfigfile and getSamples). +* UI is a user interface to set up the workflow and launch it. +* Snakemake let you provide your configfile and other parameters to the snakemake command and launch it. +* getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app. +* getSamples gives you the list of samples detected in a given directory and their file suffix (usefull for filling samples and sample_suffix in parameters file). +To get help for an app : +singularity help --app appName this_container.sif +To run an app : +singularity run --app appName this_container.sif + + +%files + ./files /workflow + ./sagApp /sagApp + + +%post + mkdir /Data + mkdir /Results + apt-get update -y + + cd /opt/biotools + wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-0.38.zip + unzip Trimmomatic-0.38.zip + echo -e '#!/bin/bash java -jar /opt/biotools/Trimmomatic-0.38/trimmomatic-0.38.jar' > bin/trimmomatic + chmod 777 bin/trimmomatic + rm Trimmomatic-0.38.zip + + apt install -y fastqc=0.11.5+dfsg-6 + + cd /opt/biotools + wget https://github.com/pachterlab/kallisto/releases/download/v0.45.0/kallisto_linux-v0.45.0.tar.gz + tar -zxvf kallisto_linux-v0.45.0.tar.gz + mv kallisto_linux-v0.45.0/kallisto bin + rm -r kallisto_linux-v0.45.0.tar.gz kallisto_linux-v0.45.0 + + cd /opt/biotools + wget https://github.com/COMBINE-lab/salmon/releases/download/v1.0.0/salmon-1.0.0_linux_x86_64.tar.gz + tar -zxvf salmon-1.0.0_linux_x86_64.tar.gz + cd bin && ln -s ../salmon-latest_linux_x86_64/bin/salmon salmon + cd .. && rm -r salmon-1.0.0_linux_x86_64.tar.gz salmon-latest_linux_x86_64/sample_data.tgz + + Rscript -e 'BiocManager::install("limma", version = "3.8",Ncpus=8, clean=TRUE)' + + Rscript -e 'BiocManager::install("edgeR", version = "3.8",Ncpus=8, clean=TRUE)' + + Rscript -e 'BiocManager::install("tximport", version = "3.8",Ncpus=8, clean=TRUE)' + + Rscript -e 'BiocManager::install("rhdf5", version = "3.8",Ncpus=8, clean=TRUE)' + + Rscript -e 'BiocManager::install("GenomicFeatures", version = "3.8",Ncpus=8, clean=TRUE)' + + Rscript -e 'install.packages("pheatmap",Ncpus=8, clean=TRUE)' + + Rscript -e 'BiocManager::install("DESeq2", version = "3.8",Ncpus=8, clean=TRUE)' + + Rscript -e 'BiocManager::install("apeglm", version = "3.8",Ncpus=8, clean=TRUE)' + + Rscript -e 'BiocManager::install("BiocParallel", version = "3.8",Ncpus=8, clean=TRUE)' + diff --git a/files/tools.py b/files/tools.py new file mode 100644 index 0000000..e868a97 --- /dev/null +++ b/files/tools.py @@ -0,0 +1,26 @@ +import oyaml as yaml +import shutil + +def read_yaml(filepath): + try: + with open(filepath, 'r') as file: + data = yaml.load(file) + return data + except IOError as e: + print("Error in file opening:", e) + except yaml.YAMLError as exc: + print("Error in yaml loading:", exc) + +def write_yaml(filepath,data): + try: + with open(filepath, 'w') as file: + yaml.dump(data, file, default_flow_style=False) + except IOError as e: + print("Error in file opening:", e) + +def copy_dir(src,dst): + try: + shutil.copytree(src,dst) + except FileExistsError: + shutil.rmtree(dst, ignore_errors=True) + shutil.copytree(src,dst) \ No newline at end of file diff --git a/install.sh b/install.sh new file mode 100644 index 0000000..d60ff7d --- /dev/null +++ b/install.sh @@ -0,0 +1,2 @@ +##### nginx install ##### +sudo apt-get install -y nginx diff --git a/sagApp/R/chooser.R b/sagApp/R/chooser.R new file mode 100644 index 0000000..b98c522 --- /dev/null +++ b/sagApp/R/chooser.R @@ -0,0 +1,40 @@ +chooserInput <- function(inputId, leftLabel, rightLabel, leftChoices, rightChoices, + size = 5, multiple = FALSE) { + + leftChoices <- lapply(leftChoices, tags$option) + rightChoices <- lapply(rightChoices, tags$option) + + if (multiple) + multiple <- "multiple" + else + multiple <- NULL + + tagList( + singleton(tags$head( + tags$script(src="chooser-binding.js"), + tags$style(type="text/css", + HTML(".chooser-container { display: inline-block; }") + ) + )), + div(id=inputId, class="chooser", + div(class="chooser-container chooser-left-container", + tags$select(class="left", size=size, multiple=multiple, leftChoices) + ), + div(class="chooser-container chooser-center-container", + icon("arrow-circle-o-right", "right-arrow fa-3x text-primary"), + tags$br(), + icon("arrow-circle-o-left", "left-arrow fa-3x text-primary") + ), + div(class="chooser-container chooser-right-container", + tags$select(class="right", size=size, multiple=multiple, rightChoices) + ) + ) + ) +} + +registerInputHandler("shinyjsexamples.chooser", function(data, ...) { + if (is.null(data)) + NULL + else + list(left=as.character(data$left), right=as.character(data$right)) +}, force = TRUE) \ No newline at end of file diff --git a/sagApp/R/menugauche.R b/sagApp/R/menugauche.R new file mode 100644 index 0000000..df65ab3 --- /dev/null +++ b/sagApp/R/menugauche.R @@ -0,0 +1,41 @@ +MenuGauche = sidebarMenu(id="sidebarmenu", + + menuItem("Global parameters", tabName="global_params", icon=icon("pencil", lib="font-awesome"), newtab=FALSE), + + menuItem("Trimming", tabName="trimming", icon=icon("pencil", lib="font-awesome"), newtab=FALSE), + + menuItem("Quality check", tabName="quality_check", icon=icon("pencil", lib="font-awesome"), newtab=FALSE), + + menuItem("Quantification", tabName="quantification", icon=icon("pencil", lib="font-awesome"), newtab=FALSE), + + menuItem("Differential expression", tabName="DE", icon=icon("pencil", lib="font-awesome"), newtab=FALSE), + + menuItem("Draw workflow graph", tabName="RULEGRAPH", icon=icon("gear", lib="font-awesome"), newtab=FALSE), + + tags$br(), + + downloadButton("DownloadParams", "Download config file", class="btn btn-light", style="color:black;margin: 6px 5px 6px 15px;"), + + tags$br(), + + tags$br(), + + numericInput("cores", label = "Threads available", min = 1, max = 24, step = 1, width = "auto", value = 16), +selectInput("force_from", label = "Start again from a step : ", selected = "none", choices = list('No'='none','Trimming'='trimming','Quality check'='quality_check','Quantification'='quantification','Differential expression'='DE')), tags$br(), + actionButton("RunPipeline", "Run pipeline", icon("play"), class="btn btn-info"), + + actionButton("StopPipeline", "Stop pipeline", icon("stop"), class="btn btn-secondary"), + tags$br(), + tags$br(), + menuItem("Running Workflow output", tabName="run_out", icon=icon("terminal", lib="font-awesome"), newtab=FALSE), + menuItem("Final report", tabName="Report", icon=icon("file", lib="font-awesome"), newtab=FALSE), + + tags$br(), + actionButton("close_session", "Close session", icon("times"), class="btn btn-primary"), + tags$br(),tags$br(), + + menuItem("Powered by mbb", href="http://mbb.univ-montp2.fr/MBB/index.php", newtab=TRUE, icon=icon("book", lib="font-awesome"), selected=NULL) + +) + + diff --git a/sagApp/app.R b/sagApp/app.R new file mode 100644 index 0000000..7cc2470 --- /dev/null +++ b/sagApp/app.R @@ -0,0 +1,238 @@ +#@author jimmy.lopez@univ-montp2.fr + + + +library(shiny) +library(shinydashboard) +library(shinyjs) +library(yaml) +library(stringr) +library(shinyFiles) +library(tools) +library(DT) + + +source("./R/chooser.R", local=T) + +source("./pages/pages_def_global_params.R", local=T) +source("./pages/pages_def_trimming.R", local=T) +source("./pages/pages_def_quality_check.R", local=T) +source("./pages/pages_def_quantification.R", local=T) +source("./pages/pages_def_de.R", local=T) +tabRULEGRAPH = fluidPage(box(title = 'Rule Graph :', width = 12, status = 'primary', collapsible = TRUE, solidHeader = TRUE, uiOutput('RULEGRAPH_svg'),actionButton('refresh_rg', 'Refresh', icon('sync'), class='btn btn-info'))) +tabReport = fluidPage(box(title = 'Report :', width = 12, status = 'primary', collapsible = TRUE, solidHeader = TRUE, uiOutput('report_html'))) +tabRUN = fluidPage(box(title = 'Run :', width = 12 , status = 'primary', collapsible = TRUE, solidHeader = TRUE, uiOutput('run_out',style = 'overflow-y: scroll; height: 600px')),actionButton("unlock", "Unlock the directory in case of previous failure"), checkboxInput("rerun_incomplete", "Check in case of incomplete jobs to rerun", value = FALSE, width = NULL)) +source("./R/menugauche.R", local=T) + + + +style <- tags$style(HTML(readLines("www/added_styles.css"))) + +UI <- dashboardPage( + + skin="blue", + + dashboardHeader(title="RNAseq", titleWidth=230), + + dashboardSidebar(width=230, MenuGauche), + + dashboardBody( + + shinyjs::useShinyjs(), + + tags$head(tags$link(rel="stylesheet", type="text/css", href="bootstrap.min.readable.css")), + +tags$head(style), + + tabItems( + + tabItem(tabName = "global_params", tabglobal_params), + + tabItem(tabName = "trimming", tabtrimming), + + tabItem(tabName = "quality_check", tabquality_check), + + tabItem(tabName = "quantification", tabquantification), + + tabItem(tabName = "DE", tabDE) + + ,tabItem(tabName = "RULEGRAPH", tabRULEGRAPH) + ,tabItem(tabName = "Report", tabReport) + ,tabItem(tabName = "run_out", tabRUN) + ) + +) + +) + +reload = function(dossierAnalyse,session,output){ + # if params exists reload them + if (file.exists(paste0(dossierAnalyse,"/params.yml"))){ + params = read_yaml(paste0(dossierAnalyse,"/params.yml")) + for (param in names(params$params_info)){ + if (params$params_info[[param]]$type == "text" || params$params_info[[param]]$type == "input_dir" || params$params_info[[param]]$type == "output_dir"){ + updateTextInput(session, param, value = params[["params"]][[param]]) + } + if (params$params_info[[param]]$type == "textArea"){ + updateTextAreaInput(session, paste0(param,"_server"), value = params[["params"]][[param]]) + } + if (params$params_info[[param]]$type == "input_file" && params[["params"]][[paste0(param,"_select")]] == "server"){ + updateTextInput(session, paste0(param,"_server"), value = params[["params"]][[param]]) + } + if (params$params_info[[param]]$type == "numeric"){ + updateNumericInput(session, param, value = params[["params"]][[param]]) + } + if (params$params_info[[param]]$type == "radio"){ + updateRadioButtons(session, param, selected = params[["params"]][[param]]) + } + if (params$params_info[[param]]$type == "select"){ + updateSelectInput(session, param, selected = params[["params"]][[param]]) + } + if (params$params_info[[param]]$type == "checkbox"){ + updateCheckboxInput(session, param, value = params[["params"]][[param]]) + } + } + for (step in params$steps){ + updateSelectInput(session, paste0("select",step$name), selected = params[["params"]][[step$name]]) + } + } + # if rulegraph show it + rulegraph = list.files(path=dossierAnalyse,pattern="rulegraph[[:digit:]]+.svg") + if (length(rulegraph) == 1){ + addResourcePath("results", dossierAnalyse) + output$RULEGRAPH_svg = renderUI(tagList(img(src = paste0("results/",rulegraph) ,alt = "Rulegraph of Snakemake jobs",style="max-width: 100%;height: auto;display: block;margin: auto"))) + } + else{ + output$RULEGRAPH_svg = renderUI(tagList(h3("No rule graph found, press the rule graph button to generate one."))) + } + # if report show it + if (file.exists(paste0(dossierAnalyse,"/multiqc_report.html"))){ + addResourcePath("results", dossierAnalyse) + output$report_html = renderUI(tags$iframe(src=paste0("results/multiqc_report.html"),width="100%", height="900px")) + } + else{ + output$report_html = renderUI(tags$h3("No report found, run the pipeline to produce a report")) + } +} + + +server <- function( input, output, session) { + + rv <- reactiveValues(textstream = c(""), running = FALSE, timer = reactiveTimer(1000)) + +observeEvent(input$results_dir,{ + if (dir.exists(input$results_dir)){ + reload(input$results_dir,session,output) + shinyjs::disable("results_dir") + if (file.exists(paste0(input$results_dir,"/logs/workflow.running"))){ + rv$running = TRUE + } + } + else{ + output$RULEGRAPH_svg = renderUI(tagList(h3("No rule graph found, press the rule graph button to generate one."))) + output$report_html = renderUI(tags$h3("No report found, run the pipeline to produce a report")) + } +}) + observe({ + rv$timer() + if (isolate(rv$running)){ + if (file.exists(paste0(input$results_dir,"/logs/runlog.txt"))){ + rv$textstream <- paste(readLines(paste0(input$results_dir,"/logs/runlog.txt"),warn=F), collapse = "<br/>") + } + else{ + if (!dir.exists(paste0(input$results_dir,"/logs"))){ + dir.create(paste0(input$results_dir,"/logs")) + } + file.create(paste0(input$results_dir,"/logs/runlog.txt")) + } + if (file.exists(paste0(input$results_dir,"/logs/workflow_end.ok"))){ + isolate({rv$running = F}) + toggle_inputs(reactiveValuesToList(input),T,F) + addResourcePath("results", input$results_dir) + outUI = tags$iframe(src=paste0("results/multiqc_report.html"),width="100%", height="900px") + output$report_html = renderUI(outUI) + file.remove(paste0(input$results_dir,"/logs/workflow_end.ok")) + file.remove(paste0(input$results_dir,"/logs/workflow.running")) + } + else if (file.exists(paste0(input$results_dir,"/logs/workflow_end.error"))){ + isolate({rv$running = F}) + toggle_inputs(reactiveValuesToList(input),T,F) + output$report_html = renderUI(HTML(paste(readLines(paste0(input$results_dir,"/logs/workflow_end.error"),warn=F), collapse = "<br/>"))) + file.remove(paste0(input$results_dir,"/logs/workflow_end.error")) + file.remove(paste0(input$results_dir,"/logs/workflow.running")) + } + } + }) + output$run_out <- renderUI({ + HTML(rv$textstream) + }) + +toggle_inputs <- function(input_list,enable_inputs=T,only_buttons=FALSE) + { + # Subset if only_buttons is TRUE. + if(only_buttons){ + buttons <- which(sapply(input_list,function(x) {any(grepl("Button",attr(x,"class")))})) + input_list = input_list[buttons] + } + + # Toggle elements + for(x in setdiff(names(input_list),"results_dir")){ + if(enable_inputs){ + shinyjs::enable(x)} else { + shinyjs::disable(x) } + } + shinyjs::enable("unlock") + shinyjs::enable("StopPipeline") + shinyjs::enable("close_session") + } + observeEvent(input$SeOrPe,{ + input_list <- reactiveValuesToList(input) + SE <- which(sapply(names(input_list),function(x) {any(grepl("_SE$",x))})) + PE <- which(sapply(names(input_list),function(x) {any(grepl("_PE$",x))})) + for (element in SE){ + if (input$SeOrPe == "PE") + shinyjs::hide(names(input_list)[element]) + else{ + shinyjs::show(names(input_list)[element]) + } + } + for (element in PE){ + if (input$SeOrPe == "SE") + shinyjs::hide(names(input_list)[element]) + else{ + shinyjs::show(names(input_list)[element]) + } + } + }) +observeEvent(input$StopPipeline,{ + system("pkill -f snakemake") + if (file.exists(paste0(input$results_dir,"/logs/workflow.running"))){ + file.remove(paste0(input$results_dir,"/logs/workflow.running")) + } + }) +observeEvent(input$close_session,{ + session$close(); + }) +observeEvent(input$unlock,{ + system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " --forcerun all -d ", input$results_dir , " --cores ", input$cores, " all --unlock"),wait = TRUE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt")); + if (file.exists(paste0(input$results_dir,"/logs/workflow.running"))){ + file.remove(paste0(input$results_dir,"/logs/workflow.running")) + } + input_list <- reactiveValuesToList(input) + toggle_inputs(input_list,T,F) + }) +output$DownloadParams <- downloadHandler( + filename = function() { + paste0("params", Sys.Date(), ".yaml", sep="") + }, + content = function(file) { + save_params(file) + }) +source("./server/opt_global.R", local=T) + + +} + + + +shinyApp(ui = UI, server = server) diff --git a/sagApp/pages/pages_def_de.R b/sagApp/pages/pages_def_de.R new file mode 100644 index 0000000..498cb75 --- /dev/null +++ b/sagApp/pages/pages_def_de.R @@ -0,0 +1,77 @@ +tabDE = fluidPage( + +box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE, solidHeader = TRUE, + + selectInput("selectDE", label = "Select the tool to use : ", selected = "edger", choices = list("edger" = "edger", "deseq2" = "deseq2")), + +conditionalPanel(condition = "input.selectDE == 'edger'",box(title = "edgeR", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + numericInput("edger_threads", label = "Number of threads to use", min = 1, max = NA, step = 1, width = "auto", value = 4), + + checkboxInput("edger_tx2gene", label = "Aggregate transcripts counts to gene counts : ", value = FALSE), + + box(title = "Annotation file (gtf or gff) : ", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + selectInput("edger_annotations_select",label = "Select where to find the file", selected = "server", choices = c("On server" = "server", "On your machine" = "local")), + conditionalPanel(condition = "input.edger_annotations_select == 'server'", + tags$label("Annotation file (gtf or gff) : "), + fluidRow( + column(4,shinyFilesButton("shinyfiles_edger_annotations",label="Please select a file", title="Annotation file (gtf or gff) : ", multiple=FALSE)), + column(8,textInput("edger_annotations_server",label=NULL,value="")) + ) + ), + conditionalPanel(condition = "input.edger_annotations_select == 'local'", + fileInput("edger_annotations_local",label = "Annotation file (gtf or gff) : ") + ) + ) +, + + radioButtons("edger_normfact", label = "Calculate normalization factors to scale the raw library sizes.", choices = list("TMM" = "TMM", "RLE" = "RLE", "upperquartile" = "upperquartile", "none" = "none"), selected = "TMM", width = "auto"), + + textInput("edger_dispersion", label = "Dispersion: either a numeric vector of dispersions or a character string indicating that dispersions should be taken from the data.", value = "0", width = "auto"), + + radioButtons("edger_testType", label = "Test type: exactTest: Computes p-values for differential abundance for each gene between two samples, conditioning on the total count for each gene. The counts in each group are assumed to follow a binomial distribution +glmLRT: Fits a negative binomial generalized log-linear model to the read counts for each gene and conducts genewise statistical tests.", choices = list("exactTest" = "exactTest", "glmLRT" = "glmLRT"), selected = "exactTest", width = "auto"), + + p("edgeR: Empirical Analysis of Digital Gene Expression Data."), + + p("Website : ",a(href="http://bioconductor.org/packages/release/bioc/html/edgeR.html","http://bioconductor.org/packages/release/bioc/html/edgeR.html",target="_blank")), + + p("Documentation : ",a(href="http://bioconductor.org/packages/release/bioc/manuals/edgeR/man/edgeR.pdf","http://bioconductor.org/packages/release/bioc/manuals/edgeR/man/edgeR.pdf",target="_blank")), + + p("Paper : ",a(href="https://doi.org/10.18129/B9.bioc.edgeR","https://doi.org/10.18129/B9.bioc.edgeR",target="_blank")) + + )), +conditionalPanel(condition = "input.selectDE == 'deseq2'",box(title = "DESeq2", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + numericInput("deseq2_threads", label = "Number of threads to use", min = 1, max = NA, step = 1, width = "auto", value = 4), + + checkboxInput("deseq2_tx2gene", label = "Aggregate transcripts counts to gene counts : ", value = FALSE), + + box(title = "Annotation file (gtf or gff) : ", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + selectInput("deseq2_annotations_select",label = "Select where to find the file", selected = "server", choices = c("On server" = "server", "On your machine" = "local")), + conditionalPanel(condition = "input.deseq2_annotations_select == 'server'", + tags$label("Annotation file (gtf or gff) : "), + fluidRow( + column(4,shinyFilesButton("shinyfiles_deseq2_annotations",label="Please select a file", title="Annotation file (gtf or gff) : ", multiple=FALSE)), + column(8,textInput("deseq2_annotations_server",label=NULL,value="")) + ) + ), + conditionalPanel(condition = "input.deseq2_annotations_select == 'local'", + fileInput("deseq2_annotations_local",label = "Annotation file (gtf or gff) : ") + ) + ) +, + + radioButtons("deseq2_fitType", label = "Type of fitting of dispersions to the mean intensity", choices = list("parametric" = "parametric", "local" = "local", "mean" = "mean"), selected = "parametric", width = "auto"), + + checkboxInput("deseq2_betaPrior", label = "betaPrior: whether or not to put a zero-mean normal prior on the non-intercept coefficients.", value = FALSE), + + p("DESeq2: Differential gene expression analysis based on the negative binomial distribution. Using normalized count data."), + + p("Website : ",a(href="https://bioconductor.org/packages/release/bioc/html/DESeq2.html","https://bioconductor.org/packages/release/bioc/html/DESeq2.html",target="_blank")), + + p("Documentation : ",a(href="https://bioconductor.org/packages/release/bioc/manuals/DESeq2/man/DESeq2.pdf","https://bioconductor.org/packages/release/bioc/manuals/DESeq2/man/DESeq2.pdf",target="_blank")), + + p("Paper : ",a(href="https://doi.org/10.1186/s13059-014-0550-8","https://doi.org/10.1186/s13059-014-0550-8",target="_blank")) + + )))) + + diff --git a/sagApp/pages/pages_def_global_params.R b/sagApp/pages/pages_def_global_params.R new file mode 100644 index 0000000..122582a --- /dev/null +++ b/sagApp/pages/pages_def_global_params.R @@ -0,0 +1,41 @@ +tabglobal_params = fluidPage( + +box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE, solidHeader = TRUE, + + hidden(textInput("selectglobal_params", label = "", value="global_params")),box(title = "Global parameters :", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + tags$label("Results directory: "), + fluidRow( + column(4,shinyDirButton("shinydir_results_dir",label="Please select a directory", title="Results directory: ")), + column(8,textInput("results_dir",label=NULL,value="")) + ) +, + + tags$label("Data directory: "), + fluidRow( + column(4,shinyDirButton("shinydir_sample_dir",label="Please select a directory", title="Data directory: ")), + column(8,textInput("sample_dir",label=NULL,value="")) + ) +, + + box(title = "Group file (TSV with col1(sample name), col2 (group)): ", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + selectInput("group_file_select",label = "Select where to find the file", selected = "server", choices = c("On server" = "server", "On your machine" = "local")), + conditionalPanel(condition = "input.group_file_select == 'server'", + tags$label("Group file (TSV with col1(sample name), col2 (group)): "), + fluidRow( + column(4,shinyFilesButton("shinyfiles_group_file",label="Please select a file", title="Group file (TSV with col1(sample name), col2 (group)): ", multiple=FALSE)), + column(8,textInput("group_file_server",label=NULL,value="")) + ) + ), + conditionalPanel(condition = "input.group_file_select == 'local'", + fileInput("group_file_local",label = "Group file (TSV with col1(sample name), col2 (group)): ") + ) + ) +, + + radioButtons("SeOrPe", label = "Single end reads (SE) or Paired end reads (PE): ", choices = list("Single end" = "SE", "Paired end" = "PE"), selected = "PE", width = "auto"), + + textAreaInput("memo", label = "Text area for the user", value = "") + + ))) + + diff --git a/sagApp/pages/pages_def_quality_check.R b/sagApp/pages/pages_def_quality_check.R new file mode 100644 index 0000000..117c5b0 --- /dev/null +++ b/sagApp/pages/pages_def_quality_check.R @@ -0,0 +1,22 @@ +tabquality_check = fluidPage( + +box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE, solidHeader = TRUE, + + selectInput("selectquality_check", label = "Select the tool to use : ", selected = "fastqc", choices = list("fastqc" = "fastqc", "null" = "null")), + +conditionalPanel(condition = "input.selectquality_check == 'fastqc'",box(title = "FastQC", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + numericInput("fastqc_threads", label = "Number of threads to use", min = 1, max = NA, step = 1, width = "auto", value = 4), + + p("FastQC: A quality control tool for high throughput raw sequence data."), + + p("Website : ",a(href="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/","https://www.bioinformatics.babraham.ac.uk/projects/fastqc/",target="_blank")), + + p("Documentation : ",a(href="http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/","http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/",target="_blank")) + + )), +conditionalPanel(condition = "input.selectquality_check == 'null'",box(title = "null", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + p("null: Skip this step") + + )))) + + diff --git a/sagApp/pages/pages_def_quantification.R b/sagApp/pages/pages_def_quantification.R new file mode 100644 index 0000000..2b40851 --- /dev/null +++ b/sagApp/pages/pages_def_quantification.R @@ -0,0 +1,70 @@ +tabquantification = fluidPage( + +box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE, solidHeader = TRUE, + + selectInput("selectquantification", label = "Select the tool to use : ", selected = "kallisto", choices = list("kallisto" = "kallisto", "salmon" = "salmon")), + +conditionalPanel(condition = "input.selectquantification == 'kallisto'",box(title = "kallisto", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + box(title = "Reference fasta file", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + selectInput("kallisto_index_input_select",label = "Select where to find the file", selected = "server", choices = c("On server" = "server", "On your machine" = "local")), + conditionalPanel(condition = "input.kallisto_index_input_select == 'server'", + tags$label("Reference fasta file"), + fluidRow( + column(4,shinyFilesButton("shinyfiles_kallisto_index_input",label="Please select a file", title="Reference fasta file", multiple=FALSE)), + column(8,textInput("kallisto_index_input_server",label=NULL,value="")) + ) + ), + conditionalPanel(condition = "input.kallisto_index_input_select == 'local'", + fileInput("kallisto_index_input_local",label = "Reference fasta file") + ) + ) +, + + numericInput("kallisto_quant_threads", label = "Number of threads to use", min = 1, max = NA, step = 1, width = "auto", value = 4), + + numericInput("kallisto_quant_fragment_length_SE", label = "Estimated average fragment length", min = 1, max = NA, step = 1, width = "auto", value = 300), + + numericInput("kallisto_quant_standard_deviation_SE", label = "Estimated standard deviation of fragment length", min = 1, max = NA, step = 1, width = "auto", value = 2), + + radioButtons("kallisto_quant_stranded_PE", label = "For strand specific mode choose --fr-stranded if the first read is forward and choose --rf-stranded if the first read is reverse", choices = list("Not stranded" = "", "Forward Reverse" = "--fr-stranded", "Reverse Forward" = "--rf-stranded"), selected = "", width = "auto"), + + p("kallisto: kallisto is a program for quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads."), + + p("Website : ",a(href="https://pachterlab.github.io/kallisto/","https://pachterlab.github.io/kallisto/",target="_blank")), + + p("Documentation : ",a(href="https://pachterlab.github.io/kallisto/manual","https://pachterlab.github.io/kallisto/manual",target="_blank")), + + p("Paper : ",a(href="https://doi.org/10.1038/nbt.3519","https://doi.org/10.1038/nbt.3519",target="_blank")) + + )), +conditionalPanel(condition = "input.selectquantification == 'salmon'",box(title = "Salmon", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + box(title = "Reference fasta file", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + selectInput("salmon_index_input_select",label = "Select where to find the file", selected = "server", choices = c("On server" = "server", "On your machine" = "local")), + conditionalPanel(condition = "input.salmon_index_input_select == 'server'", + tags$label("Reference fasta file"), + fluidRow( + column(4,shinyFilesButton("shinyfiles_salmon_index_input",label="Please select a file", title="Reference fasta file", multiple=FALSE)), + column(8,textInput("salmon_index_input_server",label=NULL,value="")) + ) + ), + conditionalPanel(condition = "input.salmon_index_input_select == 'local'", + fileInput("salmon_index_input_local",label = "Reference fasta file") + ) + ) +, + + numericInput("salmon_index_threads", label = "Number of threads to use for index creation", min = 1, max = NA, step = 1, width = "auto", value = 4), + + numericInput("salmon_quant_threads", label = "Number of threads to use", min = 1, max = NA, step = 1, width = "auto", value = 4), + + p("Salmon: Salmon is a tool for quantifying the expression of transcripts using RNA-seq data. "), + + p("Website : ",a(href="https://combine-lab.github.io/salmon/","https://combine-lab.github.io/salmon/",target="_blank")), + + p("Documentation : ",a(href="https://salmon.readthedocs.io/en/latest/","https://salmon.readthedocs.io/en/latest/",target="_blank")), + + p("Paper : ",a(href="https://doi.org/10.1038/nmeth.4197","https://doi.org/10.1038/nmeth.4197",target="_blank")) + + )))) + + diff --git a/sagApp/pages/pages_def_trimming.R b/sagApp/pages/pages_def_trimming.R new file mode 100644 index 0000000..5778a98 --- /dev/null +++ b/sagApp/pages/pages_def_trimming.R @@ -0,0 +1,26 @@ +tabtrimming = fluidPage( + +box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE, solidHeader = TRUE, + + selectInput("selecttrimming", label = "Select the tool to use : ", selected = "null", choices = list("trimmomatic" = "trimmomatic", "null" = "null")), + +conditionalPanel(condition = "input.selecttrimming == 'trimmomatic'",box(title = "Trimmomatic", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + numericInput("trimmomatic_threads", label = "Number of threads to use", min = 1, max = NA, step = 1, width = "auto", value = 4), + + radioButtons("trimmomatic_qc_score", label = "Quality score encoding", choices = list("phred33" = "-phred33", "phred64" = "-phred64"), selected = "-phred64", width = "auto"), + + p("Trimmomatic: A flexible read trimming tool for Illumina NGS data"), + + p("Website : ",a(href="http://www.usadellab.org/cms/?page=trimmomatic","http://www.usadellab.org/cms/?page=trimmomatic",target="_blank")), + + p("Documentation : ",a(href="http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf","http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf",target="_blank")), + + p("Paper : ",a(href="https://doi.org/10.1093/bioinformatics/btu170","https://doi.org/10.1093/bioinformatics/btu170",target="_blank")) + + )), +conditionalPanel(condition = "input.selecttrimming == 'null'",box(title = "null", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE, + p("null: Skip this step") + + )))) + + diff --git a/sagApp/sagApp.Rproj b/sagApp/sagApp.Rproj new file mode 100644 index 0000000..7c00b7f --- /dev/null +++ b/sagApp/sagApp.Rproj @@ -0,0 +1,15 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + + diff --git a/sagApp/server/opt_global.R b/sagApp/server/opt_global.R new file mode 100644 index 0000000..286e7a9 --- /dev/null +++ b/sagApp/server/opt_global.R @@ -0,0 +1,315 @@ +save_params <- function(path_param){ + res = "" # Page : global_params + res = paste0(res , paste("global_params:", paste0('"', input$selectglobal_params, '"'), "\n", sep = " ")) + if(!is.na(as.numeric(input$results_dir))) { + res = paste0(res, paste("results_dir:", input$results_dir, "\n", sep = " ")) + } else { + res = paste0(res, paste("results_dir:", paste0('"', input$results_dir, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$sample_dir))) { + res = paste0(res, paste("sample_dir:", input$sample_dir, "\n", sep = " ")) + } else { + res = paste0(res, paste("sample_dir:", paste0('"', input$sample_dir, '"'), "\n", sep = " ")) + } + + res = paste0(res, paste("group_file_select:", paste0('"', input$group_file_select, '"'), "\n", sep = " ")) + if(input$group_file_select == "server") { + res = paste0(res, paste("group_file:", paste0('"', input$group_file_server, '"'), "\n", sep = " ")) + } + if(input$group_file_select == "local"){ + res = paste0(res, paste("group_file:", paste0('"', input$group_file_local$datapath, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$SeOrPe))) { + res = paste0(res, paste("SeOrPe:", input$SeOrPe, "\n", sep = " ")) + } else { + res = paste0(res, paste("SeOrPe:", paste0('"', input$SeOrPe, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$memo))) { + res = paste0(res, paste("memo:", input$memo, "\n", sep = " ")) + } else { + res = paste0(res, paste("memo:", paste0('"', input$memo, '"'), "\n", sep = " ")) + } + + # Page : trimming + res = paste0(res , paste("trimming:", paste0('"', input$selecttrimming, '"'), "\n", sep = " ")) + if(!is.na(as.numeric(input$trimmomatic_threads))) { + res = paste0(res, paste("trimmomatic_threads:", input$trimmomatic_threads, "\n", sep = " ")) + } else { + res = paste0(res, paste("trimmomatic_threads:", paste0('"', input$trimmomatic_threads, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$trimmomatic_qc_score))) { + res = paste0(res, paste("trimmomatic_qc_score:", input$trimmomatic_qc_score, "\n", sep = " ")) + } else { + res = paste0(res, paste("trimmomatic_qc_score:", paste0('"', input$trimmomatic_qc_score, '"'), "\n", sep = " ")) + } + + # Page : quality_check + res = paste0(res , paste("quality_check:", paste0('"', input$selectquality_check, '"'), "\n", sep = " ")) + if(!is.na(as.numeric(input$fastqc_threads))) { + res = paste0(res, paste("fastqc_threads:", input$fastqc_threads, "\n", sep = " ")) + } else { + res = paste0(res, paste("fastqc_threads:", paste0('"', input$fastqc_threads, '"'), "\n", sep = " ")) + } + + # Page : quantification + res = paste0(res , paste("quantification:", paste0('"', input$selectquantification, '"'), "\n", sep = " ")) + res = paste0(res, paste("kallisto_index_input_select:", paste0('"', input$kallisto_index_input_select, '"'), "\n", sep = " ")) + if(input$kallisto_index_input_select == "server") { + res = paste0(res, paste("kallisto_index_input:", paste0('"', input$kallisto_index_input_server, '"'), "\n", sep = " ")) + } + if(input$kallisto_index_input_select == "local"){ + res = paste0(res, paste("kallisto_index_input:", paste0('"', input$kallisto_index_input_local$datapath, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$kallisto_quant_threads))) { + res = paste0(res, paste("kallisto_quant_threads:", input$kallisto_quant_threads, "\n", sep = " ")) + } else { + res = paste0(res, paste("kallisto_quant_threads:", paste0('"', input$kallisto_quant_threads, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$kallisto_quant_fragment_length_SE))) { + res = paste0(res, paste("kallisto_quant_fragment_length_SE:", input$kallisto_quant_fragment_length_SE, "\n", sep = " ")) + } else { + res = paste0(res, paste("kallisto_quant_fragment_length_SE:", paste0('"', input$kallisto_quant_fragment_length_SE, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$kallisto_quant_standard_deviation_SE))) { + res = paste0(res, paste("kallisto_quant_standard_deviation_SE:", input$kallisto_quant_standard_deviation_SE, "\n", sep = " ")) + } else { + res = paste0(res, paste("kallisto_quant_standard_deviation_SE:", paste0('"', input$kallisto_quant_standard_deviation_SE, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$kallisto_quant_stranded_PE))) { + res = paste0(res, paste("kallisto_quant_stranded_PE:", input$kallisto_quant_stranded_PE, "\n", sep = " ")) + } else { + res = paste0(res, paste("kallisto_quant_stranded_PE:", paste0('"', input$kallisto_quant_stranded_PE, '"'), "\n", sep = " ")) + } + + res = paste0(res, paste("salmon_index_input_select:", paste0('"', input$salmon_index_input_select, '"'), "\n", sep = " ")) + if(input$salmon_index_input_select == "server") { + res = paste0(res, paste("salmon_index_input:", paste0('"', input$salmon_index_input_server, '"'), "\n", sep = " ")) + } + if(input$salmon_index_input_select == "local"){ + res = paste0(res, paste("salmon_index_input:", paste0('"', input$salmon_index_input_local$datapath, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$salmon_index_threads))) { + res = paste0(res, paste("salmon_index_threads:", input$salmon_index_threads, "\n", sep = " ")) + } else { + res = paste0(res, paste("salmon_index_threads:", paste0('"', input$salmon_index_threads, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$salmon_quant_threads))) { + res = paste0(res, paste("salmon_quant_threads:", input$salmon_quant_threads, "\n", sep = " ")) + } else { + res = paste0(res, paste("salmon_quant_threads:", paste0('"', input$salmon_quant_threads, '"'), "\n", sep = " ")) + } + + # Page : DE + res = paste0(res , paste("DE:", paste0('"', input$selectDE, '"'), "\n", sep = " ")) + if(!is.na(as.numeric(input$edger_threads))) { + res = paste0(res, paste("edger_threads:", input$edger_threads, "\n", sep = " ")) + } else { + res = paste0(res, paste("edger_threads:", paste0('"', input$edger_threads, '"'), "\n", sep = " ")) + } + + if(input$edger_tx2gene) { + res = paste0(res, paste("edger_tx2gene:", "true", "\n", sep = " ")) + } else { + res = paste0(res, paste("edger_tx2gene:", "false", "\n", sep = " ")) + } + + res = paste0(res, paste("edger_annotations_select:", paste0('"', input$edger_annotations_select, '"'), "\n", sep = " ")) + if(input$edger_annotations_select == "server") { + res = paste0(res, paste("edger_annotations:", paste0('"', input$edger_annotations_server, '"'), "\n", sep = " ")) + } + if(input$edger_annotations_select == "local"){ + res = paste0(res, paste("edger_annotations:", paste0('"', input$edger_annotations_local$datapath, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$edger_normfact))) { + res = paste0(res, paste("edger_normfact:", input$edger_normfact, "\n", sep = " ")) + } else { + res = paste0(res, paste("edger_normfact:", paste0('"', input$edger_normfact, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$edger_dispersion))) { + res = paste0(res, paste("edger_dispersion:", input$edger_dispersion, "\n", sep = " ")) + } else { + res = paste0(res, paste("edger_dispersion:", paste0('"', input$edger_dispersion, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$edger_testType))) { + res = paste0(res, paste("edger_testType:", input$edger_testType, "\n", sep = " ")) + } else { + res = paste0(res, paste("edger_testType:", paste0('"', input$edger_testType, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$deseq2_threads))) { + res = paste0(res, paste("deseq2_threads:", input$deseq2_threads, "\n", sep = " ")) + } else { + res = paste0(res, paste("deseq2_threads:", paste0('"', input$deseq2_threads, '"'), "\n", sep = " ")) + } + + if(input$deseq2_tx2gene) { + res = paste0(res, paste("deseq2_tx2gene:", "true", "\n", sep = " ")) + } else { + res = paste0(res, paste("deseq2_tx2gene:", "false", "\n", sep = " ")) + } + + res = paste0(res, paste("deseq2_annotations_select:", paste0('"', input$deseq2_annotations_select, '"'), "\n", sep = " ")) + if(input$deseq2_annotations_select == "server") { + res = paste0(res, paste("deseq2_annotations:", paste0('"', input$deseq2_annotations_server, '"'), "\n", sep = " ")) + } + if(input$deseq2_annotations_select == "local"){ + res = paste0(res, paste("deseq2_annotations:", paste0('"', input$deseq2_annotations_local$datapath, '"'), "\n", sep = " ")) + } + + if(!is.na(as.numeric(input$deseq2_fitType))) { + res = paste0(res, paste("deseq2_fitType:", input$deseq2_fitType, "\n", sep = " ")) + } else { + res = paste0(res, paste("deseq2_fitType:", paste0('"', input$deseq2_fitType, '"'), "\n", sep = " ")) + } + + if(input$deseq2_betaPrior) { + res = paste0(res, paste("deseq2_betaPrior:", "true", "\n", sep = " ")) + } else { + res = paste0(res, paste("deseq2_betaPrior:", "false", "\n", sep = " ")) + } + + a = yaml.load_file("/workflow/params.total.yml") + p = a[["params"]] + a["params"] = NULL + b = yaml.load(res) + pnotb = subset(names(p), !(names(p)%in%names(b))) + d = list() + d$params = c(p[pnotb],b) + logical = function(x) { + result <- ifelse(x, "True", "False") + class(result) <- "verbatim" + return(result) + } + d = c(d,a) + get_samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," ",input$SeOrPe),intern = T)) + d$samples = get_samples$samples + d$params$sample_suffix = get_samples$suffix + write_yaml(d,path_param,handlers=list(logical = logical)) + } + +force_rule <- function(force_from){ + if (input$force_from=="none"){ + return("") + } + else if (input$force_from=="all"){ return("--forcerun all") } + else { + params = yaml.load_file(paste0(input$results_dir,"/params.yml")) + outputs = params[["outputs"]] + tool = params[["params"]][[force_from]] + if (length(outputs[[tool]])==1) + rule = names(outputs[[tool]])[[1]] + else{ + rule = names(outputs[[tool]])[[grep(input$SeOrPe,names(outputs[[tool]]))]] + } + return(paste0("--forcerun ",rule)) + } +} +#' Event when use RULEGRAPH button +observeEvent({c(input$sidebarmenu,input$refresh_rg)}, { + + if(input$sidebarmenu=="RULEGRAPH"){ + input_list <- reactiveValuesToList(input) + toggle_inputs(input_list,F,F) + path_param <- paste0(input$results_dir,"/params.yml") + + save_params(path_param) + i = sample.int(1000,size = 1) + + system(paste0("rm ",input$results_dir,"/rulegraph*")) + + outUI = tryCatch({ + system(paste0("snakemake -s /workflow/Snakefile --configfile ",input$results_dir,"/params.yml -d ",input$results_dir," all --rulegraph > ",input$results_dir,"/rulegraph",i,".dot"),intern=T) + system(paste0("cat ",input$results_dir,"/rulegraph",i,".dot | dot -Tsvg -Gratio=0.75 > ",input$results_dir,"/rulegraph",i,".svg"),intern=T) + tagList(img(src = paste0("results/rulegraph",i,".svg") ,alt = "Rulegraph of Snakemake jobs",style="max-width: 100%;height: auto;display: block;margin: auto"))}, + error = function(e){ + system(paste0("touch ",input$results_dir,"/logs/workflow_end.error"),wait = T) + return(tags$p(paste0("error : ",e$message)))}, + warning = function(w){ + system(paste0("touch ",input$results_dir,"/logs/workflow_end.error"),wait = T) + return(tags$p(paste0("error : ",w$message)))}) + addResourcePath("results", input$results_dir) + output$RULEGRAPH_svg = renderUI(outUI) + toggle_inputs(input_list,T,F) +}}) +#' Event when use RunPipeline button +observeEvent(input$RunPipeline, { + + rv$running = T + input_list <- reactiveValuesToList(input) + toggle_inputs(input_list,F,F) + updateTabsetPanel(session, "sidebarmenu", selected = "run_out") + path_param <- paste0(input$results_dir,"/params.yml") + + save_params(path_param) + + + outUI = tryCatch({ + if (!dir.exists(paste0(input$results_dir,"/logs"))){ + dir.create(paste0(input$results_dir,"/logs")) + } + if (!file.exists(paste0(input$results_dir,"/logs/runlog.txt"))){ + file.create(paste0(input$results_dir,"/logs/runlog.txt")) + } + system(paste0("touch ",input$results_dir,"/logs/workflow.running"),wait = T) + system(paste0("snakemake -s /workflow/Snakefile --configfile ",input$results_dir,"/params.yml -d ",input$results_dir," all --rulegraph | dot -Tpng -Gratio=0.75 > ",input$results_dir,"/Rule_graph_mqc.png")) + force = force_rule(input$force_from) + rerun = if (input$rerun_incomplete) "--rerun-incomplete" else "" + system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " -d ", input$results_dir , " --cores ", input$cores, " all ", force, " ",rerun),wait = FALSE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt")) + tags$iframe(src="results/multiqc_report.html",width="100%", height="900px")}, + error = function(e){ + system(paste0("touch ",input$results_dir,"/logs/workflow_end.error"),wait = T) + return(tags$p(paste0("error : ",e$message)))}, + warning = function(w){ + system(paste0("touch ",input$results_dir,"/logs/workflow_end.error"),wait = T) + return(tags$p(paste0("error : ",w$message)))}) + }) + + shinyDirChoose(input, "shinydir_results_dir", root=c(Results="/Results"),session = session) + observeEvent({parseDirPath(c(Results="/Results"),input$shinydir_results_dir)},{ + updateTextInput(session,"results_dir",value = parseDirPath(c(Results="/Results"),input$shinydir_results_dir)) + }) + + shinyDirChoose(input, "shinydir_sample_dir", root=c(Data="/Data",Results="/Results"),session = session) + observeEvent({parseDirPath(c(Data="/Data",Results="/Results"),input$shinydir_sample_dir)},{ + updateTextInput(session,"sample_dir",value = parseDirPath(c(Data="/Data",Results="/Results"),input$shinydir_sample_dir)) + }) + + shinyFileChoose(input, "shinyfiles_group_file", root=c(Data="/Data",Results="/Results"),session = session) + observeEvent({parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_group_file)$datapath[1]},{ + updateTextInput(session,"group_file_server",value = parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_group_file)$datapath[1]) + }) + + shinyFileChoose(input, "shinyfiles_kallisto_index_input", root=c(Data="/Data",Results="/Results"),session = session) + observeEvent({parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_kallisto_index_input)$datapath[1]},{ + updateTextInput(session,"kallisto_index_input_server",value = parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_kallisto_index_input)$datapath[1]) + }) + + shinyFileChoose(input, "shinyfiles_salmon_index_input", root=c(Data="/Data",Results="/Results"),session = session) + observeEvent({parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_salmon_index_input)$datapath[1]},{ + updateTextInput(session,"salmon_index_input_server",value = parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_salmon_index_input)$datapath[1]) + }) + + shinyFileChoose(input, "shinyfiles_edger_annotations", root=c(Data="/Data",Results="/Results"),session = session) + observeEvent({parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_edger_annotations)$datapath[1]},{ + updateTextInput(session,"edger_annotations_server",value = parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_edger_annotations)$datapath[1]) + }) + + shinyFileChoose(input, "shinyfiles_deseq2_annotations", root=c(Data="/Data",Results="/Results"),session = session) + observeEvent({parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_deseq2_annotations)$datapath[1]},{ + updateTextInput(session,"deseq2_annotations_server",value = parseFilePaths(c(Data="/Data",Results="/Results"),input$shinyfiles_deseq2_annotations)$datapath[1]) + }) + + diff --git a/sagApp/www/added_styles.css b/sagApp/www/added_styles.css new file mode 100644 index 0000000..e69de29 diff --git a/sagApp/www/chooser-binding.js b/sagApp/www/chooser-binding.js new file mode 100644 index 0000000..cfb3720 --- /dev/null +++ b/sagApp/www/chooser-binding.js @@ -0,0 +1,84 @@ +(function() { + +function updateChooser(chooser) { + chooser = $(chooser); + var left = chooser.find("select.left"); + var right = chooser.find("select.right"); + var leftArrow = chooser.find(".left-arrow"); + var rightArrow = chooser.find(".right-arrow"); + + var canMoveTo = (left.val() || []).length > 0; + var canMoveFrom = (right.val() || []).length > 0; + + leftArrow.toggleClass("muted", !canMoveFrom); + rightArrow.toggleClass("muted", !canMoveTo); +} + +function move(chooser, source, dest) { + chooser = $(chooser); + var selected = chooser.find(source).children("option:selected"); + var dest = chooser.find(dest); + dest.children("option:selected").each(function(i, e) {e.selected = false;}); + dest.append(selected); + updateChooser(chooser); + chooser.trigger("change"); +} + +$(document).on("change", ".chooser select", function() { + updateChooser($(this).parents(".chooser")); +}); + +$(document).on("click", ".chooser .right-arrow", function() { + move($(this).parents(".chooser"), ".left", ".right"); +}); + +$(document).on("click", ".chooser .left-arrow", function() { + move($(this).parents(".chooser"), ".right", ".left"); +}); + +$(document).on("dblclick", ".chooser select.left", function() { + move($(this).parents(".chooser"), ".left", ".right"); +}); + +$(document).on("dblclick", ".chooser select.right", function() { + move($(this).parents(".chooser"), ".right", ".left"); +}); + +var binding = new Shiny.InputBinding(); + +binding.find = function(scope) { + return $(scope).find(".chooser"); +}; + +binding.initialize = function(el) { + updateChooser(el); +}; + +binding.getValue = function(el) { + return { + left: $.makeArray($(el).find("select.left option").map(function(i, e) { return e.value; })), + right: $.makeArray($(el).find("select.right option").map(function(i, e) { return e.value; })) + } +}; + +binding.setValue = function(el, value) { + // TODO: implement +}; + +binding.subscribe = function(el, callback) { + $(el).on("change.chooserBinding", function(e) { + callback(); + }); +}; + +binding.unsubscribe = function(el) { + $(el).off(".chooserBinding"); +}; + +binding.getType = function() { + return "shinyjsexamples.chooser"; +}; + +Shiny.inputBindings.register(binding, "shinyjsexamples.chooser"); + +})(); \ No newline at end of file diff --git a/waw_workflow.qsub b/waw_workflow.qsub new file mode 100644 index 0000000..8c197fa --- /dev/null +++ b/waw_workflow.qsub @@ -0,0 +1,28 @@ +#!/bin/bash +#$ -S /bin/bash +# Job name +#$ -N WAW_worflow +# Using current working directory (otherwise, you will have to use '#$ wd /path/to/run') +#$ -cwd +# job time limits (h_rt is required [s_rt == software time limit / h_rt == hardware time limit]) +#$ -l h_rt=48:00:00 +# Redirects the standard output to the named file. +#$ -o Results/waw.out +#$ -e Results/waw.err +module load singularity-3.1 + +dataDir=$1 +resultsDir=$2 +# Config file in a location binded to /Data or /Results +#ex. file in /home/khalid/dataanalyse/config.yaml +#/home/khalid/dataanalyse/ is dataDir thet will be binded to /Data in the container +#configfile must be set to /Data/config.yaml +configFile=$3 +cores=$4 + +#if we must build the image from docker image +singularity build long_read_assembly_latest.simg docker://mmassaviol/long_read_assembly:latest + + +#Run the workflow in cmd line +singularity exec -B $dataDir:/Data -B $resultsDir:/Results long_read_assembly_latest.simg snakemake -s /workflow/Snakefile --configfile $configFile --cores $cores \ No newline at end of file -- GitLab