def raw_accel_align_index(results_dir, accel_align_index):
    """Collect the regular files of an Accel-Align index directory.

    Args:
        results_dir: Unused here; kept so the call signature stays
            compatible with existing callers.
        accel_align_index: Path of the directory to scan.

    Returns:
        A dict with the single key ``"accel_align_index"`` mapped to the
        list of paths (directory joined with file name) of every regular
        file found directly inside ``accel_align_index``. Sub-directories
        are skipped; the scan is not recursive.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot
            be listed (missing, not a directory, permission denied).
    """
    # Imported at function scope so the helper stays self-contained.
    from os import listdir
    from os.path import isfile, join

    # os.listdir() yields entries in arbitrary order; sorting makes the
    # returned input list reproducible from one run to the next.
    candidates = (
        join(accel_align_index, entry)
        for entry in sorted(listdir(accel_align_index))
    )
    return {"accel_align_index": [path for path in candidates if isfile(path)]}
# Rule template: run the configured Accel-Align indexing command on the
# reference fasta, then copy the fasta and its ".hash" companion into the
# step's output directory as "index" and "index.hash".
# NOTE(review): presumably the indexer writes "<genome_fasta>.hash" next to
# the input fasta -- confirm against the Accel-Align documentation.
rule <step_name>__accel_align_index:
    input:
        **<step_name>__accel_align_index_inputs()
    output:
        index = config["<step_name>__accel_align_index_output_dir"]+"/index.hash",
    log:
        config["results_dir"]+"/logs/" + config["<step_name>__accel_align_index_output_dir"] + "/index.log"
    params:
        command = config["<step_name>__accel_align_index_command"],
        accel_align_index_kmers_length = config["<step_name>__accel_align_index_kmers_length"],
        out_dir = config["<step_name>__accel_align_index_output_dir"]
    # The adjacent string literals are concatenated into one shell command
    # line, so each command needs an explicit separator. The two copies are
    # chained with "&&" so a failed first copy is not silently followed by
    # the second.
    shell:
        "{params.command} "
        "-l {params.accel_align_index_kmers_length} "
        "{input.genome_fasta} "
        "|& tee {log}; "
        "cp {input.genome_fasta} {params.out_dir}/index && "
        "cp {input.genome_fasta}.hash {params.out_dir}/index.hash"
file: index, description: "Index files for Accel-Align alignment" }], + options: + [ + { + name: accel_align_index_genome_fasta, + type: input_file, + value: "", + label: "Path to reference genome fasta file", + }, + { + name: accel_align_index_kmers_length, + prefix: -l, + type: numeric, + value: 32, + min: 1, + max: 64, + step: 1, + label: "the length of k-mers to use to index genome", + } + ] + }, + ], + install: { + accel_align: [ + "cd /opt/biotools", + "git clone https://github.com/raja-appuswamy/accel-align-release.git", + "ENV PATH /opt/biotools/accel-align-release:$PATH", + ], + Intel_TBB: [ + "apt-get install -y libtbb-dev", + ], + samtools: [ + "cd /opt/biotools", + "wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2", + "tar -xvjf samtools-1.9.tar.bz2", + "cd samtools-1.9", + "./configure && make", + "cd ..", + "mv samtools-1.9/samtools bin/samtools", + "rm -r samtools-1.9 samtools-1.9.tar.bz2" + ] + }, + citations: { + accel_align: [ + "Yan, Y., Chaturvedi, N. & Appuswamy, R. Accel-Align: a fast sequence mapper and aligner based on the seed-embed-extend method. BMC Bioinformatics 22, 257 (2021)." + ], + samtools: [ + "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352" + ] + } +} \ No newline at end of file diff --git a/tools/global.yaml b/tools/global.yaml index cb534829..3e3ea8e4 100755 --- a/tools/global.yaml +++ b/tools/global.yaml @@ -53,6 +53,7 @@ data: [ {name: "fast5_dir", category: "sequences"}, {name: "illumina_dir", category: "sequences"}, + {name: "accel_align_index", category: "index"}, {name: "bowtie_index", category: "index"}, {name: "bowtie2_index", category: "index"}, {name: "bwa_mem_index", category: "index"}, -- GitLab