diff --git a/raw_inputs/raw_snap_index.py b/raw_inputs/raw_snap_index.py
new file mode 100755
index 0000000000000000000000000000000000000000..1105c40775b3b659394489549dbe19e36ce0269f
--- /dev/null
+++ b/raw_inputs/raw_snap_index.py
@@ -0,0 +1,18 @@
+import os
+import re
+import sys
+
+def raw_snap_index(results_dir, snap_index):
+    """List the SNAP index files found directly inside ``snap_index``.
+
+    ``results_dir`` is not used; it is kept because raw_snap_index.yaml calls
+    this function with both arguments.
+
+    Returns a dict whose "snap_index" entry holds the joined path of every
+    regular file in the directory (sub-directories are skipped), sorted by name.
+    """
+    names = sorted(os.listdir(snap_index))
+    paths = (os.path.join(snap_index, name) for name in names)
+    return {"snap_index": [path for path in paths if os.path.isfile(path)]}
+
+#print(raw_snap_index(sys.argv[1],sys.argv[2]) )
\ No newline at end of file
diff --git a/raw_inputs/raw_snap_index.yaml b/raw_inputs/raw_snap_index.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..d84786aa7deb90d844e870b022c7012ca608dd36
--- /dev/null
+++ b/raw_inputs/raw_snap_index.yaml
@@ -0,0 +1,13 @@
+{
+  name: raw_snap_index,
+  function_call: "raw_snap_index(config['results_dir'], config['snap_index'])",
+  options: [
+    {
+      name: "snap_index",
+      type: "input_dir",
+      value: "/Data",
+      label: "Directory containing the snap index files: ",
+      volumes: [Data: "/Data", Results: "/Results"]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tools/global.yaml b/tools/global.yaml
index 3e3ea8e47bbd504edea1a3d6c30c59d80f22cef1..162af1d2969b27eb51584476482b1e66c57385fb 100755
--- a/tools/global.yaml
+++ b/tools/global.yaml
@@ -54,6 +54,7 @@ data: [
 
   {name: "illumina_dir", category: "sequences"},
   {name: "accel_align_index", category: "index"},
+  {name: "snap_index", category: "index"},
   {name: "bowtie_index", category: "index"},
   {name: "bowtie2_index", category: "index"},
   {name: "bwa_mem_index", category: "index"},
diff --git a/tools/snap/snap.rule.snakefile b/tools/snap/snap.rule.snakefile
new file mode 100755
index 0000000000000000000000000000000000000000..212ea9161828666ff32b0502b405c707ab20b7ff
--- /dev/null
+++ b/tools/snap/snap.rule.snakefile
@@ -0,0 +1,68 @@
+if config["SeOrPe"] == "PE":
+
+    # Paired-end alignment: one BAM per sample, stderr captured in the log file.
+    rule <step_name>__snap_PE:
+        input:
+            **<step_name>__snap_PE_inputs()
+        output:
+            bam = config["results_dir"]+"/"+config["<step_name>__snap_PE_output_dir"]+"/{sample}.bam"
+        log:
+            config["results_dir"]+"/logs/" + config["<step_name>__snap_PE_output_dir"] + "/{sample}_snap_log.txt"
+        threads:
+            config["<step_name>__snap_threads"]
+        params:
+            command = config["<step_name>__snap_PE_command"],
+            #indexPrefix = config["<step_name>__snap_index_output_dir"]+"/index",
+            #/patho_index/indexName.fna.hash
+            # The index argument is the folder holding the GenomeIndexHash file.
+            # dirname() needs one path, not a list, hence the [0] on the filtered inputs.
+            indexDir = lambda w, input: os.path.dirname([x for x in input.index if 'GenomeIndexHash' in x][0]),
+            snap_seed_number = config["<step_name>__snap_seed_number"],
+            snap_min_insert_size = config["<step_name>__snap_min_insert_size"],
+            snap_max_insert_size = config["<step_name>__snap_max_insert_size"],
+            snap_force_spacing = "-fs" if config["<step_name>__snap_force_spacing"] else "",
+            snap_disable_alt_awareness = "-A-" if config["<step_name>__snap_disable_alt_awareness"] else "",
+            snap_max_score_gap = config["<step_name>__snap_max_score_gap"]
+        shell:
+            "{params.command} "
+            "{params.indexDir} "
+            "{input.read} {input.read2} "
+            "-t {threads} "
+            "-n {params.snap_seed_number} "
+            "-s {params.snap_min_insert_size} {params.snap_max_insert_size} "
+            "{params.snap_force_spacing} "
+            "{params.snap_disable_alt_awareness} "
+            "-asg {params.snap_max_score_gap} "
+            "-so "
+            "-o {output.bam} 2> {log}; "
+
+
+elif config["SeOrPe"] == "SE":
+
+    # Single-end alignment: same options as the PE rule minus the insert-size ones.
+    rule <step_name>__snap_SE:
+        input:
+            **<step_name>__snap_SE_inputs()
+        output:
+            bam = config["results_dir"]+"/"+config["<step_name>__snap_SE_output_dir"]+"/{sample}.bam"
+        log:
+            config["results_dir"]+"/logs/" + config["<step_name>__snap_SE_output_dir"] + "/{sample}_snap_log.txt"
+        threads:
+            config["<step_name>__snap_threads"]
+        params:
+            command = config["<step_name>__snap_SE_command"],
+            snap_seed_number = config["<step_name>__snap_seed_number"],
+            snap_disable_alt_awareness = "-A-" if config["<step_name>__snap_disable_alt_awareness"] else "",
+            snap_max_score_gap = config["<step_name>__snap_max_score_gap"],
+            # dirname() needs one path, not a list, hence the [0] on the filtered inputs.
+            indexDir = lambda w, input: os.path.dirname([x for x in input.index if 'GenomeIndexHash' in x][0]),
+        shell:
+            "{params.command} "
+            "{params.indexDir} "
+            "{input.read} "
+            "-t {threads} "
+            "-n {params.snap_seed_number} "
+            "{params.snap_disable_alt_awareness} "
+            "-asg {params.snap_max_score_gap} "
+            "-so "
+            "-o {output.bam} 2> {log}; "
diff --git a/tools/snap/snap.yaml b/tools/snap/snap.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33b2097da7bee19ac94b26cabfcb10b4dd6b9b91
--- /dev/null
+++ b/tools/snap/snap.yaml
@@ -0,0 +1,153 @@
+{
+  id: snap,
+  name: Scalable Nucleotide Alignment Program,
+  article: arXiv:1111.5572v1,
+  website: "https://www.microsoft.com/en-us/research/project/snap/",
+  git: "https://github.com/amplab/snap",
+  description: "fast and accurate aligner for short DNA reads. It is optimized for modern read lengths of 100 bases or higher, and aligns them quickly through a hash-based indexing scheme.",
+  version: "2.0",
+  documentation: "https://github.com/amplab/snap/tree/master/docs",
+  multiqc: "custom",
+  commands:
+    [
+      {
+        name: snap_PE,
+        cname: "snap PE",
+        command: snap-aligner paired,
+        category: "mapping",
+        output_dir: snap/PE,
+        inputs: [{ name: read, type: "reads" }, { name: read2, type: "reads" }, { name: index, type: "snap_index", file: index, description: "Index files for snap alignment" }],
+        outputs: [{ name: bam, type: "bams", file: "{sample}.bam", description: "Alignment file" }],
+        options:
+          [
+            {
+              name: snap_threads,
+              prefix: -t,
+              type: numeric,
+              value: 4,
+              min: 1,
+              max: NA,
+              step: 1,
+              label: "Number of threads to use",
+            },
+            {
+              name: snap_seed_number,
+              prefix: -n,
+              type: numeric,
+              value: 4,
+              min: 1,
+              max: 10,
+              step: 1,
+              label: "number of seeds to use per read",
+            },
+            {
+              name: snap_min_insert_size,
+              prefix: -s,
+              type: numeric,
+              value: 0,
+              min: 0,
+              max: 400,
+              step: 10,
+              label: "minimum spacing to allow between paired ends",
+            },
+            {
+              name: snap_max_insert_size,
+              prefix: -s,
+              type: numeric,
+              value: 1000,
+              min: 10,
+              max: 3000,
+              step: 10,
+              label: "maximum spacing to allow between paired ends",
+            },
+            {
+              name: snap_force_spacing,
+              prefix: -fs,
+              type: checkbox,
+              value: FALSE,
+              label: "force spacing to lie between min and max.",
+            },
+            {
+              name: snap_disable_alt_awareness,
+              prefix: -A-,
+              type: checkbox,
+              value: FALSE,
+              label: "Disable ALT awareness. By default snap tries to map reads to the primary assembly and only to choose ALT alignments when they're much better, and to compute MAPQ for non-ALT alignments using only non-ALT hits. This flag disables that behavior",
+            },
+            {
+              name: snap_max_score_gap,
+              prefix: -asg,
+              type: numeric,
+              value: 2,
+              min: 1,
+              max: 10,
+              step: 1,
+              label: "Maximum score gap to prefer a non-ALT alignment. If the best non-ALT alignment is more than this much worse than the best ALT alignment emit the ALT alignment as the primary result rather than as a supplementary result.",
+            },
+          ]
+      },
+      {
+        name: snap_SE,
+        cname: "snap SE",
+        command: snap-aligner single,
+        category: "mapping",
+        output_dir: snap/SE,
+        inputs: [{ name: read, type: "reads" }, { name: index, type: "snap_index", file: index, description: "Index files for snap alignment" }],
+        outputs: [{ name: bam, type: "bams", file: "{sample}.bam", description: "Alignment file" }],
+        options:
+          [
+            {
+              name: snap_threads,
+              prefix: -t,
+              type: numeric,
+              value: 4,
+              min: 1,
+              max: NA,
+              step: 1,
+              label: "Number of threads to use",
+            },
+            {
+              name: snap_seed_number,
+              prefix: -n,
+              type: numeric,
+              value: 4,
+              min: 1,
+              max: 10,
+              step: 1,
+              label: "number of seeds to use per read",
+            },
+            {
+              name: snap_disable_alt_awareness,
+              prefix: -A-,
+              type: checkbox,
+              value: FALSE,
+              label: "Disable ALT awareness. By default snap tries to map reads to the primary assembly and only to choose ALT alignments when they're much better, and to compute MAPQ for non-ALT alignments using only non-ALT hits. This flag disables that behavior",
+            },
+            {
+              name: snap_max_score_gap,
+              prefix: -asg,
+              type: numeric,
+              value: 2,
+              min: 1,
+              max: 10,
+              step: 1,
+              label: "Maximum score gap to prefer a non-ALT alignment. If the best non-ALT alignment is more than this much worse than the best ALT alignment emit the ALT alignment as the primary result rather than as a supplementary result.",
+            },
+          ]
+      }
+    ],
+  install: {
+    snap: [
+      "cd /opt/biotools",
+      "git clone https://github.com/amplab/snap.git",
+      "cd snap",
+      "make -j 8",
+      "ENV PATH /opt/biotools/snap:$PATH",
+    ]
+  },
+  citations: {
+    snap: [
+      "Faster and More Accurate Sequence Alignment with SNAP. Matei Zaharia, William J. Bolosky, Kristal Curtis, Armando Fox, David Patterson, Scott Shenker, Ion Stoica, Richard M. Karp, and Taylor Sittler. arXiv:1111.5572v1, November 2011."
+    ]
+  }
+}
\ No newline at end of file