diff --git a/tools/flye/flye.rule.snakefile b/tools/flye/flye.rule.snakefile
new file mode 100755
index 0000000000000000000000000000000000000000..d57d5538bf2b1aa76f7f17b309c458f57fa5d3bc
--- /dev/null
+++ b/tools/flye/flye.rule.snakefile
@@ -0,0 +1,32 @@
+# Assemble long reads (PacBio / Oxford Nanopore) with Flye, then rewrite the
+# resulting FASTA so that every sequence sits on a single line.
+rule <step_name>__flye:
+    input:
+        **<step_name>__flye_inputs(),
+    output:
+        assembly_fasta = config["results_dir"] + "/" + config["<step_name>__flye_output_dir"] + "/assembly.fasta",
+        # Must match the file declared for assembly_info in flye.yaml.
+        assembly_info = config["results_dir"] + "/" + config["<step_name>__flye_output_dir"] + "/assembly_info.txt",
+    params:
+        command = config["<step_name>__flye_command"],
+        output_dir = config["results_dir"] + "/" + config["<step_name>__flye_output_dir"] + "/",
+        # Read-type flag passed verbatim to the command (e.g. --nano-hq, --pacbio-hifi).
+        pacbio_oxfordNanopore = config["<step_name>__flye_pacbio_oxfordNanopore"],
+        # Optional flags: emitted only when the matching option is enabled.
+        scaffold = "--scaffold" if config["<step_name>__flye_scaffold"] else "",
+        keep_haplotypes = "--keep-haplotypes" if config["<step_name>__flye_keep_haplotypes"] else "",
+    log:
+        config["results_dir"] + "/logs/" + config["<step_name>__flye_output_dir"] + "/flye_log.txt"
+    threads:
+        config["<step_name>__flye_threads"]
+    shell:
+        # 1) run the assembler, mirroring stdout+stderr into the log;
+        # 2) unwrap multi-line FASTA records into assembly.fa (awk newlines are
+        #    written as \\n so awk, not Python, interprets the escape);
+        # 3) move the unwrapped file over the original assembly.fasta.
+        "{params.command} {params.pacbio_oxfordNanopore} "
+        "{input.read} "
+        "--out-dir {params.output_dir} "
+        "-t {threads} "
+        "{params.scaffold} {params.keep_haplotypes} |& tee {log} && "
+        "awk '/^>/ {{ if (NR > 1) printf(\"\\n\"); print; next }} {{ printf(\"%s\", $0) }} END {{ printf(\"\\n\") }}' < {params.output_dir}/assembly.fasta > {params.output_dir}/assembly.fa && "
+        "mv {params.output_dir}/assembly.fa {params.output_dir}/assembly.fasta"
diff --git a/tools/flye/flye.yaml b/tools/flye/flye.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..382749b3771ffeb307d51ba638a72dbde4e9e0aa
--- /dev/null
+++ b/tools/flye/flye.yaml
@@ -0,0 +1,84 @@
+# Tool descriptor for the Flye long-read assembler.
+# Option names below must match the config keys read by flye.rule.snakefile.
+{
+  id: flye,
+  name: flye,
+  article: "s41592-020-00971-x",
+  website: "https://github.com/fenderglass/Flye/blob/flye/docs/FAQ.md",
+  git: "https://github.com/fenderglass/Flye",
+  description: "Flye is a de novo assembler for single molecule sequencing reads, such as those produced by PacBio and Oxford Nanopore Technologies.",
+  version: "2.9",
+  documentation: "https://github.com/fenderglass/Flye/tree/flye/docs",
+  multiqc: "custom",
+  commands:
+    [
+      {
+        name: flye,
+        cname: "Flye assembler",
+        command: flye,
+        category: "assembly",
+        output_dir: flye,
+        inputs: [{ name: read, type: "reads" }],
+        outputs:
+          [
+            { name: assembly_info, type: "tsv", file: assembly_info.txt, description: "Extra information about contigs" },
+            { name: assembly_fasta, type: "fasta", file: assembly.fasta, description: "Final assembly. Contains contigs and possibly scaffolds" }
+          ],
+        options:
+          [
+            {
+              name: flye_threads,
+              prefix: -t,
+              type: numeric,
+              value: 4,
+              min: 1,
+              max: NA,
+              step: 1,
+              label: "Number of threads to use",
+            },
+            {
+              name: "flye_pacbio_oxfordNanopore",
+              type: "select",
+              value: "--nano-hq",
+              choices: [
+                Pacbio raw: --pacbio-raw,
+                Pacbio corr: --pacbio-corr,
+                Pacbio hifi: --pacbio-hifi,
+                ONT raw: --nano-raw,
+                ONT corr: --nano-corr,
+                ONT hq: '--nano-hq'
+              ],
+              label: "Reads type",
+            },
+            {
+              name: flye_keep_haplotypes,
+              prefix: "--keep-haplotypes",
+              type: checkbox,
+              value: False,
+              label: "do not collapse alternative haplotypes",
+            },
+            {
+              name: flye_scaffold,
+              prefix: "--scaffold",
+              type: checkbox,
+              value: False,
+              label: "enable scaffolding using graph",
+            },
+          ],
+      },
+    ],
+  install: {
+    flye: [
+      "cd /opt/biotools",
+      "git clone https://github.com/fenderglass/Flye",
+      "cd Flye",
+      "make",
+      "ENV PATH /opt/biotools/Flye/bin:$PATH"
+    ]
+  },
+  citations: {
+    flye: [
+      "Mikhail Kolmogorov, Derek M. Bickhart, Bahar Behsaz, Alexey Gurevich, Mikhail Rayko, Sung Bong Shin, Kristen Kuhn, Jeffrey Yuan, Evgeny Polevikov, Timothy P. L. Smith and Pavel A. Pevzner. metaFlye: scalable long-read metagenome assembly using repeat graphs. Nature Methods, 2020"
+    ]
+  }
+}