From a37db2548568ae8a33107439fcff5f75090bea88 Mon Sep 17 00:00:00 2001
From: khalid <khalid.belkhir@umontpellier.fr>
Date: Wed, 1 Dec 2021 15:14:54 +0100
Subject: [PATCH] Add raw indexing

---
 raw_inputs/raw_accel_align_index.py           | 14 ++++
 raw_inputs/raw_accel_align_index.yaml         | 13 ++++
 .../accel_align_index.rule.snakefile          | 18 +++++
 .../accel_align_index/accel_align_index.yaml  | 71 +++++++++++++++++++
 tools/global.yaml                             |  1 +
 5 files changed, 117 insertions(+)
 create mode 100755 raw_inputs/raw_accel_align_index.py
 create mode 100755 raw_inputs/raw_accel_align_index.yaml
 create mode 100755 tools/accel_align_index/accel_align_index.rule.snakefile
 create mode 100755 tools/accel_align_index/accel_align_index.yaml

diff --git a/raw_inputs/raw_accel_align_index.py b/raw_inputs/raw_accel_align_index.py
new file mode 100755
index 00000000..c3e34f56
--- /dev/null
+++ b/raw_inputs/raw_accel_align_index.py
@@ -0,0 +1,14 @@
+import os
+import re
+import sys
+
+def raw_accel_align_index(results_dir, accel_align_index):
+    """Return ``{"accel_align_index": [...]}`` listing the regular files directly inside *accel_align_index*.
+
+    Each path is the directory joined with a file name; names are sorted because ``os.listdir`` order is arbitrary.
+    Sub-directories are skipped. *results_dir* is accepted for the caller's signature but not used here.
+    """
+    candidates = (os.path.join(accel_align_index, name) for name in sorted(os.listdir(accel_align_index)))
+    return {"accel_align_index": [path for path in candidates if os.path.isfile(path)]}
+
+#print(raw_accel_align_index(sys.argv[1],sys.argv[2]) )
\ No newline at end of file
diff --git a/raw_inputs/raw_accel_align_index.yaml b/raw_inputs/raw_accel_align_index.yaml
new file mode 100755
index 00000000..5582b49a
--- /dev/null
+++ b/raw_inputs/raw_accel_align_index.yaml
@@ -0,0 +1,13 @@
+{
+  name: raw_accel_align_index,
+  function_call: "raw_accel_align_index(config['results_dir'], config['accel_align_index'])",
+  options: [
+    {
+      name: "accel_align_index",
+      type: "input_dir",
+      value: "/Data",
+      label: "Directory containing the Accel-Align index files: ",
+      volumes:  [Data: "/Data", Results: "/Results"]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tools/accel_align_index/accel_align_index.rule.snakefile b/tools/accel_align_index/accel_align_index.rule.snakefile
new file mode 100755
index 00000000..46e2bf17
--- /dev/null
+++ b/tools/accel_align_index/accel_align_index.rule.snakefile
@@ -0,0 +1,18 @@
+# Runs the configured indexing command on genome_fasta (output teed to the log), then copies the FASTA and its ".hash" file into out_dir; "&&" keeps the two cp calls separate.
+rule <step_name>__accel_align_index:
+    input:
+        **<step_name>__accel_align_index_inputs()
+    output:
+        index = config["<step_name>__accel_align_index_output_dir"]+"/index.hash",
+    log:
+        config["results_dir"]+"/logs/" + config["<step_name>__accel_align_index_output_dir"] + "/index.log"
+    params:
+        command = config["<step_name>__accel_align_index_command"],
+        accel_align_index_kmers_length = config["<step_name>__accel_align_index_kmers_length"],
+        out_dir = config["<step_name>__accel_align_index_output_dir"]
+    shell:
+        "{params.command} "
+        "-l  {params.accel_align_index_kmers_length} {input.genome_fasta} "
+        "|& tee {log}; "
+        "cp {input.genome_fasta}  {params.out_dir}/index && "
+        "cp {input.genome_fasta}.hash  {params.out_dir}/index.hash"
diff --git a/tools/accel_align_index/accel_align_index.yaml b/tools/accel_align_index/accel_align_index.yaml
new file mode 100755
index 00000000..4633b078
--- /dev/null
+++ b/tools/accel_align_index/accel_align_index.yaml
@@ -0,0 +1,71 @@
+{
+  id: accel_align_index,
+  # Tool metadata (name, links, version) followed by the command, install and citation definitions.
+  name: Accel-align is a fast alignment tool implemented in C++,
+  article: 10.1186/s12859-021-04162-z,
+  website: "https://github.com/raja-appuswamy/accel-align-release",
+  git: "https://github.com/raja-appuswamy/accel-align-release",
+  description: "a fast sequence mapper and aligner based on the seed-embed-extend method",
+  version: "2.0",
+  documentation: "https://github.com/raja-appuswamy/accel-align-release",
+  multiqc: "custom",
+  commands:
+    [
+      {
+        name: accel_align_index,
+        cname:  "Accel-Align index",
+        command: accindex-x86-64,
+        category: "indexing",
+        output_dir: accel_align/index,
+        inputs: [{ name: genome_fasta, type: "contigs", description: "Fasta reference", from: "parameter" }],
+        outputs: [{ name: index, type: "accel_align_index", file: index, description: "Index files for Accel-Align alignment" }],
+        options:
+          [
+            {
+              name: accel_align_index_genome_fasta,
+              type: input_file,
+              value: "",
+              label: "Path to reference genome fasta file",
+            },
+            {
+              name: accel_align_index_kmers_length,
+              prefix: -l,
+              type: numeric,
+              value: 32,
+              min: 1,
+              max: 64,
+              step: 1,
+              label: "Length of the k-mers used to index the genome",
+            }
+          ]
+      },
+    ],
+  install:  {
+    accel_align: [
+      "cd /opt/biotools",
+      "git clone https://github.com/raja-appuswamy/accel-align-release.git",
+      "ENV PATH /opt/biotools/accel-align-release:$PATH",
+    ],
+    Intel_TBB: [
+     "apt-get install -y libtbb-dev",
+    ],
+    samtools: [
+      "cd /opt/biotools",
+      "wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2",
+      "tar -xvjf samtools-1.9.tar.bz2",
+      "cd samtools-1.9",
+      "./configure && make",
+      "cd ..",
+      "mv samtools-1.9/samtools bin/samtools",
+      "rm -r samtools-1.9 samtools-1.9.tar.bz2"
+    ]
+  },
+  citations:  {
+    accel_align: [
+      "Yan, Y., Chaturvedi, N. & Appuswamy, R. Accel-Align: a fast sequence mapper and aligner based on the seed-embed-extend method. BMC Bioinformatics 22, 257 (2021)."
+    ],
+    samtools: [
+      "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352"
+    ]
+  }
+}
\ No newline at end of file
diff --git a/tools/global.yaml b/tools/global.yaml
index cb534829..3e3ea8e4 100755
--- a/tools/global.yaml
+++ b/tools/global.yaml
@@ -53,6 +53,7 @@ data: [
     {name: "fast5_dir", category: "sequences"},    
     {name: "illumina_dir", category: "sequences"},
 
+    {name: "accel_align_index", category: "index"},
     {name: "bowtie_index", category: "index"},
     {name: "bowtie2_index", category: "index"},
     {name: "bwa_mem_index", category: "index"},
-- 
GitLab