From d9e44cd88837d9be2ba1368cba3c7aa3892240ee Mon Sep 17 00:00:00 2001
From: khalid <khalid.belkhir@umontpellier.fr>
Date: Mon, 29 Nov 2021 17:32:55 +0100
Subject: [PATCH] change pre-processing of fasta assembly to find mito scaffold

---
 tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile b/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile
index 1935e265..850f6df1 100755
--- a/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile
+++ b/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile
@@ -13,8 +13,11 @@ rule <step_name>__mitoz_findmitoscaf:
         clade = config["<step_name>__mitoz_findmitoscaf_clade"]
     shell:
         # Si l'on veut eviter l'étape cal_bwa_abundance.py qui fait un mapping couteux sur tous les scaffolds de l'assemblage :
-        # convert Megahit to format similar to one produced by abundance estimate procedure with a dummy abundance of 0.1
-        """awk '{{if (NR%2 == 1) {{split($0,a," "); split(a[4],b,"="); sub(">",">C",a[1]); print a[1]"\\t100.0\\tlength="b[2]}} else print}}' {input.fasta} > /tmp/assembly.contigs.fa; """
+        # convert Megahit or Flye output to a format similar to the one produced by the abundance estimate procedure, with a dummy abundance of 100.0
+        # N.B. the assembly fasta file must have each sequence on a single line (wrapped multi-line sequences are not supported)
+
+        """awk '{{posit = index($0,">"); if (posit == 1) {{split($0,a," "); sub(">",">C",a[1]); ident=a[1]"\\t100.0\\tlength="}} else {{long=length($0);if (long > 0) {{ident=ident long; print ident"\\n"$0 }} }}}}' {input.fasta} > /tmp/assembly.contigs.fa;"""
+
         "mkdir -p {params.output_dir} && "
         "cd {params.output_dir}; "
         "{params.command} "
-- 
GitLab