From d9e44cd88837d9be2ba1368cba3c7aa3892240ee Mon Sep 17 00:00:00 2001 From: khalid <khalid.belkhir@umontpellier.fr> Date: Mon, 29 Nov 2021 17:32:55 +0100 Subject: [PATCH] change pre-processing fasta assembly to find mito scaffold --- tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile b/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile index 1935e265..850f6df1 100755 --- a/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile +++ b/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile @@ -13,8 +13,11 @@ rule <step_name>__mitoz_findmitoscaf: clade = config["<step_name>__mitoz_findmitoscaf_clade"] shell: # Si l'on veut eviter l'étape cal_bwa_abundance.py qui fait un mapping couteux sur tous les scaffolds de l'assemblage : - # convert Megahit to format similar to one produced by abundance estimate procedure with a dummy abundance of 0.1 - """awk '{{if (NR%2 == 1) {{split($0,a," "); split(a[4],b,"="); sub(">",">C",a[1]); print a[1]"\\t100.0\\tlength="b[2]}} else print}}' {input.fasta} > /tmp/assembly.contigs.fa; """ + # convert Megahit or Flye output to a format similar to the one produced by the abundance estimation procedure, with a dummy abundance of 100.0 + # N.B. the assembly FASTA file must be in one-line-per-sequence format (each sequence on a single, unwrapped line) + + """awk '{{posit = index($0,">"); if (posit == 1) {{split($0,a," "); sub(">",">C",a[1]); ident=a[1]"\\t100.0\\tlength="}} else {{long=length($0);if (long > 0) {{ident=ident long; print ident"\\n"$0 }} }}}}' {input.fasta} > /tmp/assembly.contigs.fa;""" + "mkdir -p {params.output_dir} && " "cd {params.output_dir}; " "{params.command} " -- GitLab