diff --git a/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile b/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile index 1935e2652b9a6ea1e6a05bfa0e23a28d87f68ba1..850f6df149da4f5b45803389f9fae99e181f1b31 100755 --- a/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile +++ b/tools/mitoz_findmitoscaf/mitoz_findmitoscaf.rule.snakefile @@ -13,8 +13,11 @@ rule <step_name>__mitoz_findmitoscaf: clade = config["<step_name>__mitoz_findmitoscaf_clade"] shell: # Si l'on veut eviter l'étape cal_bwa_abundance.py qui fait un mapping couteux sur tous les scaffolds de l'assemblage : - # convert Megahit to format similar to one produced by abundance estimate procedure with a dummy abundance of 0.1 - """awk '{{if (NR%2 == 1) {{split($0,a," "); split(a[4],b,"="); sub(">",">C",a[1]); print a[1]"\\t100.0\\tlength="b[2]}} else print}}' {input.fasta} > /tmp/assembly.contigs.fa; """ + # convert Megahit or Flye output to a format similar to the one produced by the abundance estimate procedure, with a dummy abundance of 100.0 + # N.B. the assembly FASTA file must have each sequence on a single line (unwrapped), otherwise the computed length= header is wrong + + """awk '{{posit = index($0,">"); if (posit == 1) {{split($0,a," "); sub(">",">C",a[1]); ident=a[1]"\\t100.0\\tlength="}} else {{long=length($0);if (long > 0) {{ident=ident long; print ident"\\n"$0 }} }}}}' {input.fasta} > /tmp/assembly.contigs.fa;""" + "mkdir -p {params.output_dir} && " "cd {params.output_dir}; " "{params.command} "