From 05b5e5f4a2adfb38beb6d56d4898987f9e634cd8 Mon Sep 17 00:00:00 2001 From: Iago Bonnici <iago.bonnici@umontpellier.fr> Date: Mon, 17 Feb 2025 16:49:39 +0100 Subject: [PATCH] Merge `clear` & `unclear` into a single table. --- src/bin/dmox.rs | 14 +++----- src/config.rs | 3 +- src/lib.rs | 89 ++++++++++++++++++++++--------------------------- 3 files changed, 45 insertions(+), 61 deletions(-) diff --git a/src/bin/dmox.rs b/src/bin/dmox.rs index e1315f9..7480492 100644 --- a/src/bin/dmox.rs +++ b/src/bin/dmox.rs @@ -104,13 +104,9 @@ struct Cli { #[arg(long, default_value_t = 6)] zlevel: u32, - /// Where to write the file summarizing assigned samples. + /// Where to write the file summarizing assigned / unassigned barcodes. #[arg(long)] - clear: PathBuf, - - /// Where to write the file summarizing unassigned samples. - #[arg(long)] - unclear: PathBuf, + barcodes_table: PathBuf, /// Total number of reference barcode modules for each letter. #[arg(long, default_value_t = 96)] @@ -181,8 +177,7 @@ fn run() -> Result<(), Error> { qx, id_tail, zlevel, - clear, - unclear, + barcodes_table, n_modules, module_size, n_writers, @@ -232,8 +227,7 @@ fn run() -> Result<(), Error> { distance, max_distance, schema, - clear, - unclear, + barcodes_table_file: barcodes_table, sample: Box::new(move |id, infix| { let mut path = PathBuf::from(&samples); path.push(format!("{id}.{infix}.fq.gz")); diff --git a/src/config.rs b/src/config.rs index b46b476..9a9e0ea 100644 --- a/src/config.rs +++ b/src/config.rs @@ -15,8 +15,7 @@ pub struct Config { pub distance: Distance, pub max_distance: Option<usize>, pub schema: PathBuf, - pub clear: PathBuf, - pub unclear: PathBuf, + pub barcodes_table_file: PathBuf, pub sample: Box<SamplePath>, pub output_bx: bool, pub output_rx: bool, diff --git a/src/lib.rs b/src/lib.rs index 544c670..a4e359b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -129,21 +129,19 @@ pub fn demultiplex(cf: &Config) -> Result<(), Error> { let samples = Samples::parse(reader)?; 
//---------------------------------------------------------------------------------------------- - println!("Open log streams."); - // (do it now to error early if they can't be opened, but actual writing there happens later) - let [mut clear_file, mut unclear_file] = [ - (&cf.clear, "Barcode \t Correct reads \t Corrected reads\n"), - (&cf.unclear, "Barcode \t Reads\n"), - ] - .try_map(|(path, header)| -> Result<_, Error> { + println!("Open table stream."); + // (do it now to error early if it can't be opened, but actual writing there happens later) + let path = &cf.barcodes_table_file; + let header = "Barcode\tTotal\tCorrect_Reads\tCorrected_Reads\n"; + let table_file = { println!(" {}", path.display().to_string().blue()); let mut writer = ContextualizedWriter::from_file(path)?; writer .write_all(header.as_bytes()) .with_context(|_| WriteLineErr { context: &writer })?; writer.bump_line(); - Ok(writer) - })?; + writer + }; let mut counters = IndexMap::new(); //---------------------------------------------------------------------------------------------- @@ -324,12 +322,11 @@ pub fn demultiplex(cf: &Config) -> Result<(), Error> { } //------------------------------------------------------------------------------------------ - // Find the corresponding matches to construct the barcode. + // Find the corresponding matches against each reference to construct the barcode. let mut bx = [0, 0, 0, 0]; // [{A, C}, {B, D}] <> [{I1}, {I2}] - let mut is_exact = true; - let mut is_corrected = false; - let mut is_unclear = false; - let mut sample_code = None; // Fill if that one is clear. + let mut all_clear = true; // Raise if all closest matches were unique. + let mut all_exact = true; // Raise if all match distances were null. + let mut sample_code = None; // Fill if the sample letter has unique closest match. 
let (m1, m2) = bx.split_at_mut(2); for ((i, _), (u, v), matches) in [ (&i1, ((b'A', &mut ref_a), (b'C', &mut ref_c)), m1), @@ -341,18 +338,17 @@ pub fn demultiplex(cf: &Config) -> Result<(), Error> { let q2 = &q2[1..]; // Remove (and ignore) stitch base. for ((q, (letter, reference)), m) in [(q2, u), (q1, v)].iter_mut().zip(matches) { let Some((code, distance)) = reference.closest(q) else { - is_unclear = true; + all_clear = false; continue; }; if let Some(max) = cf.max_distance { if distance > max { - is_unclear = true; + all_clear = false; continue; } } if distance > 0 { - is_corrected = true; - is_exact = false; + all_exact = false; } *m = code + 1; if *letter == samples.letter { @@ -361,20 +357,23 @@ pub fn demultiplex(cf: &Config) -> Result<(), Error> { } } - // Update counters. + // Update counters, + // considering unclear sample as neither 'exact' or 'corrected'. + let [exact, corrected] = + [all_exact, !all_exact].map(|b| if all_clear { u64::from(b) } else { 0 }); match counters.entry(bx) { Entry::Vacant(entry) => { entry.insert(Counters { - exact: u64::from(is_exact), - corrected: u64::from(is_corrected), - unclear: u64::from(is_unclear), + total: 1, + exact, + corrected, }); } Entry::Occupied(mut entry) => { let counters = entry.get_mut(); - counters.exact += u64::from(is_exact); - counters.corrected += u64::from(is_corrected); - counters.unclear += u64::from(is_unclear); + counters.total += 1; + counters.exact += exact; + counters.corrected += corrected; } } @@ -553,33 +552,20 @@ pub fn demultiplex(cf: &Config) -> Result<(), Error> { //---------------------------------------------------------------------------------------------- println!("Writing clear/unclear files."); + let mut writer = table_file; for (code, counters) in counters { let Counters { + total, exact, corrected, - unclear, } = counters; - if unclear > 0 { - |w: &mut ContextualizedWriter<_>| -> Result<(), _> { - write_code(w, &code)?; - writeln!(w, "\t{unclear}")?; - w.bump_line(); - Ok(()) 
- }(&mut unclear_file) - .with_context(|_| WriteLineErr { - context: &unclear_file, - })?; - } else { - |w: &mut ContextualizedWriter<_>| -> Result<(), _> { - write_code(w, &code)?; - writeln!(w, "\t{exact}\t{corrected}")?; - w.bump_line(); - Ok(()) - }(&mut clear_file) - .with_context(|_| WriteLineErr { - context: &clear_file, - })?; - } + |w: &mut ContextualizedWriter<_>| -> Result<(), _> { + write_code(w, &code)?; + writeln!(w, "\t{total}\t{exact}\t{corrected}")?; + w.bump_line(); + Ok(()) + }(&mut writer) + .with_context(|_| WriteLineErr { context: &writer })?; } let elapsed = tic.elapsed(); @@ -587,11 +573,16 @@ pub fn demultiplex(cf: &Config) -> Result<(), Error> { Ok(()) } -// One value for: +/// For every barcode. struct Counters { + /// How many times it has been produced. + total: u64, + /// How many times it was an exact match against all references + /// (impossible if it contains `00`). exact: u64, + /// How many times it has been corrected to match against the references + /// (non-null distance) (also impossible if it contains `00`). corrected: u64, - unclear: u64, } fn write_code(writer: &mut impl Write, bx: &Barcode) -> Result<(), std::io::Error> { -- GitLab