From 719e496ae09fc312724790d18ee57731855d098b Mon Sep 17 00:00:00 2001 From: Iago Bonnici <iago.bonnici@umontpellier.fr> Date: Wed, 24 Jul 2024 14:55:52 +0200 Subject: [PATCH] Export full config detail as an output warmup. --- src/bin/aphid/main.rs | 78 ++++++++++++++++++++++++++++---- src/config/check.rs | 103 +++--------------------------------------- src/io.rs | 11 +++++ src/lib.rs | 1 + src/optim/bfgs.rs | 3 +- src/output.rs | 4 ++ src/output/config.rs | 87 +++++++++++++++++++++++++++++++++++ 7 files changed, 178 insertions(+), 109 deletions(-) create mode 100644 src/output.rs create mode 100644 src/output/config.rs diff --git a/src/bin/aphid/main.rs b/src/bin/aphid/main.rs index aca3e73..4967078 100644 --- a/src/bin/aphid/main.rs +++ b/src/bin/aphid/main.rs @@ -1,6 +1,14 @@ // Main entry point into the CLI. -use std::{cmp::max, collections::HashSet, fmt::Write, path::PathBuf, process}; +use std::{ + cmp::max, + collections::HashSet, + fmt::Write as FmtWrite, + fs::{self, File}, + io::Write as IoWrite, + path::{self, PathBuf}, + process, +}; use aphid::{ extract_local_triplet, imbalance, @@ -28,8 +36,20 @@ use crate::display_tree_analysis::display_geometrical_tree_analysis; #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] struct Args { + /// Raise to overwrite previous output folder. + #[clap(short, long)] + force: bool, + /// Path to the config file. config: PathBuf, + + /// Path to the output folder, created if missing. + output: PathBuf, +} + +// Standard output filenames. +mod out { + pub(super) const CONFIG: &str = "config"; } fn main() { @@ -39,7 +59,7 @@ fn main() { eprintln!("Success. {}", "✓".bold().green()); } Err(e) => { - eprintln!("{} {e}", "🗙".bold().red()); + eprintln!("{} {e}", "🗙 Error:".bold().red()); process::exit(1); } } @@ -47,9 +67,10 @@ fn main() { #[allow(clippy::too_many_lines)] // TODO: factorize later: ongoing experiment. fn run() -> Result<(), Error> { - // Parse command line arguments. - let args = Args::parse(); + //============================================================================== + // Init. + let args = Args::parse(); let separator = || eprintln!("\n{:=<80}", ""); eprintln!("Prepare string interner."); @@ -57,7 +78,8 @@ fn run() -> Result<(), Error> { let interner = &mut interner; //============================================================================== - // Read input + // Read. + let path = &args.config; eprintln!("Read config from {}.", format!("{path:?}").blue()); let config = Config::from_file(path, interner)?; @@ -65,11 +87,12 @@ fn run() -> Result<(), Error> { let path = &config.trees; eprintln!("Read gene trees from {}.", format!("{path:?}").blue()); let forest = GenesForest::from_file(path, interner)?; + eprintln!(" Found {} gene trees.", forest.len()); //============================================================================== - // Check input consistency. + // Check. - eprintln!(" Check config consistency."); + eprintln!("Check input consistency."); // Check that all relevant species are actually found in the forest, // or they may be mispelled. @@ -94,9 +117,40 @@ fn run() -> Result<(), Error> { } ); - eprintln!("Echo input:"); - eprintln!("{:#?}", config.for_display(interner)); - eprintln!(" Found {} gene trees.", forest.len()); + //============================================================================== + // Prepare output. + + eprintln!("Prepare output folder."); + let output = &path::absolute(&args.output)?; + match (output.exists(), args.force) { + (true, true) => { + eprintln!( + " {} existing folder: {}.", + "Removing".yellow(), + format!("{}", output.display()).blue() + ); + fs::remove_dir_all(output)?; + } + (true, false) => OutputExistsErr { path: output }.fail()?, + (false, _) => { + eprintln!( + " Creating empty folder: {}.", + format!("{}", output.display()).blue() + ); + } + }; + fs::create_dir_all(output)?; + + //============================================================================== + // Export full config specification, including implicit defaults. + + let path = output.join(out::CONFIG); + eprintln!( + "Write full configuration to {}.", + format!("{}", path.display()).blue() + ); + let mut file = File::create(path)?; + writeln!(file, "{:#?}", config.for_display(interner))?; //============================================================================== // Extract various information from the trees, @@ -530,8 +584,12 @@ impl ToString for ColoredString { #[derive(Debug, Snafu)] #[snafu(context(suffix(Err)))] enum Error { + #[snafu(transparent)] + Io { source: std::io::Error }, #[snafu(transparent)] Check { source: aphid::config::check::Error }, + #[snafu(display("Output folder already exists: {}.", format!("{}", path.display()).blue()))] + OutputExists { path: PathBuf }, #[snafu(transparent)] ForestParse { source: aphid::genes_forest::parse::Error, diff --git a/src/config/check.rs b/src/config/check.rs index 97f9d56..4602eb5 100644 --- a/src/config/check.rs +++ b/src/config/check.rs @@ -4,13 +4,12 @@ use std::{ cmp::Ordering, collections::HashSet, - fmt, path::{Path, PathBuf}, }; use arrayvec::ArrayVec; use regex::Regex; -use snafu::{ensure, ResultExt, Snafu}; +use snafu::{ensure, Snafu}; use crate::{ config::{ @@ -18,7 +17,7 @@ use crate::{ deserialize::{self as raw, NodeInput}, Search, SpeciesTriplet, Taxa, MAX_UNRESOLVED_LENGTH, }, - interner::{Interner, ResolvedSymbol, SpeciesSymbol}, + interner::{Interner, SpeciesSymbol}, io::{self, read_file}, model::parameters::{GeneFlowTimes, MAX_N_GF_TIMES}, optim::{ @@ -71,14 +70,12 @@ impl Config { .unwrap_or_else(|| { panic!( "Config file has been read but its path has no parent: {}", - path.to_string_lossy() + path.display() ) }) .to_owned(); path.push(trees); - trees = path - .canonicalize() - .with_context(|_| CanonicalizeErr { path })?; + trees = io::canonicalize(&path)?; } // Most checks implemented within `TryFrom` trait. @@ -194,9 +191,7 @@ impl TryFrom<&'_ raw::BfgsConfig> for BfgsConfig { let Some(path) = path else { return Ok(None); }; - let path = path - .canonicalize() - .with_context(|_| CanonicalizeErr { path })?; + let path = io::canonicalize(path)?; if path.is_file() { eprintln!("Will override {:?}.", path.to_string_lossy()); }; @@ -567,87 +562,6 @@ impl SpeciesTriplet { } } -//================================================================================================== -// Display. - -// We cannot simply derive(Debug) for the config -// because a reference to the interner is necessary to resolve the species symbols into names. -// TODO: One unfortunate consequence is that the list of all fields needs to be mirrored here. -pub struct ConfigDisplay<'i> { - config: &'i Config, - interner: &'i Interner, -} - -struct TaxaDisplay<'i> { - taxa: &'i Taxa, - interner: &'i Interner, -} - -struct TripletDisplay<'i> { - triplet: &'i SpeciesTriplet, - interner: &'i Interner, -} - -impl<'i> fmt::Debug for ConfigDisplay<'i> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let Config { trees, taxa, filters, unresolved_length, search } = self.config; - f.debug_struct("Config") - .field("trees", trees) - .field("taxa", &taxa.for_display(self.interner)) - .field("filters", filters) - .field("unresolved_length", unresolved_length) - .field("search", search) - .finish() - } -} - -impl<'i> fmt::Debug for TaxaDisplay<'i> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let Taxa { triplet, outgroup, other } = self.taxa; - let resolve = |slice: &[_]| { - slice - .iter() - .map(|&symbol| ResolvedSymbol::new(symbol, self.interner)) - .collect::<Vec<_>>() - }; - f.debug_struct("Taxa") - .field("triplet", &triplet.for_display(self.interner)) - .field("outgroup", &resolve(outgroup)) - .field("other", &resolve(other)) - .finish() - } -} - -// Display as [[A, B], C]. -impl<'i> fmt::Debug for TripletDisplay<'i> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let [a, b, c] = self - .triplet - .as_array() - .map(|s| ResolvedSymbol::new(s, self.interner)); - let ab = [a, b]; - f.debug_list().entry(&ab).entry(&c).finish() - } -} - -impl Config { - pub fn for_display<'d>(&'d self, interner: &'d Interner) -> ConfigDisplay<'d> { - ConfigDisplay { config: self, interner } - } -} - -impl Taxa { - fn for_display<'d>(&'d self, interner: &'d Interner) -> TaxaDisplay<'d> { - TaxaDisplay { taxa: self, interner } - } -} - -impl SpeciesTriplet { - fn for_display<'d>(&'d self, interner: &'d Interner) -> TripletDisplay<'d> { - TripletDisplay { triplet: self, interner } - } -} - //================================================================================================== // Errors. @@ -662,12 +576,7 @@ pub enum Error { Config { mess: String }, #[snafu(transparent)] Optim { source: optim::Error }, - #[snafu(display("Could not canonicalize path: {:?}:\n{source}", path.to_string_lossy()))] - Canonicalize { - source: std::io::Error, - path: PathBuf, - }, - #[snafu(display("Could not find folder: {:?}", path.to_string_lossy()))] + #[snafu(display("Could not find folder: {:?}", path.display()))] NoSuchFolder { path: PathBuf }, } diff --git a/src/io.rs b/src/io.rs index 955fe8b..bebe8ce 100644 --- a/src/io.rs +++ b/src/io.rs @@ -19,6 +19,12 @@ pub(crate) fn read_file(path: &Path) -> Result<String, Error> { String::from_utf8(buf).context(Utf8Err { path: tobuf() }) } +// Wrap to integrate into aphid error system. +pub fn canonicalize(path: &Path) -> Result<PathBuf, Error> { + path.canonicalize() + .with_context(|_| CanonicalizeErr { path }) +} + #[derive(Debug, Snafu)] #[snafu(context(suffix(Err)))] pub enum Error { @@ -29,4 +35,9 @@ pub enum Error { path: PathBuf, source: FromUtf8Error, }, + #[snafu(display("Could not canonicalize path: {:?}:\n{source}", path.display()))] + Canonicalize { + source: std::io::Error, + path: PathBuf, + }, } diff --git a/src/lib.rs b/src/lib.rs index 2c6c5da..386a2b3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ mod lexer; mod model; mod optim; mod tree; +mod output; pub use config::{Config, Filters}; pub use gene_tree::{ diff --git a/src/optim/bfgs.rs b/src/optim/bfgs.rs index 01550d7..b6cbd15 100644 --- a/src/optim/bfgs.rs +++ b/src/optim/bfgs.rs @@ -334,8 +334,7 @@ pub enum Error { }, #[snafu(transparent)] WolfeSearch { source: WolfeSearchError }, - #[snafu(display("Error writing to trace file {:?}:\n{source}", - path.as_os_str().to_string_lossy()))] + #[snafu(display("Error writing to trace file {:?}:\n{source}", path.display()))] Trace { path: PathBuf, source: io::Error }, #[snafu(display( "A log level of {log} is required, \ diff --git a/src/output.rs b/src/output.rs new file mode 100644 index 0000000..8296ae4 --- /dev/null +++ b/src/output.rs @@ -0,0 +1,4 @@ +// Use this module to specify the possible output(s) of aphid. + +mod config; + diff --git a/src/output/config.rs b/src/output/config.rs new file mode 100644 index 0000000..2a0046a --- /dev/null +++ b/src/output/config.rs @@ -0,0 +1,87 @@ +// Render config to text, to trace every parameter used if needed. +// This is basically just pretty-printing of the raw struct, +// but we cannot simply derive(Debug) for the config +// because a reference to the interner is necessary +// to resolve the species symbols into names. +use std::fmt; + +use crate::{ + config::{SpeciesTriplet, Taxa}, + interner::{Interner, ResolvedSymbol}, + Config, +}; + +pub struct Display<'i> { + config: &'i Config, + interner: &'i Interner, +} + +struct TaxaDisplay<'i> { + taxa: &'i Taxa, + interner: &'i Interner, +} + +struct TripletDisplay<'i> { + triplet: &'i SpeciesTriplet, + interner: &'i Interner, +} + +impl<'i> fmt::Debug for Display<'i> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let Config { trees, taxa, filters, unresolved_length, search } = self.config; + f.debug_struct("Config") + .field("trees", trees) + .field("taxa", &taxa.for_display(self.interner)) + .field("filters", filters) + .field("unresolved_length", unresolved_length) + .field("search", search) + .finish() + } +} + +impl<'i> fmt::Debug for TaxaDisplay<'i> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let Taxa { triplet, outgroup, other } = self.taxa; + let resolve = |slice: &[_]| { + slice + .iter() + .map(|&symbol| ResolvedSymbol::new(symbol, self.interner)) + .collect::<Vec<_>>() + }; + f.debug_struct("Taxa") + .field("triplet", &triplet.for_display(self.interner)) + .field("outgroup", &resolve(outgroup)) + .field("other", &resolve(other)) + .finish() + } +} + +// Display as [[A, B], C]. +impl<'i> fmt::Debug for TripletDisplay<'i> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let [a, b, c] = self + .triplet + .as_array() + .map(|s| ResolvedSymbol::new(s, self.interner)); + let ab = [a, b]; + f.debug_list().entry(&ab).entry(&c).finish() + } +} + +impl Config { + pub fn for_display<'d>(&'d self, interner: &'d Interner) -> Display<'d> { + Display { config: self, interner } + } +} + +impl Taxa { + fn for_display<'d>(&'d self, interner: &'d Interner) -> TaxaDisplay<'d> { + TaxaDisplay { taxa: self, interner } + } +} + +impl SpeciesTriplet { + fn for_display<'d>(&'d self, interner: &'d Interner) -> TripletDisplay<'d> { + TripletDisplay { triplet: self, interner } + } +} -- GitLab