From 719e496ae09fc312724790d18ee57731855d098b Mon Sep 17 00:00:00 2001
From: Iago Bonnici <iago.bonnici@umontpellier.fr>
Date: Wed, 24 Jul 2024 14:55:52 +0200
Subject: [PATCH] Export full config detail as an output warmup.

---
 src/bin/aphid/main.rs |  78 ++++++++++++++++++++++++++++----
 src/config/check.rs   | 103 +++---------------------------------------
 src/io.rs             |  11 +++++
 src/lib.rs            |   1 +
 src/optim/bfgs.rs     |   3 +-
 src/output.rs         |   4 ++
 src/output/config.rs  |  87 +++++++++++++++++++++++++++++++++++
 7 files changed, 178 insertions(+), 109 deletions(-)
 create mode 100644 src/output.rs
 create mode 100644 src/output/config.rs

diff --git a/src/bin/aphid/main.rs b/src/bin/aphid/main.rs
index aca3e73..4967078 100644
--- a/src/bin/aphid/main.rs
+++ b/src/bin/aphid/main.rs
@@ -1,6 +1,14 @@
 // Main entry point into the CLI.
 
-use std::{cmp::max, collections::HashSet, fmt::Write, path::PathBuf, process};
+use std::{
+    cmp::max,
+    collections::HashSet,
+    fmt::Write as FmtWrite,
+    fs::{self, File},
+    io::Write as IoWrite,
+    path::{self, PathBuf},
+    process,
+};
 
 use aphid::{
     extract_local_triplet, imbalance,
@@ -28,8 +36,20 @@ use crate::display_tree_analysis::display_geometrical_tree_analysis;
 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
 struct Args {
+    /// Raise to overwrite previous output folder.
+    #[clap(short, long)]
+    force: bool, // Without it, `run` fails if the output folder already exists.
+
     /// Path to the config file.
     config: PathBuf,
+
+    /// Path to the output folder, created if missing.
+    output: PathBuf,
+}
+
+// Names of the files written into the output folder (not the stdout stream).
+mod out {
+    pub(super) const CONFIG: &str = "config"; // Full config dump, defaults included.
 }
 
 fn main() {
@@ -39,7 +59,7 @@ fn main() {
             eprintln!("Success. {}", "✓".bold().green());
         }
         Err(e) => {
-            eprintln!("{} {e}", "🗙".bold().red());
+            eprintln!("{} {e}", "🗙 Error:".bold().red());
             process::exit(1);
         }
     }
@@ -47,9 +67,10 @@ fn main() {
 
 #[allow(clippy::too_many_lines)] // TODO: factorize later: ongoing experiment.
 fn run() -> Result<(), Error> {
-    // Parse command line arguments.
-    let args = Args::parse();
+    //==============================================================================
+    // Init.
 
+    let args = Args::parse();
     let separator = || eprintln!("\n{:=<80}", "");
 
     eprintln!("Prepare string interner.");
@@ -57,7 +78,8 @@ fn run() -> Result<(), Error> {
     let interner = &mut interner;
 
     //==============================================================================
-    // Read input
+    // Read.
+
     let path = &args.config;
     eprintln!("Read config from {}.", format!("{path:?}").blue());
     let config = Config::from_file(path, interner)?;
@@ -65,11 +87,12 @@ fn run() -> Result<(), Error> {
     let path = &config.trees;
     eprintln!("Read gene trees from {}.", format!("{path:?}").blue());
     let forest = GenesForest::from_file(path, interner)?;
+    eprintln!("  Found {} gene trees.", forest.len());
 
     //==============================================================================
-    // Check input consistency.
+    // Check.
 
-    eprintln!("  Check config consistency.");
+    eprintln!("Check input consistency.");
 
     // Check that all relevant species are actually found in the forest,
     // or they may be mispelled.
@@ -94,9 +117,40 @@ fn run() -> Result<(), Error> {
         }
     );
 
-    eprintln!("Echo input:");
-    eprintln!("{:#?}", config.for_display(interner));
-    eprintln!("  Found {} gene trees.", forest.len());
+    //==============================================================================
+    // Prepare output.
+
+    eprintln!("Prepare output folder.");
+    let output = &path::absolute(&args.output)?;
+    match (output.exists(), args.force) {
+        (true, true) => {
+            eprintln!(
+                "  {} existing folder: {}.",
+                "Removing".yellow(),
+                format!("{}", output.display()).blue()
+            );
+            fs::remove_dir_all(output)?;
+        }
+        (true, false) => OutputExistsErr { path: output }.fail()?,
+        (false, _) => {
+            eprintln!(
+                "  Creating empty folder: {}.",
+                format!("{}", output.display()).blue()
+            );
+        }
+    };
+    fs::create_dir_all(output)?;
+
+    //==============================================================================
+    // Export full config specification, including implicit defaults.
+
+    let path = output.join(out::CONFIG);
+    eprintln!(
+        "Write full configuration to {}.",
+        format!("{}", path.display()).blue()
+    );
+    let mut file = File::create(path)?;
+    writeln!(file, "{:#?}", config.for_display(interner))?;
 
     //==============================================================================
     // Extract various information from the trees,
@@ -530,8 +584,12 @@ impl ToString for ColoredString {
 #[derive(Debug, Snafu)]
 #[snafu(context(suffix(Err)))]
 enum Error {
+    #[snafu(transparent)]
+    Io { source: std::io::Error },
     #[snafu(transparent)]
     Check { source: aphid::config::check::Error },
+    #[snafu(display("Output folder already exists: {}.", format!("{}", path.display()).blue()))]
+    OutputExists { path: PathBuf },
     #[snafu(transparent)]
     ForestParse {
         source: aphid::genes_forest::parse::Error,
diff --git a/src/config/check.rs b/src/config/check.rs
index 97f9d56..4602eb5 100644
--- a/src/config/check.rs
+++ b/src/config/check.rs
@@ -4,13 +4,12 @@
 use std::{
     cmp::Ordering,
     collections::HashSet,
-    fmt,
     path::{Path, PathBuf},
 };
 
 use arrayvec::ArrayVec;
 use regex::Regex;
-use snafu::{ensure, ResultExt, Snafu};
+use snafu::{ensure, Snafu};
 
 use crate::{
     config::{
@@ -18,7 +17,7 @@ use crate::{
         deserialize::{self as raw, NodeInput},
         Search, SpeciesTriplet, Taxa, MAX_UNRESOLVED_LENGTH,
     },
-    interner::{Interner, ResolvedSymbol, SpeciesSymbol},
+    interner::{Interner, SpeciesSymbol},
     io::{self, read_file},
     model::parameters::{GeneFlowTimes, MAX_N_GF_TIMES},
     optim::{
@@ -71,14 +70,12 @@ impl Config {
                 .unwrap_or_else(|| {
                     panic!(
                         "Config file has been read but its path has no parent: {}",
-                        path.to_string_lossy()
+                        path.display()
                     )
                 })
                 .to_owned();
             path.push(trees);
-            trees = path
-                .canonicalize()
-                .with_context(|_| CanonicalizeErr { path })?;
+            trees = io::canonicalize(&path)?;
         }
 
         // Most checks implemented within `TryFrom` trait.
@@ -194,9 +191,7 @@ impl TryFrom<&'_ raw::BfgsConfig> for BfgsConfig {
             let Some(path) = path else {
                 return Ok(None);
             };
-            let path = path
-                .canonicalize()
-                .with_context(|_| CanonicalizeErr { path })?;
+            let path = io::canonicalize(path)?;
             if path.is_file() {
                 eprintln!("Will override {:?}.", path.to_string_lossy());
             };
@@ -567,87 +562,6 @@ impl SpeciesTriplet {
     }
 }
 
-//==================================================================================================
-// Display.
-
-// We cannot simply derive(Debug) for the config
-// because a reference to the interner is necessary to resolve the species symbols into names.
-// TODO: One unfortunate consequence is that the list of all fields needs to be mirrored here.
-pub struct ConfigDisplay<'i> {
-    config: &'i Config,
-    interner: &'i Interner,
-}
-
-struct TaxaDisplay<'i> {
-    taxa: &'i Taxa,
-    interner: &'i Interner,
-}
-
-struct TripletDisplay<'i> {
-    triplet: &'i SpeciesTriplet,
-    interner: &'i Interner,
-}
-
-impl<'i> fmt::Debug for ConfigDisplay<'i> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let Config { trees, taxa, filters, unresolved_length, search } = self.config;
-        f.debug_struct("Config")
-            .field("trees", trees)
-            .field("taxa", &taxa.for_display(self.interner))
-            .field("filters", filters)
-            .field("unresolved_length", unresolved_length)
-            .field("search", search)
-            .finish()
-    }
-}
-
-impl<'i> fmt::Debug for TaxaDisplay<'i> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let Taxa { triplet, outgroup, other } = self.taxa;
-        let resolve = |slice: &[_]| {
-            slice
-                .iter()
-                .map(|&symbol| ResolvedSymbol::new(symbol, self.interner))
-                .collect::<Vec<_>>()
-        };
-        f.debug_struct("Taxa")
-            .field("triplet", &triplet.for_display(self.interner))
-            .field("outgroup", &resolve(outgroup))
-            .field("other", &resolve(other))
-            .finish()
-    }
-}
-
-// Display as [[A, B], C].
-impl<'i> fmt::Debug for TripletDisplay<'i> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let [a, b, c] = self
-            .triplet
-            .as_array()
-            .map(|s| ResolvedSymbol::new(s, self.interner));
-        let ab = [a, b];
-        f.debug_list().entry(&ab).entry(&c).finish()
-    }
-}
-
-impl Config {
-    pub fn for_display<'d>(&'d self, interner: &'d Interner) -> ConfigDisplay<'d> {
-        ConfigDisplay { config: self, interner }
-    }
-}
-
-impl Taxa {
-    fn for_display<'d>(&'d self, interner: &'d Interner) -> TaxaDisplay<'d> {
-        TaxaDisplay { taxa: self, interner }
-    }
-}
-
-impl SpeciesTriplet {
-    fn for_display<'d>(&'d self, interner: &'d Interner) -> TripletDisplay<'d> {
-        TripletDisplay { triplet: self, interner }
-    }
-}
-
 //==================================================================================================
 // Errors.
 
@@ -662,12 +576,7 @@ pub enum Error {
     Config { mess: String },
     #[snafu(transparent)]
     Optim { source: optim::Error },
-    #[snafu(display("Could not canonicalize path: {:?}:\n{source}", path.to_string_lossy()))]
-    Canonicalize {
-        source: std::io::Error,
-        path: PathBuf,
-    },
-    #[snafu(display("Could not find folder: {:?}", path.to_string_lossy()))]
+    #[snafu(display("Could not find folder: {:?}", path.display()))]
     NoSuchFolder { path: PathBuf },
 }
 
diff --git a/src/io.rs b/src/io.rs
index 955fe8b..bebe8ce 100644
--- a/src/io.rs
+++ b/src/io.rs
@@ -19,6 +19,12 @@ pub(crate) fn read_file(path: &Path) -> Result<String, Error> {
     String::from_utf8(buf).context(Utf8Err { path: tobuf() })
 }
 
+// Canonicalize `path`; failures become this module's `Error`, carrying the path.
+pub fn canonicalize(path: &Path) -> Result<PathBuf, Error> {
+    path.canonicalize()
+        .with_context(|_| CanonicalizeErr { path })
+}
+
 #[derive(Debug, Snafu)]
 #[snafu(context(suffix(Err)))]
 pub enum Error {
@@ -29,4 +35,9 @@ pub enum Error {
         path: PathBuf,
         source: FromUtf8Error,
     },
+    #[snafu(display("Could not canonicalize path: {:?}:\n{source}", path.display()))]
+    Canonicalize {
+        source: std::io::Error,
+        path: PathBuf,
+    },
 }
diff --git a/src/lib.rs b/src/lib.rs
index 2c6c5da..386a2b3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,6 +9,7 @@ mod lexer;
 mod model;
 mod optim;
 mod tree;
+mod output;
 
 pub use config::{Config, Filters};
 pub use gene_tree::{
diff --git a/src/optim/bfgs.rs b/src/optim/bfgs.rs
index 01550d7..b6cbd15 100644
--- a/src/optim/bfgs.rs
+++ b/src/optim/bfgs.rs
@@ -334,8 +334,7 @@ pub enum Error {
     },
     #[snafu(transparent)]
     WolfeSearch { source: WolfeSearchError },
-    #[snafu(display("Error writing to trace file {:?}:\n{source}",
-                     path.as_os_str().to_string_lossy()))]
+    #[snafu(display("Error writing to trace file {:?}:\n{source}", path.display()))]
     Trace { path: PathBuf, source: io::Error },
     #[snafu(display(
         "A log level of {log} is required, \
diff --git a/src/output.rs b/src/output.rs
new file mode 100644
index 0000000..8296ae4
--- /dev/null
+++ b/src/output.rs
@@ -0,0 +1,4 @@
+// Use this module to specify the possible output(s) of aphid.
+
+mod config;
+
diff --git a/src/output/config.rs b/src/output/config.rs
new file mode 100644
index 0000000..2a0046a
--- /dev/null
+++ b/src/output/config.rs
@@ -0,0 +1,87 @@
+// Render config to text, to trace every parameter used if needed.
+// This is basically just pretty-printing of the raw struct,
+// but we cannot simply derive(Debug) for the config
+// because a reference to the interner is necessary
+// to resolve the species symbols into names.
+use std::fmt;
+
+use crate::{
+    config::{SpeciesTriplet, Taxa},
+    interner::{Interner, ResolvedSymbol},
+    Config,
+};
+
+pub struct Display<'i> {
+    config: &'i Config,
+    interner: &'i Interner, // Needed to resolve species symbols into names.
+}
+
+struct TaxaDisplay<'i> {
+    taxa: &'i Taxa, // Holds the species symbols (triplet, outgroup, other) to resolve.
+    interner: &'i Interner,
+}
+
+struct TripletDisplay<'i> {
+    triplet: &'i SpeciesTriplet, // Rendered as the nested list [[A, B], C].
+    interner: &'i Interner,
+}
+
+impl<'i> fmt::Debug for Display<'i> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let Config { trees, taxa, filters, unresolved_length, search } = self.config;
+        f.debug_struct("Config")
+            .field("trees", trees)
+            .field("taxa", &taxa.for_display(self.interner)) // Species resolved to names.
+            .field("filters", filters)
+            .field("unresolved_length", unresolved_length)
+            .field("search", search)
+            .finish()
+    }
+}
+
+impl<'i> fmt::Debug for TaxaDisplay<'i> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let Taxa { triplet, outgroup, other } = self.taxa; // No `..`: every field is listed.
+        let resolve = |slice: &[_]| {
+            slice
+                .iter()
+                .map(|&symbol| ResolvedSymbol::new(symbol, self.interner))
+                .collect::<Vec<_>>()
+        }; // Maps each symbol of a slice to its `ResolvedSymbol`.
+        f.debug_struct("Taxa")
+            .field("triplet", &triplet.for_display(self.interner))
+            .field("outgroup", &resolve(outgroup))
+            .field("other", &resolve(other))
+            .finish()
+    }
+}
+
+// Debug-format the triplet as a nested list: [[A, B], C].
+impl<'i> fmt::Debug for TripletDisplay<'i> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let [a, b, c] = self
+            .triplet
+            .as_array()
+            .map(|s| ResolvedSymbol::new(s, self.interner));
+        let ab = [a, b]; // First two entries of `as_array()`, grouped as the inner pair.
+        f.debug_list().entry(&ab).entry(&c).finish()
+    }
+}
+
+impl Config {
+    pub fn for_display<'d>(&'d self, interner: &'d Interner) -> Display<'d> {
+        Display { config: self, interner } // Borrows both; rendering happens in `Debug`.
+    }
+}
+
+impl Taxa {
+    fn for_display<'d>(&'d self, interner: &'d Interner) -> TaxaDisplay<'d> {
+        TaxaDisplay { taxa: self, interner } // Called from `Display`'s `Debug` impl.
+    }
+}
+
+impl SpeciesTriplet {
+    fn for_display<'d>(&'d self, interner: &'d Interner) -> TripletDisplay<'d> {
+        TripletDisplay { triplet: self, interner } // Called from `TaxaDisplay`'s `Debug` impl.
+    }
+}
-- 
GitLab