// Command-line interface of the aphid binary, parsed with clap's derive API.
// NOTE(review): the `///` doc comments below double as the generated CLI help
// text — edit them only if the user-visible help should change.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub(crate) struct Args {
    // Positional argument 1 (declaration order fixes positional order).
    /// Path to the config file.
    pub(crate) config: PathBuf,

    // Positional argument 2; the folder is created by the program if absent.
    /// Path to the output folder, created if missing.
    pub(crate) output: PathBuf,

    // Optional flag, exposed as `-f` / `--force`; listed after the
    // positionals so required arguments come first in the help output.
    /// Raise to overwrite previous output folder.
    #[clap(short, long)]
    pub(crate) force: bool,
}
<bold,green>✓</>"); } Err(e) => { ceprintln!("<bold,red>🗙 Error:</> {e}"); @@ -51,15 +46,18 @@ fn run() -> Result<(), Error> { let mut interner = Interner::new(); let interner = &mut interner; + // Canonicalize files. + let output = path::absolute(&args.output)?; + // Read data from the various files. - let (config, forest) = read_inputs(&args, interner)?; + let (config, forest) = read_inputs(&args, &output, interner)?; // Check overall consistency. // Extract the set of designated species = 'triplet' + 'outgroup' + 'other'. let designated_species = check_input_consistency(&forest, &config, interner)?; // Prepare output folder. - let output = prepare_output(&args)?; + prepare_output(&output, &args)?; // Output full configuration detail. write_config(&output, &config, interner)?; @@ -100,10 +98,10 @@ fn run() -> Result<(), Error> { Ok(()) } -fn read_inputs(args: &args::Args, interner: &mut Interner) -> Result<(Config, GenesForest), Error> { +fn read_inputs(args: &args::Args, output_folder: &Path, interner: &mut Interner) -> Result<(Config, GenesForest), Error> { let path = &args.config; cprintln!("Read config from <b>{}</>.", path.display()); - let config = Config::from_file(path, interner)?; + let config = Config::from_file(path, output_folder, interner)?; let path = &config.trees; cprintln!("Read gene trees from <b>{}</>:", path.display()); @@ -147,10 +145,9 @@ fn check_input_consistency( Ok(designated_species) } -fn prepare_output(args: &args::Args) -> Result<PathBuf, Error> { +fn prepare_output(output: &Path, args: &args::Args) -> Result<(), Error> { println!("Prepare output folder:"); - let output = path::absolute(&args.output)?; match (output.exists(), args.force) { (true, true) => { cprintln!( @@ -165,12 +162,11 @@ fn prepare_output(args: &args::Args) -> Result<PathBuf, Error> { } }; fs::create_dir_all(&output)?; - - Ok(output) + Ok(()) } fn write_config(output: &Path, config: &Config, interner: &Interner) -> Result<(), Error> { - let path = 
output.join(out::CONFIG); + let path = output.join(file::CONFIG); cprintln!(" Write full configuration to <b>{}</>.", path.display()); let mut file = File::create(path)?; writeln!(file, "{:#}", config.resolve(interner).json())?; @@ -376,7 +372,7 @@ fn final_tree_selection( } fn write_detail(output: &Path, details: &[detail::Tree]) -> Result<(), Error> { - let path = output.join(out::DETAIL); + let path = output.join(file::DETAIL); cprintln!( "Write full trees analysis/filtering detail to <b>{}</>.", path.display() @@ -387,7 +383,7 @@ fn write_detail(output: &Path, details: &[detail::Tree]) -> Result<(), Error> { } fn write_csv(output: &Path, details: &[detail::Tree]) -> Result<(), Error> { - let path = output.join(out::CSV); + let path = output.join(file::CSV); cprintln!("Summarize scalar values to <b>{}</>.", path.display()); let file = File::create(path)?; let mut wtr = csv::Writer::from_writer(file); @@ -456,7 +452,7 @@ fn display_summary(included: &[usize], details: &[detail::Tree], config: &Config } let n = included.len() as u64; - cprintln!(" ==> <s,g>{n}</> tree{} kept for analysis.", s(n)); + cprintln!("==> <s,g>{n}</> tree{} kept for analysis.", s(n)); } fn learn(triplets: &[GeneTriplet], config: &Config) -> Result<(), Error> { diff --git a/src/config.rs b/src/config.rs index dc68e041152fe46d8f4b1e4907629e64df28a92a..97483cd39f6ab233f030a6f8e7cffd94ff468253 100644 --- a/src/config.rs +++ b/src/config.rs @@ -110,7 +110,4 @@ pub struct Search { // Main learning phase configuration. pub(crate) bfgs: BfgsConfig, - - // Possible final gradient descent. 
- pub(crate) final_descent: Option<GdConfig>, } diff --git a/src/config/check.rs b/src/config/check.rs index 0d855f6f97695975435d715fd50a5ecd2137e474..7ad25cda3e3c3d6296ea0fc4a532d5a730a0d331 100644 --- a/src/config/check.rs +++ b/src/config/check.rs @@ -14,7 +14,7 @@ use snafu::{ensure, Snafu}; use crate::{ config::{ defaults, - deserialize::{self as raw, NodeInput}, + deserialize::{self as raw, NodeInput, RecordTrace}, Search, SpeciesTriplet, Taxa, MAX_UNRESOLVED_LENGTH, }, interner::{Interner, SpeciesSymbol}, @@ -24,6 +24,7 @@ use crate::{ self, bfgs::Config as BfgsConfig, gd::Config as GdConfig, wolfe_search::Config as WolfeConfig, SlopeTrackingConfig, }, + output::file, Config, Filters, Parameters, }; @@ -35,7 +36,11 @@ macro_rules! err { impl Config { // Read raw config from file, then check it and convert into a valid config value. - pub fn from_file(path: &Path, interner: &mut Interner) -> Result<Self, Error> { + pub fn from_file( + path: &Path, + output_folder: &Path, + interner: &mut Interner, + ) -> Result<Self, Error> { // Parse raw TOML file. let input = read_file(path)?; let raw = raw::Config::parse(&input)?; @@ -59,7 +64,7 @@ impl Config { ); } - let search = (&raw).try_into()?; + let search = Search::try_from(&raw, output_folder)?; let taxa = raw.taxa._try_into(interner)?; // Resolve relative paths relatively to the config file. @@ -129,9 +134,8 @@ impl TryFrom<&raw::Filters> for Filters { //-------------------------------------------------------------------------------------------------- // Check search configuration. 
-impl TryFrom<&'_ raw::Config> for Search { - type Error = Error; - fn try_from(raw: &'_ raw::Config) -> Result<Self, Error> { +impl Search { + fn try_from(raw: &raw::Config, output_folder: &Path) -> Result<Self, Error> { let init_data_reduction_factor = raw.search.init_data_reduction_factor; if init_data_reduction_factor <= 1.0 { err!( @@ -143,13 +147,7 @@ impl TryFrom<&'_ raw::Config> for Search { init_parms: Parameters::try_from(&raw.init, &raw.gf_times)?, init_data_reduction_factor, init_descent: (&raw.search.init_descent).try_into()?, - bfgs: BfgsConfig::try_from(&raw.search.bfgs, raw.gf_times.len())?, - final_descent: raw - .search - .final_descent - .as_ref() - .map(TryInto::try_into) - .transpose()?, + bfgs: BfgsConfig::try_from(&raw.search.bfgs, raw.gf_times.len(), output_folder)?, }) } } @@ -170,14 +168,17 @@ impl TryFrom<&'_ raw::GdConfig> for GdConfig { } impl BfgsConfig { - fn try_from(raw: &raw::BfgsConfig, n_gf_times: usize) -> Result<Self, Error> { + fn try_from( + raw: &raw::BfgsConfig, + n_gf_times: usize, + output_folder: &Path, + ) -> Result<Self, Error> { let &raw::BfgsConfig { max_iter, ref wolfe_search, step_size_threshold, ref slope_tracking, - ref main_trace, - ref linsearch_trace, + ref record_trace, } = raw; ensure!( 0. <= step_size_threshold, @@ -186,40 +187,14 @@ impl BfgsConfig { must be null or positive. 
Received: {step_size_threshold}.") ) ); - let check_path = |path: &Option<PathBuf>| -> Result<_, Error> { - let Some(path) = path else { - return Ok(None); - }; - if path.is_file() { - let path = io::canonicalize(path)?; - eprintln!("Will override {}.", path.display()); - } else { - ensure!( - !path.is_dir(), - err!(("Path designates a folder: {}", path.display())) - ); - if let Some(parent) = path.parent() { - ensure!(parent.exists(), NoSuchFolderErr { path: parent }); - } else { - return err!(("Path does not exist and has no parent: {}", path.display())) - .fail(); - } - } - Ok(Some(path.clone())) + let (main_trace_path, linsearch_trace_path) = match record_trace { + RecordTrace::No => (None, None), + RecordTrace::Global => (Some(output_folder.join(file::MAIN_TRACE)), None), + RecordTrace::Detail => ( + Some(output_folder.join(file::MAIN_TRACE)), + Some(output_folder.join(file::LINSEARCH_TRACE)), + ), }; - let main_trace_path = check_path(main_trace)?; - let linsearch_trace_path = check_path(linsearch_trace)?; - if let (None, Some(path)) = (&main_trace_path, &linsearch_trace) { - return err!(( - "A path is specified with <b>`search.bfgs.linsearch_trace`</> \ - to log the detailed trace of BFGS linear search during each step, \ - but none is specified to log the global trace of each step. \ - Consider setting <b>`search.bfgs.main_trace`</> option.\n\ - The path given was: <k>{}</>", - path.display(), - )) - .fail(); - } Ok(Self { max_iter, slope_tracking: slope_tracking diff --git a/src/config/defaults.rs b/src/config/defaults.rs index 37dcd8d951ca539138b878ccc8937ccdbc5a5810..6a9d3a33f430a7b11ffc3d0550eafa9778b51770 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -1,5 +1,6 @@ // Decisions for anything not user-provided. 
+use super::deserialize::RecordTrace; use crate::{ config::deserialize::{ BfgsConfig, GdConfig, InitialParameters, Search, SlopeTrackingConfig, WolfeSearchConfig, @@ -35,7 +36,6 @@ pub(crate) fn search() -> Search { init_data_reduction_factor: init_data_reduction_factor(), init_descent: init_descent(), bfgs: BfgsConfig::default(), - final_descent: None, } } @@ -62,8 +62,7 @@ impl Default for BfgsConfig { threshold: 1e-3, grain: 5, }), - main_trace: None, - linsearch_trace: None, + record_trace: RecordTrace::No, } } } diff --git a/src/config/deserialize.rs b/src/config/deserialize.rs index 70f56e6c580a334380d1c80c1bf8d82259d38584..18b2700f71a6091971ce81051a3698c54fd2042d 100644 --- a/src/config/deserialize.rs +++ b/src/config/deserialize.rs @@ -131,7 +131,6 @@ pub(crate) struct Search { pub(crate) init_descent: GdConfig, #[serde(default)] pub(crate) bfgs: BfgsConfig, - pub(crate) final_descent: Option<GdConfig>, } #[derive(Deserialize)] @@ -141,8 +140,15 @@ pub(crate) struct BfgsConfig { pub(crate) wolfe_search: WolfeSearchConfig, pub(crate) step_size_threshold: f64, pub(crate) slope_tracking: Option<SlopeTrackingConfig>, - pub(crate) main_trace: Option<PathBuf>, - pub(crate) linsearch_trace: Option<PathBuf>, + pub(crate) record_trace: RecordTrace, +} + +#[derive(Deserialize)] +#[serde(rename_all="snake_case")] +pub(crate) enum RecordTrace { + No, + Global, + Detail, } #[derive(Deserialize)] diff --git a/src/learn.rs b/src/learn.rs index 97b34dea2c05b92ed327efef41233147f9f8fae6..217db857e43e408309996c7708f880f06e84ef18 100644 --- a/src/learn.rs +++ b/src/learn.rs @@ -5,6 +5,7 @@ // and 'P' the 'parameters' to optimize, // and that we wish to derive F with respect to. +use color_print::cprintln; use snafu::{ensure, Snafu}; use tch::{Device, Tensor}; @@ -57,9 +58,10 @@ pub fn optimize_likelihood( // If this happens, perform the optimization // on a smaller batch of the data first until we get finite likelihood. 
let sc: Scores<f64> = (&scores(&p)).into(); - println!( - "The chosen starting point yields non-finite log-likelihood ({first_lnl}) \ - on the whole dataset ({n_triplets} triplets):\n{sc}\n{start:?}", + cprintln!( + "<s>--</> The chosen starting point yields \ + non-finite log-likelihood (<s>{first_lnl}</>) \ + on the whole dataset (<s>{n_triplets}</> triplets):\n<k>{sc}</>\n<k>{start:?}</>", ); // Numerical conversion shenanigans to divide dataset by a floating factor. @@ -83,30 +85,30 @@ pub fn optimize_likelihood( n_samples > 0, NonFiniteLikelihoodErr { lnl: first_lnl, parms: start.clone() } ); - println!( - "-- Try obtaining finite log-likelihood \ - with the {n_samples} first triplets." + cprintln!( + " Try obtaining finite log-likelihood \ + with the <s>{n_samples}</> first triplets." ); sub_triplets = &triplets[0..n_samples]; sub_x = data_tensors(sub_triplets, n_scenarios, &scenarios, device); let lnl = ln_likelihood_tensors(&sub_x, &scores(&p)).to_double(); n_eval += 1; if !lnl.is_finite() { - println!("---- Failure: obtained log-likelihood: {lnl}."); + cprintln!(" Failure: obtained log-likelihood: <s>{lnl}</>."); // Reduce the data again. n_samples = reduce(n_samples); continue 'x; } // With this working (sub_X, P), perform a learning pass // to produce a (hopefully better) candidate P. - println!( - "---- Success: obtained log-likelihood: {lnl}.\n\ - -- Learn on this subsample with simple gradient descent." + cprintln!( + " Success: obtained log-likelihood: <s>{lnl}</>.\n\ + <s>--</> Learn on this subsample with simple gradient descent." 
); let f = |p: &Tensor| -ln_likelihood_tensors(&sub_x, &scores(p)); match search.init_descent.minimize(f, &p, 0) { Err(e) => { - println!("---- Learning failed:\n{e}"); + cprintln!(" Learning failed:\n<k>{e}</>"); n_samples /= 2; continue 'x; } @@ -116,26 +118,28 @@ pub fn optimize_likelihood( p = opt.best_vars().copy(); let sc: Scores<f64> = (&scores(&p)).into(); let pm: Parameters<f64> = (&sc).into(); - println!( - "---- Success: parameters learned on the subsample:\n{sc}\n{pm:?}" + cprintln!( + " Success: parameters learned \ + on the subsample:\n<k>{sc}</>\n<k>{pm:?}</>" ); break 'x; } } } - println!("-- Try with this new starting point."); + cprintln!("<s>--</> Try with this new starting point."); let lnl = ln_likelihood_tensors(&x, &scores(&p)).to_double(); n_eval += 1; if !lnl.is_finite() { - println!( - "---- Failure: obtained non-finite log-likelihood again ({lnl}). \ + cprintln!( + " Failure: obtained non-finite log-likelihood again (<s>{lnl}</>). \ Try subsampling again from the new starting point." ); continue 'p; } - println!( - "---- Success: obtained finite log-likelihood on the whole dataset ({lnl}).\n\ - -- Start learning from this new starting point." + cprintln!( + " Success: obtained finite log-likelihood \ + on the whole dataset (<s>{lnl}</>).\n\ + Start learning from this new starting point." ); break 'p; } @@ -144,19 +148,20 @@ pub fn optimize_likelihood( // Learn over the whole dataset. let f = |p: &Tensor| -ln_likelihood_tensors(&x, &scores(p)); - println!("-- BFGS learning."); - let mut opt: Box<dyn OptimResult>; - // Log every step if a file was provided. - opt = Box::new(search.bfgs.minimize(f, &p)?); - let p = opt.best_vars(); - n_eval += opt.n_eval(); - n_diff += opt.n_diff(); - - // Then simple gradient descent to refine. 
- if let Some(gd) = &search.final_descent { - println!("-- Refine with simple gradient descent."); - opt = Box::new(gd.minimize(f, p, 0)?); + if let Some(main) = &search.bfgs.main_trace_path { + cprintln!( + " Recording main BFGS search trace at <b>{}</>.", + main.display() + ); + } + if let Some(lins) = &search.bfgs.linsearch_trace_path { + cprintln!( + " Recording detailed Wolfe linear search traces at <b>{}</>.", + lins.display() + ); } + // Log every step if a file was provided. + let opt = Box::new(search.bfgs.minimize(f, &p)?); n_eval += opt.n_eval(); n_diff += opt.n_diff(); @@ -167,8 +172,12 @@ pub fn optimize_likelihood( let grad = p.grad(); let scores = scores(&p); let grad = scores.with_grads(&grad); - println!("-- Terminate heuristic after {n_eval} evaluations and {n_diff} differentiations."); - println!("-- Final gradient:\n{grad:#}"); + cprintln!( + "<s>--</> Terminate heuristic after \ + <g>{n_eval}</> evaluations and \ + <g>{n_diff}</> differentiations." + ); + cprintln!("<s>--</> Final location: <k,i>{}value 'gradient{}</>:\n{grad:#}", '<', '>'); let parms: Parameters<f64> = (&scores).into(); let opt_lnl = -opt.best_loss(); diff --git a/src/model/parameters.rs b/src/model/parameters.rs index b570a860c337e227886df3a96c6c65760e52212b..3bc67951797084790d89b9190f070eecd725e456 100644 --- a/src/model/parameters.rs +++ b/src/model/parameters.rs @@ -4,7 +4,7 @@ use std::fmt::{self, Display}; use arrayvec::ArrayVec; -use color_print::cwriteln; +use color_print::{cwrite, cwriteln}; use serde::Serialize; use tch::Tensor; @@ -89,11 +89,11 @@ impl<F: Num + Display> Display for GeneFlowTimes<F> { impl Display for ValGrad { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let Self { value, gradient } = self; - write!(f, "{value} '")?; + cwrite!(f, "{value:?} <k>'</>")?; if let Some(grad) = gradient { - write!(f, "{grad}") + cwrite!(f, "<k>{grad:?}</>") } else { - f.write_str("<nograd>") + cwrite!(f, "<k>{}</>", "<nograd>") } } } diff --git a/src/optim/gd.rs 
b/src/optim/gd.rs index 53b110147f00bea8e9a3890146debb23017a9398..6f1712132f2107b1ffd28ec1f2654d3d12e6316f 100644 --- a/src/optim/gd.rs +++ b/src/optim/gd.rs @@ -4,6 +4,7 @@ // Stops on its own when the mean slope of the history // becomes flat, provided the history is full. +use color_print::cprintln; use serde::Serialize; use snafu::Snafu; use tch::Tensor; @@ -93,7 +94,7 @@ impl Config { check_finite_grad!(grad, y, x, 0u64); if max_iter == 0 { - println!("No optimisation step asked for."); + cprintln!("<k>No optimisation step asked for.</>"); return Ok(GdResult { vars: best.vars, loss: best.loss, n_eval, n_diff }); } @@ -116,7 +117,9 @@ impl Config { // Check slope. if let Some(ref mut tracker) = slope_tracker { if let Some(lowslope) = tracker.low_slope(y, n_steps) { - println!("Weak loss slope ({lowslope:e}) on iteration {n_steps}: stopping."); + cprintln!( + "<k>Weak loss slope ({lowslope:e}) on iteration {n_steps}: stopping.</>" + ); break Ok(GdResult { loss: best.loss, vars: best.vars, n_eval, n_diff }); } } @@ -136,13 +139,13 @@ impl Config { n_steps += 1; if n_steps >= max_iter { if let Some(slope_threshold) = slope_tracking.as_ref().map(|s| s.threshold) { - println!( - "Max iteration reached ({n_steps}) \ + cprintln!( + "<k>Max iteration reached ({n_steps}) \ without finding a loss slope magnitude \ - lower than {slope_threshold:e}." + lower than {slope_threshold:e}.</>" ); } else { - println!("Max iteration reached ({n_steps})."); + cprintln!("<k>Max iteration reached ({n_steps}).</>"); } break Ok(GdResult { loss: best.loss, vars: best.vars, n_eval, n_diff }); } diff --git a/src/output.rs b/src/output.rs index 87533ae203ad2f6074ffb71dd81bcb8f253e7a7d..9bebdcd15b652c6a5cdb200392c198990e138e57 100644 --- a/src/output.rs +++ b/src/output.rs @@ -3,3 +3,13 @@ mod config; pub mod detail; pub mod csv; + +// Standard output filenames. 
// Standard output filenames.
//
// Single source of truth for every file aphid creates inside the output
// folder; callers join these onto the (absolute) output path.
pub mod file {
    /// Full resolved configuration, dumped as JSON.
    pub const CONFIG: &str = "config.json";
    /// Per-tree analysis/filtering detail, as JSON.
    pub const DETAIL: &str = "detail.json";
    /// Scalar summary of the trees, as CSV.
    pub const CSV: &str = "trees.csv";
    /// Global (per-step) BFGS search trace, as CSV.
    pub const MAIN_TRACE: &str = "search_global.csv";
    /// Detailed Wolfe linear-search trace, as CSV.
    pub const LINSEARCH_TRACE: &str = "search_detail.csv";
}