diff --git a/Cargo.toml b/Cargo.toml index 28e1e30f913a09680010a7e0d852972bff807ae2..8ab386045e0f8e12380479442f2efdf72fcb87cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ authors = [ edition = "2021" [dependencies] -arrayvec = "0.7.4" +arrayvec = { version = "0.7.4", features = ["serde"] } clap = { version = "4.5.0", features = ["derive"] } colored = "2.1.0" itertools = "0.12.1" @@ -27,6 +27,7 @@ toml = "0.8.10" unicode-width = "0.1.11" float_eq = { version = "1.0.1", features = ["derive"] } snafu = "0.8.2" +serde_json = { version = "1.0.127", features = ["preserve_order"] } [dev-dependencies] rand = "0.8.5" diff --git a/src/bin/aphid/main.rs b/src/bin/aphid/main.rs index 49670787cdbbe7ddf61351a87f150ba016cb0633..a464b6b7e5edbde1414164f1259bcc296b763b1e 100644 --- a/src/bin/aphid/main.rs +++ b/src/bin/aphid/main.rs @@ -49,7 +49,7 @@ struct Args { // Standard output filenames. mod out { - pub(super) const CONFIG: &str = "config"; + pub(super) const CONFIG: &str = "config.json"; } fn main() { @@ -150,7 +150,7 @@ fn run() -> Result<(), Error> { format!("{}", path.display()).blue() ); let mut file = File::create(path)?; - writeln!(file, "{:#?}", config.for_display(interner))?; + writeln!(file, "{:#}", config.resolve(interner).json())?; //============================================================================== // Extract various information from the trees, diff --git a/src/config.rs b/src/config.rs index 88f1932fda1c781909af7c9cccd3a159ac0c7652..b5496e9c8bf6cf9369edd946053695418c1a1e83 100644 --- a/src/config.rs +++ b/src/config.rs @@ -8,6 +8,8 @@ use std::path::PathBuf; +use serde::Serialize; + use crate::{ gene_tree::MeanNbBases, interner::SpeciesSymbol, @@ -50,7 +52,7 @@ pub struct Config { } const MAX_UNRESOLVED_LENGTH: f64 = 0.5; -#[derive(Debug)] +#[derive(Debug, Serialize)] pub struct Filters { // When raised, filter out trees if some species in 'other' // branche between LCA(outgroup) and the root, @@ -93,7 +95,7 @@ pub struct SpeciesTriplet { pub c: SpeciesSymbol, } -#[derive(Debug)] +#[derive(Debug, Serialize)] pub struct Search { // Starting point in the search space. pub init_parms: Parameters<f64>, diff --git a/src/model/parameters.rs b/src/model/parameters.rs index bf16c12276a93cf331fc8d62d88ef10b16cb7e92..bc59a80716189e603c0a68824a1eb564c98c03f3 100644 --- a/src/model/parameters.rs +++ b/src/model/parameters.rs @@ -4,6 +4,7 @@ use std::fmt::{self, Display}; use arrayvec::ArrayVec; +use serde::Serialize; use tch::Tensor; // The structure is generic among float and tensors @@ -23,7 +24,7 @@ impl Num for ValGrad {} // - tau_1 <= tau_2 // - p_* <= 1 // - p_ab + p_ac + p_bc <= 1 -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub struct Parameters<F: Num> { // Population size. pub(crate) theta: F, @@ -41,7 +42,8 @@ pub struct Parameters<F: Num> { // Parameters are stack-allocated, so don't overuse possible GF times. pub(crate) const MAX_N_GF_TIMES: usize = 3; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Debug, Clone, PartialEq, Eq)] +#[serde(transparent)] pub struct GeneFlowTimes<F: Num>(pub(crate) ArrayVec<F, MAX_N_GF_TIMES>); // Convenience bundle. diff --git a/src/optim.rs b/src/optim.rs index b4f9503a5ba7b2813d8904924f5f888fa9285f90..fa7eb1f4a9f33814c86d954c6cac7bd545afc30d 100644 --- a/src/optim.rs +++ b/src/optim.rs @@ -10,6 +10,7 @@ pub(crate) mod wolfe_search; use std::num::NonZeroUsize; use paste::paste; +use serde::Serialize; use snafu::{ensure, Snafu}; use tch::Tensor; use tensor::Loggable; @@ -149,7 +150,7 @@ impl History { // Useful to optimizers using history // to periodically check slope and stop when it reaches some threshold. -#[derive(Debug)] +#[derive(Debug, Serialize)] pub(crate) struct SlopeTrackingConfig { history_size: NonZeroUsize, threshold: f64, // Positive. diff --git a/src/optim/bfgs.rs b/src/optim/bfgs.rs index b6cbd151b5fbef7bdfbf2426ed71299577b8d5da..fc006b4cf2a088c50809a441a79f95c4ea4f727d 100644 --- a/src/optim/bfgs.rs +++ b/src/optim/bfgs.rs @@ -9,6 +9,7 @@ use std::{ path::PathBuf, }; +use serde::Serialize; use snafu::{ensure, ResultExt, Snafu}; use tch::Tensor; @@ -22,7 +23,7 @@ use crate::optim::{ }; // The exposed config. -#[derive(Debug)] +#[derive(Debug, Serialize)] pub(crate) struct Config { pub(crate) max_iter: u64, pub(crate) wolfe: WolfeSearchConfig, diff --git a/src/optim/gd.rs b/src/optim/gd.rs index 9c03a6ce7cd68ea2dcdfc14f81f0a6076c0917c9..79e31aa45929182f762ec7ee19cb7f545ee3bdd9 100644 --- a/src/optim/gd.rs +++ b/src/optim/gd.rs @@ -4,6 +4,7 @@ // Stops on its own when the mean slope of the history // becomes flat, provided the history is full. +use serde::Serialize; use snafu::Snafu; use tch::Tensor; @@ -12,7 +13,7 @@ use crate::optim::{ SlopeTracker, SlopeTrackingConfig, }; -#[derive(Debug)] +#[derive(Debug, Serialize)] pub(crate) struct Config { pub(crate) max_iter: u64, pub(crate) step_size: f64, // Above 0. diff --git a/src/optim/wolfe_search.rs b/src/optim/wolfe_search.rs index 902356021c98e04643451ecb954445580af5b086..f8657cab0563e249e851e7e891bdc8462f496e83 100644 --- a/src/optim/wolfe_search.rs +++ b/src/optim/wolfe_search.rs @@ -28,12 +28,13 @@ use std::{ ops::ControlFlow, }; +use serde::Serialize; use snafu::{ensure, Snafu}; use tch::Tensor; use crate::optim::{tensor::OptimTensor, Best}; -#[derive(Debug)] +#[derive(Debug, Serialize)] pub(crate) struct Config { // 0 < c1 < c2 < 1 pub(crate) c1: f64, diff --git a/src/output/config.rs b/src/output/config.rs index 2a0046a9734fc3008237858f21cd2198df2fc56e..83b9b13d4e3ae5a3aad752991fb11092f8ffb8c3 100644 --- a/src/output/config.rs +++ b/src/output/config.rs @@ -1,37 +1,116 @@ -// Render config to text, to trace every parameter used if needed. -// This is basically just pretty-printing of the raw struct, -// but we cannot simply derive(Debug) for the config -// because a reference to the interner is necessary -// to resolve the species symbols into names. +// Render config to json +// to trace every parameter used if needed. +// Serde does most of the job, +// but a reference to the interner is necessary +// to resolve species symbols into name strings. use std::fmt; +use serde_json::{json, Value as JsValue, Value as Js}; + use crate::{ config::{SpeciesTriplet, Taxa}, interner::{Interner, ResolvedSymbol}, Config, }; -pub struct Display<'i> { +//================================================================================================== +// 'Resolver' types carry both a reference to their data +// and to the interner to resolve symbols within them. + +pub struct Resolver<'i> { config: &'i Config, interner: &'i Interner, } -struct TaxaDisplay<'i> { +struct TaxaResolver<'i> { taxa: &'i Taxa, interner: &'i Interner, } -struct TripletDisplay<'i> { +struct TripletResolver<'i> { triplet: &'i SpeciesTriplet, interner: &'i Interner, } -impl<'i> fmt::Debug for Display<'i> { +impl Config { + pub fn resolve<'d>(&'d self, interner: &'d Interner) -> Resolver<'d> { + Resolver { config: self, interner } + } +} + +impl Taxa { + fn resolve<'d>(&'d self, interner: &'d Interner) -> TaxaResolver<'d> { + TaxaResolver { taxa: self, interner } + } +} + +impl SpeciesTriplet { + fn resolve<'d>(&'d self, interner: &'d Interner) -> TripletResolver<'d> { + TripletResolver { triplet: self, interner } + } +} + +//================================================================================================== +// Resolve as json values. + +impl Resolver<'_> { + pub fn json(&self) -> JsValue { + let Resolver { config, interner } = self; + let Config { trees, taxa, filters, unresolved_length, search } = config; + json!({ + "trees": trees, + "taxa": taxa.resolve(interner).json(), + "filters": filters, + "unresolved_length": unresolved_length, + "search": search, + }) + } +} + +impl TaxaResolver<'_> { + pub fn json(&self) -> JsValue { + let TaxaResolver { taxa, interner } = self; + let Taxa { triplet, outgroup, other } = taxa; + let outgroup = Js::Array( + outgroup + .iter() + .map(|&s| json! {interner.resolve(s).unwrap()}) + .collect(), + ); + let other = Js::Array( + other + .iter() + .map(|&s| json! {interner.resolve(s).unwrap()}) + .collect(), + ); + json!({ + "triplet": triplet.resolve(interner).json(), + "outgroup": outgroup, + "other": other, + }) + } +} + +impl TripletResolver<'_> { + pub fn json(&self) -> JsValue { + let TripletResolver { triplet, interner } = self; + let SpeciesTriplet { a, b, c } = triplet; + let [a, b, c] = [a, b, c].map(|&s| interner.resolve(s).unwrap()); + json! { + [[a, b], c] + } + } +} + +//================================================================================================== +// Debug displays (same logic without json for informal use within the program). + +impl<'i> fmt::Debug for Resolver<'i> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let Config { trees, taxa, filters, unresolved_length, search } = self.config; f.debug_struct("Config") .field("trees", trees) - .field("taxa", &taxa.for_display(self.interner)) + .field("taxa", &taxa.resolve(self.interner)) .field("filters", filters) .field("unresolved_length", unresolved_length) .field("search", search) @@ -39,7 +118,7 @@ impl<'i> fmt::Debug for Display<'i> { } } -impl<'i> fmt::Debug for TaxaDisplay<'i> { +impl<'i> fmt::Debug for TaxaResolver<'i> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let Taxa { triplet, outgroup, other } = self.taxa; let resolve = |slice: &[_]| { @@ -49,15 +128,14 @@ impl<'i> fmt::Debug for TaxaDisplay<'i> { .collect::<Vec<_>>() }; f.debug_struct("Taxa") - .field("triplet", &triplet.for_display(self.interner)) + .field("triplet", &triplet.resolve(self.interner)) .field("outgroup", &resolve(outgroup)) .field("other", &resolve(other)) .finish() } } -// Display as [[A, B], C]. -impl<'i> fmt::Debug for TripletDisplay<'i> { +impl<'i> fmt::Debug for TripletResolver<'i> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let [a, b, c] = self .triplet @@ -67,21 +145,3 @@ impl<'i> fmt::Debug for TripletDisplay<'i> { f.debug_list().entry(&ab).entry(&c).finish() } } - -impl Config { - pub fn for_display<'d>(&'d self, interner: &'d Interner) -> Display<'d> { - Display { config: self, interner } - } -} - -impl Taxa { - fn for_display<'d>(&'d self, interner: &'d Interner) -> TaxaDisplay<'d> { - TaxaDisplay { taxa: self, interner } - } -} - -impl SpeciesTriplet { - fn for_display<'d>(&'d self, interner: &'d Interner) -> TripletDisplay<'d> { - TripletDisplay { triplet: self, interner } - } -}