From 5ed197523c5ea71017d7a467f4e624927f3e5cc0 Mon Sep 17 00:00:00 2001 From: Iago Bonnici <iago.bonnici@umontpellier.fr> Date: Mon, 17 Mar 2025 19:30:56 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=96=20Integrate=20Nico's=20feedback.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitlab-ci/pages.yml | 1 + doc/src/intro.md | 4 ++-- doc/src/learn.md | 1 + doc/src/likelihood.md | 5 +++-- doc/src/preprocess.md | 6 +++--- src/config/raw.rs | 17 ++++++++--------- 6 files changed, 18 insertions(+), 16 deletions(-) diff --git a/.gitlab-ci/pages.yml b/.gitlab-ci/pages.yml index 4d16a28..874358d 100644 --- a/.gitlab-ci/pages.yml +++ b/.gitlab-ci/pages.yml @@ -9,6 +9,7 @@ pages: pacman -Sy --noconfirm \ base-devel \ rustup \ + ripgrep \ python-pytorch \ texlive \ pdf2svg diff --git a/doc/src/intro.md b/doc/src/intro.md index 3848078..93fcd64 100644 --- a/doc/src/intro.md +++ b/doc/src/intro.md @@ -20,7 +20,7 @@ and the associated command-line program: This document assumes that you are familiar with the method described in the paper. -Reader interested in the context, the intuition, +Readers interested in the context, the intuition, or the meaning of the method are encouraged to refer to the original paper instead, as these are not covered here. @@ -28,7 +28,7 @@ as these are not covered here. This document explains how to install the `aphid` program and how to use it. It also specifies the detail of expected inputs, -of the calculation performed within the program +of the calculation performed by the program and the outputs produced. Bug reports, feature requests and contributions are welcome [here][repo]. 
diff --git a/doc/src/learn.md b/doc/src/learn.md index a577ab9..f4e8cf8 100644 --- a/doc/src/learn.md +++ b/doc/src/learn.md @@ -48,6 +48,7 @@ Then use these as default starting points: \end{align} \\] +(see aphid's [default values for `theta` = \\(𝜃\\)](./use.md#init)) ## Reparametrization { #transform } diff --git a/doc/src/likelihood.md b/doc/src/likelihood.md index 1be4033..8f01eeb 100644 --- a/doc/src/likelihood.md +++ b/doc/src/likelihood.md @@ -34,7 +34,8 @@ At the global level: - \\(\pbc\\) : probability that GF occured between \\(B\\) and \\(C\\) (*idem*). - \\(\po\\) : - probability that GF occured at the **o**ldest date in \\(gt\\) (*idem*). + knowing that GF occurred, + probability that it occurred at the **o**ldest date in \\(gt\\) (*idem*). [learn]: ./learn.md [gft]: ./use.md#gft @@ -81,7 +82,7 @@ For every gene tree \\(g\\): The formula values are subject to the following constraints: - No negative values. -- \\(\sl_g\\) integer (number of bases) +- \\(\sl_g\\) integer (number of sites) - \\(𝜏_1 \leq 𝜏_2\\) (older coalescence last) - \\(p_* \leq 1\\) (probabilities) - \\(\pab + \pac + \pbc \leq 1\\) (total probability of GF) diff --git a/doc/src/preprocess.md b/doc/src/preprocess.md index 0576ac3..a051c2b 100644 --- a/doc/src/preprocess.md +++ b/doc/src/preprocess.md @@ -19,12 +19,12 @@ the [`other`](./use.md#other) section of the input [`[taxa]`](./use.md#taxa) table. This process reduces the number of node in every tree, -but the branches lengths are conserved. +but the branch lengths are conserved. For instance, pruning species `A`, `C`, `E` and `H` in the following raw gene tree -with branches lengths `a`, `b`, `c`, *etc.*: +with branch lengths `a`, `b`, `c`, *etc.*: ``` │q @@ -148,7 +148,7 @@ The 'imbalance' of every tree is calculated with respect to the whole forest.
\\] If a tree has a high imbalance value, it means that it is dissimilar to its enclosing forest, -and the hypothesis that mutation rate is constant accross the tree(s) +and the hypothesis that mutation rate is constant across the tree is weakened. The tree geometry is rejected if its imbalance is greater than diff --git a/src/config/raw.rs b/src/config/raw.rs index abedb85..6444b44 100644 --- a/src/config/raw.rs +++ b/src/config/raw.rs @@ -28,7 +28,10 @@ pub struct Config { /// List the relative dates for possible gene flow events. /// At least one event must be specified, /// but no more than [`model::parameters::MAX_N_GF_TIMES`]. - /// Dates are relative to the divergence times of the `triplet` species. + /// Dates are relative to the A|B divergence time. + /// They must take values between 0 and 1: + /// 0 meaning present-day gene flow time, + /// and 1 meaning that gene flow happened at A|B divergence time. #[serde(default = "defaults::gf_times")] pub gf_times: GeneFlowTimes, @@ -39,11 +42,7 @@ pub struct Config { /// In this situation, /// every scenario contributes to the likelihood /// instead of only the ones with a concordant topology. - /// The possible internal branch discordance - /// between actual and expected length - /// is neglected because the the actual length is small. - /// For this reason, only values inferior - /// to [`config::MAX_UNRESOLVED_COUNTS`] are accepted. + /// Only values inferior to [`config::MAX_UNRESOLVED_COUNTS`] are accepted. /// The value is given in *mutations count* units, /// so branch length × sequence length. pub unresolved_mutations_count: Option<f64>, @@ -105,7 +104,7 @@ pub struct Taxa { /// <tree> <TAB> <sequence_length> <TAB> <identifier> /// ``` /// The `<tree>` part being a [Newick] representation of the gene tree, - /// with branches lengths specified. + /// with branch lengths specified in units of 'per site substitution'.
/// /// [Newick]: https://en.wikipedia.org/wiki/Newick_format /// @@ -113,7 +112,7 @@ pub struct Taxa { pub trees: PathBuf, /// This parameter is where you specify - /// the three species of interest and their phylogenetical topology. + /// the triplet of species of interest and its topology. /// Either forms `["A", ["B", "C"]]` or `"(A, (B, C))"` are accepted, /// provided `A`, `B` and `C` match names within the provided `taxa.trees`. pub triplet: Triplet, @@ -127,7 +126,7 @@ pub struct Taxa { /// Species listed in this parameter /// are used as extra leaves in the tree - /// to estimate branch lengths properties. + /// to estimate branch length properties. #[serde_as(as = "FromInto<ListOfStrings>")] pub other: Vec<String>, } -- GitLab