bpp-phyl3  3.0.0
StochasticMapping.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_PHYL_MAPPING_STOCHASTICMAPPING_H
6 #define BPP_PHYL_MAPPING_STOCHASTICMAPPING_H
7 
8 
9 #include "../Likelihood/DataFlow/DataFlowCWise.h"
10 #include "../Likelihood/DataFlow/LikelihoodCalculationSingleProcess.h"
11 #include "../Simulation/MutationProcess.h"
12 #include "../Simulation/SubstitutionProcessSequenceSimulator.h"
13 
14 // From the STL:
15 #include <iostream>
16 #include <iomanip>
17 #include <map>
18 
19 using namespace std;
20 
21 /* Store the countings on a DAG similar to the computing DAG */
22 
23 
24 typedef vector<vector<vector<double>>> VVVDouble;
25 typedef vector<vector<double>> VVDouble;
26 typedef vector<double> VDouble;
27 
28 /* class for representing the framework of Stochastic mapping
29  *
30  * A StochasticMapping instance can be used to sample histories of
31  * state transitions along a tree, given a substitution model and the
32  * states at the tip taxa. For more information, see: Nielsen, Rasmus.
33  * "Mapping mutations on phylogenies." Systematic biology 51.5 (2002):
34  * 729-739.‏
35  */
36 
37 namespace bpp
38 {
40 {
41 protected:
42  /*
43  * @brief The tree likelihood instance is used for computing the
44  * the conditional sampling probabilities of the ancestral states as
45  * well as the root assignment probabilities.
46  *
47  */
48 
49  std::shared_ptr<LikelihoodCalculationSingleProcess> likelihood_;
50 
51  std::shared_ptr<PhyloTree> tree_;
52 
53  /*
54  * @brief this instance will hold the parameters required for the
55  * stochastic mapping procedure, and be used to generate stochastic
56  * mappings.
57  */
58 
59 // SimpleSubstitutionProcessSequenceSimulator mappingParameters_;
60 
61  /*
62  * @briefvector that holds the fractional probabilities per state
63  * per node in the tree, based on which the conditional and
64  * posterior probabilities are computed
65  *
66  */
67 
69 
70  VVVDouble ConditionalProbabilities_; // vector that holds the conditionl states assignment probabilities of the nodes in the tree (node*father_states*son_states)
71  size_t nodesCounter_; // counter of nodes hat allows adding unique names to the generated nodes while breaking branching in a mapping
72  size_t numOfMappings_; // the number of stochastic mappings to generate
73 
74 public:
75  /* constructors and destructors */
76 
77  explicit StochasticMapping(std::shared_ptr<LikelihoodCalculationSingleProcess> drl, size_t numOfMappings = 10000); // it is a good general practice to use "explicit" keyword on constructors with a single argument: https://stackoverflow.com/questions/121162/what-does-the-explicit-keyword-mean
78 
80 
82  likelihood_(sm.likelihood_),
83  tree_(sm.tree_),
84 // mappingParameters_(likelihood_->getSubstitutionProcess()),
85  fractionalProbabilities_(sm.fractionalProbabilities_),
86  ConditionalProbabilities_(sm.ConditionalProbabilities_),
87  nodesCounter_(0), numOfMappings_(sm.numOfMappings_)
88  { }
89 
95  StochasticMapping* clone() const { return new StochasticMapping(*this); }
96 
97 
98  /*
99  * @brief generates a stochastic mappings based on the sampling
100  * parameters
101  *
102  * @param Number of histories to sample
103  * @param mappings Vector of shared Trees that will hold the sampled stochastic mappings.
104  *
105  */
106 
107  void generateStochasticMapping(std::vector<std::shared_ptr<PhyloTree>>& mappings);
108 
126  std::shared_ptr<PhyloTree> generateExpectedMapping(std::vector<std::shared_ptr<PhyloTree>>& mappings, size_t divMethod = 0);
127 
138  std::shared_ptr<PhyloTree> generateAnalyticExpectedMapping(size_t divMethod = 0);
139 
140  /* extracts the state of a node in a mapping
141  * @param node The node to get the state of
142  * @return Node state is int
143  */
144 
145  int getNodeState(const PhyloNode* node) const;
146 
147 private:
148  /* adds names to the internal nodes, in case of absence.
149  * @param tree The tree whose nodes should be edited if needed.
150  */
151  void giveNamesToInternalNodes(PhyloTree& tree);
152 
153  /* sets the state of a node in a mapping
154  * @param node The node to get the state of
155  * @param state The state that needs to be assigned to the node
156  */
157  void setNodeState(PhyloNode* node, size_t state);
158 
159  /* set the character states of the leafs as properties of their nodes instances
160  * @param mapping - the tree to sets the properties in
161  */
162  void setLeafsStates(std::shared_ptr<PhyloTree> mapping);
163 
164  /* compute the fractional probabilities of all the nodes assignments
165  * @param A vector of the fractional probabilities probabilities to fill in (node**state combinaion in each entry)
166  */
167  void computeFractionals();
168 
169  /* compute the conditional probabilities of all the nodes assignments
170  * @param rootProbabilities The root frequencies
171  */
172  void ComputeConditionals();
173 
174  /* compute the ancestral frequenceis of character states of all the nodes based on the mappings
175  * @param A vector of the posterior probabilities probabilities to fill in (node**state combinaion in each entry)
176  * @param A vector of mappings to base the frequencies on
177  */
178  void computeStatesFrequencies(VVDouble& ancestralStatesFreuquencies, vector<shared_ptr<PhyloTree>>& mappings);
179 
180  /* auxiliary function that samples a state based on a given discrete distribution
181  * @param distibution The distribution to sample states based on
182  */
183  size_t sampleState(const VDouble& distibution); // k: best by ref
184 
185  /* samples ancestral states based on the conditional probabilities at each node in the base (user input) tree and the root assignment probabilities. States will be updated as nodes properties
186  * @param mapping The tree whose nodes names should be updated according to their assigned states.
187  */
188  void sampleAncestrals(shared_ptr<PhyloTree> mapping);
189 
190  /* set ancestral states in the expected history based on the conditional probabilities at each node in the base (user input) tree and the root assignment probabilities. States will be updated as nodes properties
191  * @param expectedMapping The expected mapping instance whose nodes names should be updated according to their assigned states.
192  * @param posteriorProbabilities Vector of posterior assignment proabilities to inner node to decide on assignments
193  */
194  void setExpectedAncestrals(shared_ptr<PhyloTree> expectedMapping, VVDouble& posteriorProbabilities);
195 
196  /* simulates mutations on phylogeny based the sampled ancestrals, tips data, and the simulation parameters
197  * @param mapping The tree that should be edited according to the sampled history
198  */
199  void sampleMutationsGivenAncestrals(shared_ptr<PhyloTree> mapping);
200 
201  /* adds a branch mapping to the mapping in a tree format by repeatedly braking branches and adding internal nodes with single children
202  * @param son The node at the bottom of the branch
203  * @param branchMapping The branchMapping of transitions in a MutationProcess format
204  */
205  void updateBranchMapping(PhyloNode* son, const MutationPath& branchMapping);
206 
207  /* sample mutations based on the stochastic mapping parameters, the source and destination state, and the branch length, and updates the simulated history along the branch in the input tree, on the fly
208  * @param son Node of interest
209  * @param maxIterNum Maximal number of imulation trials
210  */
211  void sampleMutationsGivenAncestralsPerBranch(PhyloNode* son, size_t maxIterNum = 10000);
212 
213  /* converts a vector of dwelling times to a mutation path and then updates the bracnh stemming from the given node */
214  /* @param node The node at the bottom of the branch
215  * @param dwellingTimes A vector of dwelling times where the value at each entry i corresponds to the dwelling time under the i'th state
216  * Note that this function generates a new tree instance, that must be deleted by the calling function.
217  * @param posteriorProbabilities Posterior probaibitlies to divide the time spent in a shared state between father and son into two transitions
218  @param divMethod The method used in the case that the son and father share the same state (either divide the wdelling time of the staed state by 2 for two transitions (method 0) or allocate the entire dwelling time to be adjacent to the son(method 1))
219  */
220  void updateBranchByDwellingTimes(PhyloNode* node, VDouble& dwellingTimes, VVDouble& posteriorProbabilities, size_t divMethod = 0);
221 };
222 }
223 #endif // BPP_PHYL_MAPPING_STOCHASTICMAPPING_H
vector< vector< vector< double > > > VVVDouble
vector< vector< double > > VVDouble
vector< double > VDouble
This class is used by MutationProcess to store detailed results of simulations.
std::shared_ptr< LikelihoodCalculationSingleProcess > likelihood_
StochasticMapping(const StochasticMapping &sm)
StochasticMapping * clone() const
cloning function used by the copy constructor of ./Likelihood/JointLikelihoodFunction/h
std::shared_ptr< PhyloTree > tree_
Defines the basic types of data flow nodes.