bpp-phyl3 3.0.0
StochasticMapping.h
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#ifndef BPP_PHYL_MAPPING_STOCHASTICMAPPING_H
6#define BPP_PHYL_MAPPING_STOCHASTICMAPPING_H
7
8
9#include "../Likelihood/DataFlow/DataFlowCWise.h"
10#include "../Likelihood/DataFlow/LikelihoodCalculationSingleProcess.h"
11#include "../Simulation/MutationProcess.h"
12#include "../Simulation/SubstitutionProcessSequenceSimulator.h"
13
14// From the STL:
15#include <iostream>
16#include <iomanip>
17#include <map>
18
19using namespace std;
20
21/* Store the countings on a DAG similar to the computing DAG */
22
23
24typedef vector<vector<vector<double>>> VVVDouble;
25typedef vector<vector<double>> VVDouble;
26typedef vector<double> VDouble;
27
28/* class for representing the framework of Stochastic mapping
29 *
30 * A StochasticMapping instance can be used to sample histories of
31 * state transitions along a tree, given a substitution model and the
32 * states at the tip taxa. For more information, see: Nielsen, Rasmus.
33 * "Mapping mutations on phylogenies." Systematic biology 51.5 (2002):
34 * 729-739.‏
35 */
36
37namespace bpp
38{
40{
41protected:
42 /*
43 * @brief The tree likelihood instance is used for computing the
44 * the conditional sampling probabilities of the ancestral states as
45 * well as the root assignment probabilities.
46 *
47 */
48
49 std::shared_ptr<LikelihoodCalculationSingleProcess> likelihood_;
50
51 std::shared_ptr<PhyloTree> tree_;
52
53 /*
54 * @brief this instance will hold the parameters required for the
55 * stochastic mapping procedure, and be used to generate stochastic
56 * mappings.
57 */
58
59 // SimpleSubstitutionProcessSequenceSimulator mappingParameters_;
60
61 /*
62 * @briefvector that holds the fractional probabilities per state
63 * per node in the tree, based on which the conditional and
64 * posterior probabilities are computed
65 *
66 */
67
69
70 VVVDouble ConditionalProbabilities_; // vector that holds the conditionl states assignment probabilities of the nodes in the tree (node*father_states*son_states)
71 size_t nodesCounter_; // counter of nodes hat allows adding unique names to the generated nodes while breaking branching in a mapping
72 size_t numOfMappings_; // the number of stochastic mappings to generate
73
74public:
75 /* constructors and destructors */
76
77 explicit StochasticMapping(std::shared_ptr<LikelihoodCalculationSingleProcess> drl, size_t numOfMappings = 10000); // it is a good general practice to use "explicit" keyword on constructors with a single argument: https://stackoverflow.com/questions/121162/what-does-the-explicit-keyword-mean
78
80
83 tree_(sm.tree_),
84 // mappingParameters_(likelihood_->getSubstitutionProcess()),
88 { }
89
95 StochasticMapping* clone() const { return new StochasticMapping(*this); }
96
97
98 /*
99 * @brief generates a stochastic mappings based on the sampling
100 * parameters
101 *
102 * @param Number of histories to sample
103 * @param mappings Vector of shared Trees that will hold the sampled stochastic mappings.
104 *
105 */
106
107 void generateStochasticMapping(std::vector<std::shared_ptr<PhyloTree>>& mappings);
108
126 std::shared_ptr<PhyloTree> generateExpectedMapping(std::vector<std::shared_ptr<PhyloTree>>& mappings, size_t divMethod = 0);
127
138 std::shared_ptr<PhyloTree> generateAnalyticExpectedMapping(size_t divMethod = 0);
139
140 /* extracts the state of a node in a mapping
141 * @param node The node to get the state of
142 * @return Node state is int
143 */
144
145 int getNodeState(const PhyloNode* node) const;
146
147private:
148 /* adds names to the internal nodes, in case of absence.
149 * @param tree The tree whose nodes should be edited if needed.
150 */
152
153 /* sets the state of a node in a mapping
154 * @param node The node to get the state of
155 * @param state The state that needs to be assigned to the node
156 */
157 void setNodeState(PhyloNode* node, size_t state);
158
159 /* set the character states of the leafs as properties of their nodes instances
160 * @param mapping - the tree to sets the properties in
161 */
162 void setLeafsStates(std::shared_ptr<PhyloTree> mapping);
163
164 /* compute the fractional probabilities of all the nodes assignments
165 * @param A vector of the fractional probabilities probabilities to fill in (node**state combinaion in each entry)
166 */
167 void computeFractionals();
168
169 /* compute the conditional probabilities of all the nodes assignments
170 * @param rootProbabilities The root frequencies
171 */
172 void ComputeConditionals();
173
174 /* compute the ancestral frequenceis of character states of all the nodes based on the mappings
175 * @param A vector of the posterior probabilities probabilities to fill in (node**state combinaion in each entry)
176 * @param A vector of mappings to base the frequencies on
177 */
178 void computeStatesFrequencies(VVDouble& ancestralStatesFreuquencies, vector<shared_ptr<PhyloTree>>& mappings);
179
180 /* auxiliary function that samples a state based on a given discrete distribution
181 * @param distibution The distribution to sample states based on
182 */
183 size_t sampleState(const VDouble& distibution); // k: best by ref
184
185 /* samples ancestral states based on the conditional probabilities at each node in the base (user input) tree and the root assignment probabilities. States will be updated as nodes properties
186 * @param mapping The tree whose nodes names should be updated according to their assigned states.
187 */
188 void sampleAncestrals(shared_ptr<PhyloTree> mapping);
189
190 /* set ancestral states in the expected history based on the conditional probabilities at each node in the base (user input) tree and the root assignment probabilities. States will be updated as nodes properties
191 * @param expectedMapping The expected mapping instance whose nodes names should be updated according to their assigned states.
192 * @param posteriorProbabilities Vector of posterior assignment proabilities to inner node to decide on assignments
193 */
194 void setExpectedAncestrals(shared_ptr<PhyloTree> expectedMapping, VVDouble& posteriorProbabilities);
195
196 /* simulates mutations on phylogeny based the sampled ancestrals, tips data, and the simulation parameters
197 * @param mapping The tree that should be edited according to the sampled history
198 */
199 void sampleMutationsGivenAncestrals(shared_ptr<PhyloTree> mapping);
200
201 /* adds a branch mapping to the mapping in a tree format by repeatedly braking branches and adding internal nodes with single children
202 * @param son The node at the bottom of the branch
203 * @param branchMapping The branchMapping of transitions in a MutationProcess format
204 */
205 void updateBranchMapping(PhyloNode* son, const MutationPath& branchMapping);
206
207 /* sample mutations based on the stochastic mapping parameters, the source and destination state, and the branch length, and updates the simulated history along the branch in the input tree, on the fly
208 * @param son Node of interest
209 * @param maxIterNum Maximal number of imulation trials
210 */
211 void sampleMutationsGivenAncestralsPerBranch(PhyloNode* son, size_t maxIterNum = 10000);
212
213 /* converts a vector of dwelling times to a mutation path and then updates the bracnh stemming from the given node */
214 /* @param node The node at the bottom of the branch
215 * @param dwellingTimes A vector of dwelling times where the value at each entry i corresponds to the dwelling time under the i'th state
216 * Note that this function generates a new tree instance, that must be deleted by the calling function.
217 * @param posteriorProbabilities Posterior probaibitlies to divide the time spent in a shared state between father and son into two transitions
218 @param divMethod The method used in the case that the son and father share the same state (either divide the wdelling time of the staed state by 2 for two transitions (method 0) or allocate the entire dwelling time to be adjacent to the son(method 1))
219 */
220 void updateBranchByDwellingTimes(PhyloNode* node, VDouble& dwellingTimes, VVDouble& posteriorProbabilities, size_t divMethod = 0);
221};
222}
223#endif // BPP_PHYL_MAPPING_STOCHASTICMAPPING_H
vector< vector< vector< double > > > VVVDouble
vector< vector< double > > VVDouble
vector< double > VDouble
This class is used by MutationProcess to store detailed results of simulations.
void giveNamesToInternalNodes(PhyloTree &tree)
void updateBranchMapping(PhyloNode *son, const MutationPath &branchMapping)
void setExpectedAncestrals(shared_ptr< PhyloTree > expectedMapping, VVDouble &posteriorProbabilities)
void setNodeState(PhyloNode *node, size_t state)
void sampleAncestrals(shared_ptr< PhyloTree > mapping)
void sampleMutationsGivenAncestralsPerBranch(PhyloNode *son, size_t maxIterNum=10000)
size_t sampleState(const VDouble &distibution)
std::shared_ptr< LikelihoodCalculationSingleProcess > likelihood_
int getNodeState(const PhyloNode *node) const
std::shared_ptr< PhyloTree > generateAnalyticExpectedMapping(size_t divMethod=0)
Creates a single expected (i.e, average) history based the rewards provided by the algorithm of Minin...
void updateBranchByDwellingTimes(PhyloNode *node, VDouble &dwellingTimes, VVDouble &posteriorProbabilities, size_t divMethod=0)
StochasticMapping * clone() const
cloning function used by the copy constructor of ./Likelihood/JointLikelihoodFunction/h
StochasticMapping(std::shared_ptr< LikelihoodCalculationSingleProcess > drl, size_t numOfMappings=10000)
void setLeafsStates(std::shared_ptr< PhyloTree > mapping)
void computeStatesFrequencies(VVDouble &ancestralStatesFreuquencies, vector< shared_ptr< PhyloTree > > &mappings)
std::shared_ptr< PhyloTree > generateExpectedMapping(std::vector< std::shared_ptr< PhyloTree > > &mappings, size_t divMethod=0)
Creates a single expected (i.e, average) history based on a given set of mappings steps....
StochasticMapping(const StochasticMapping &sm)
std::shared_ptr< PhyloTree > tree_
void sampleMutationsGivenAncestrals(shared_ptr< PhyloTree > mapping)
void generateStochasticMapping(std::vector< std::shared_ptr< PhyloTree > > &mappings)
Defines the basic types of data flow nodes.