bpp-phyl3  3.0.0
StochasticMapping.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 #include <Bpp/Numeric/Number.h>
12 #include <Bpp/Text/TextTools.h>
13 #include <algorithm>
14 #include <fstream>
15 #include <iostream>
16 #include <numeric> // to sum over items in a vector
17 
18 #include "../Simulation/MutationProcess.h"
19 #include "DecompositionReward.h"
21 #include "Reward.h"
22 #include "RewardMappingTools.h"
23 #include "StochasticMapping.h"
24 
25 using namespace bpp;
26 using namespace std;
27 
28 #define STATE "state"
29 
30 /******************************************************************************/
31 
32 StochasticMapping::StochasticMapping(std::shared_ptr<LikelihoodCalculationSingleProcess> drl, size_t numOfMappings) :
33  likelihood_(drl),
34  tree_(make_shared<PhyloTree>(drl->substitutionProcess().parametrizablePhyloTree())),
35 // mappingParameters_(drl->getSubstitutionProcess()),
36  fractionalProbabilities_(),
37  ConditionalProbabilities_(),
38  nodesCounter_(0),
39  numOfMappings_(numOfMappings) // ,
40  // nodeIdToIndex_()
41 {
42  giveNamesToInternalNodes(*tree_); // set names for the internal nodes of the tree, in case of absence
44 }
45 
46 /******************************************************************************/
47 
49 {}
50 
51 /******************************************************************************/
52 
53 void StochasticMapping::generateStochasticMapping(vector<shared_ptr<PhyloTree>>& mappings)
54 {
55  for (size_t i = 0; i < numOfMappings_; ++i)
56  {
57 // auto mapping = std::shared_ptr<PhyloTree>(*baseTree);
58 // setLeafsStates(mapping);
59 
60  // /* step 2: simulate a set of ancestral states, based on the fractional likelihoods from step 1 */
61  // sampleAncestrals(mapping);
62 
63  // /* step 3: simulate mutational history of each lineage of the phylogeny, conditional on the ancestral states */
64  // sampleMutationsGivenAncestrals(mapping);
65 
66  // // add the mapping to the vector of mapping
67  // mappings.push_back(mapping);
68 
69  // // reset the nodes counter
70  // nodesCounter_ = dynamic_cast<TreeTemplate<Node>*>(baseTree_)->getNodes().size() - 1;
71  }
72 }
73 
74 /******************************************************************************/
75 
76 void StochasticMapping::setExpectedAncestrals(shared_ptr<PhyloTree> expectedMapping, VVDouble& ancestralStatesFrequencies)
77 {
78  // TreeTemplate<Node>* ttree = dynamic_cast<TreeTemplate<Node>*>(expectedMapping);
79  // vector<Node*> nodes = ttree->getNodes();
80  // for (size_t i = 0; i < nodes.size(); ++i)
81  // {
82  // Node* node = nodes[i];
83  // int nodeId = node->getId();
84  // size_t j = static_cast<size_t>(nodeId); //Note@Laurent (Julien 17/06/20): is this really intended, as nodeIds can be discontinuous? Should there be some can of index instead?
85  // auto d = distance(ancestralStatesFrequencies[j].begin(), max_element(ancestralStatesFrequencies[j].begin(), ancestralStatesFrequencies[j].end()));
86  // size_t state = static_cast<size_t>(d); //Note@Laurent (Julien 17/06/20): assimuming this is always positive, is that so?
87  // setNodeState(node, state); // in the case of a leaf, the assigned state must be sampled
88  // }
89 }
90 
91 /******************************************************************************/
92 
93 shared_ptr<PhyloTree> StochasticMapping::generateExpectedMapping(vector<shared_ptr<PhyloTree>>& mappings, size_t divMethod)
94 {
95  // // initialize the expected history
96  // nodesCounter_ = dynamic_cast<TreeTemplate<Node>*>(baseTree_)->getNodes().size() - 1;
97  shared_ptr<PhyloTree> expectedMapping(make_shared<PhyloTree>(*tree_));
98  // setLeafsStates(expectedMapping);
99 
100  // // compute a vector of the posterior asssignment probabilities for each inner node
101  // VVDouble ancestralStatesFrequencies;
102  // ancestralStatesFrequencies.clear();
103  // vector<Node*> nodes = dynamic_cast<TreeTemplate<Node>*>(expectedMapping)->getNodes();
104  // size_t statesNum = tl_->getNumberOfStates();
105  // ancestralStatesFrequencies.resize(nodes.size(), VDouble(statesNum));
106  // computeStatesFrequencies(ancestralStatesFrequencies, mappings);
107 
108  // // set the ancestral states accrdonig to the maximal posterior (i.e, conditional) probability
109  // setExpectedAncestrals(expectedMapping, ancestralStatesFrequencies);
110 
111  // // update the expected history with the dwelling times
112  // for (size_t n = 0; n < nodes.size(); ++n)
113  // {
114  // Node* node = nodes[n];
115  // if (node->hasFather()) // for any node except to the root
116  // {
117  // // initialize vector of average dwelling times for the branch stemming from node
118  // VDouble AverageDwellingTimes;
119  // AverageDwellingTimes.clear();
120  // AverageDwellingTimes.resize(statesNum, 0);
121  // // compute the average dwelling times of all the states
122  // for (size_t i = 0; i < mappings.size(); ++i)
123  // {
124  // TreeTemplate<Node>* mapping = dynamic_cast<TreeTemplate<Node>*>(mappings[i]);
125  // // get the pointers to the node and its father in the i'th mapping
126  // Node* curNode = mapping->getNode(node->getName());
127  // Node* father = mapping->getNode(node->getFather()->getName()); // the original father of the node (according to the base tree) in the mapping
128  // while (curNode != father)
129  // {
130  // AverageDwellingTimes[static_cast<size_t>(getNodeState(curNode))] += curNode->getDistanceToFather(); //Note@Laurent (Julien 17/06/20): assuming state is positive, is that so?
131  // curNode = curNode->getFather();
132  // }
133  // }
134  // double branchLength = node->getDistanceToFather(); // this is the length of the original branch in the base tree
135  // bool updateBranch = true;
136  // for (size_t state = 0; state < statesNum; ++state)
137  // {
138  // AverageDwellingTimes[state] /= static_cast<double>(mappings.size());
139  // if (AverageDwellingTimes[state] == branchLength) // if one of the dwelling times equals the branch length, then there is only one state along the branch and there is no need to edit it
140  // {
141  // updateBranch = false;
142  // }
143  // }
144  // // break the branch according to average dwelling times
145  // if (updateBranch)
146  // {
147  // updateBranchByDwellingTimes(node, AverageDwellingTimes, ancestralStatesFrequencies, divMethod);
148  // }
149  // }
150  // }
151  // nodesCounter_ = dynamic_cast<TreeTemplate<Node>*>(baseTree_)->getNodes().size() - 1;
152  return expectedMapping;
153 }
154 
155 /******************************************************************************/
156 
157 shared_ptr<PhyloTree> StochasticMapping::generateAnalyticExpectedMapping(size_t divMethod)
158 {
159  // /* Compute the posterior assignment probabilities to internal nodes, based on the fractional probabilities computed earlier */
160  // const vector<int> states = tl_->getAlphabetStates();
161  // vector<int> nodeIds = baseTree_->getNodesId();
162  // size_t nodeId;
163  // VVDouble posteriorProbabilities;
164  // posteriorProbabilities.clear();
165  // posteriorProbabilities.resize(baseTree_->getNumberOfNodes(), VDouble(states.size()));
166  // double nodeDataProb;
167  // // because the sum of partial likelihoods (i.e, the fractional probabilities) is in fact the probability of the data, it is sufficient to standardize the vector of fractional probabilires for each node to obtain the posterior probabilities
168  // for (size_t n = 0; n < baseTree_->getNumberOfNodes(); ++n)
169  // {
170  // nodeId = static_cast<size_t>(nodeIds[n]); //Note@Laurent (Julien 17/06/20): what is nodeId is negative?
171  // nodeDataProb = 0;
172  // for (size_t s = 0; s < states.size(); ++s)
173  // {
174  // nodeDataProb = nodeDataProb + fractionalProbabilities_[nodeId][s];
175  // }
176  // for (size_t nodeState = 0; nodeState < states.size(); ++nodeState)
177  // {
178  // posteriorProbabilities[nodeId][nodeState] = fractionalProbabilities_[nodeId][nodeState] / nodeDataProb;
179  // }
180  // }
181 
182  // /* Assign states to internal nodes based on the majority rule over the posterior probabilities */
183  shared_ptr<PhyloTree> expectedMapping(make_shared<PhyloTree>(*tree_));
184 
185  // setLeafsStates(expectedMapping);
186  // setExpectedAncestrals(expectedMapping, posteriorProbabilities);
187 
188  // /* Compute the reward per state per site - expect two entries per site (that is, two entries in total).
189  // Let r0 be the reward of state 0 and r1 the reward of state 1. */
190  // UserAlphabetIndex1* alpha = new UserAlphabetIndex1(tl_->getAlphabet());
191  // DiscreteDistribution* rDist = new ConstantRateDistribution();
192  // TransitionModel* tlModel = tl_->getModelForSite(0, 0)->clone();
193  // DRTreeLikelihood* drtl = new DRHomogeneousTreeLikelihood(*baseTree_, *(tl_->getData()), tlModel, rDist, false);
194  // drtl->initialize();
195  // vector<int> ids = baseTree_->getNodesId();
196  // const SubstitutionModel* model = dynamic_cast<const SubstitutionModel*>(tl_->getModelForSite(0, 0));
197 
198  // /* Compute the expected dwelling times per branch and state as follows:
199  // For branch b of length t, the average welling time in state 0 is r0*t (based on Minin and Suchard paper).
200  // The average dwelling time in state 1 should complement to t (make sure of it!) */
201  // vector<Node*> nodes = dynamic_cast<TreeTemplate<Node>*>(expectedMapping)->getNodes();
202  // double branchLength;
203  // Node* node;
204  // VVDouble expectedDwellingTimes;
205  // expectedDwellingTimes.clear();
206  // expectedDwellingTimes.resize(nodes.size(), VDouble(states.size()));
207  // for (size_t s = 0; s < states.size(); ++s)
208  // {
209  // alpha->setIndex(states[s], 1); // set the reward of the state as 1 and the reward for the rest of the states as 0
210  // for (size_t m = 0; m < states.size(); ++m)
211  // {
212  // if (m != s)
213  // {
214  // alpha->setIndex(states[m], 0); //Note@Laurent (Julien 17/06/20): can you check my correction there and above? I changed s/m to states[s] and states[m], is that correct?
215  // }
216  // }
217  // unique_ptr<Reward> reward(new DecompositionReward(model, alpha));
218  // unique_ptr<ProbabilisticRewardMapping> mapping(RewardMappingTools::computeRewardVectors(*drtl, ids, *reward, false));
219  // for (size_t n = 0; n < nodes.size(); ++n)
220  // {
221  // node = nodes[n];
222  // if (node->hasFather()) // for any node except to the root
223  // {
224  // expectedDwellingTimes[static_cast<size_t>(node->getId())][s] = mapping->getReward(node->getId(), 0); //Note@Laurent (Julien 17/06/20): what is nodeId is negative?
225  // }
226  // }
227  // }
228 
229  // // standardize expected dwelling itmes, if needed, and update the mapping accorgingly
230  // double sumOfDwellingTimes;
231  // bool updateBranch;
232  // nodesCounter_ = dynamic_cast<TreeTemplate<Node>*>(baseTree_)->getNodes().size() - 1;
233  // for (size_t n = 0; n < nodes.size(); ++n)
234  // {
235  // node = nodes[n];
236  // if (node->hasFather()) // for any node except to the root
237  // {
238  // branchLength = node->getDistanceToFather();
239  // sumOfDwellingTimes = 0;
240  // updateBranch = true;
241  // for (size_t s = 0; s < states.size(); ++s)
242  // {
243  // if (expectedDwellingTimes[static_cast<size_t>(node->getId())][s] == 0) //Note@Laurent (Julien 17/06/20): what is nodeId is negative?
244 
245  // {
246  // updateBranch = false;
247  // }
248  // sumOfDwellingTimes = sumOfDwellingTimes + expectedDwellingTimes[static_cast<size_t>(node->getId())][s]; //Note@Laurent (Julien 17/06/20): what is nodeId is negative?
249 
250  // }
251 
252  // if (branchLength < 0.00001) // branch length is 0 -> no need to update mapping on the branch
253  // {
254  // node->setDistanceToFather(branchLength);
255  // updateBranch = false;
256  // }
257  // else
258  // {
259  // if (sumOfDwellingTimes != branchLength)
260  // {
261  // for (size_t s = 0; s < states.size(); ++s)
262  // {
263  // expectedDwellingTimes[static_cast<size_t>(node->getId())][s] = branchLength * (expectedDwellingTimes[static_cast<size_t>(node->getId())][s]) / sumOfDwellingTimes;
264  // }
265  // }
266  // }
267 
268  // if (updateBranch)
269  // {
270  // updateBranchByDwellingTimes(node, expectedDwellingTimes[static_cast<size_t>(node->getId())], posteriorProbabilities, divMethod);
271  // }
272  // }
273  // }
274  // nodesCounter_ = dynamic_cast<TreeTemplate<Node>*>(baseTree_)->getNodes().size() - 1;
275 
276  // /* free the resources */
277  // delete alpha;
278  // delete rDist;
279  // delete tlModel;
280  // delete drtl;
281 
282  return expectedMapping;
283 }
284 /******************************************************************************/
285 
287 {
288  return (dynamic_cast<const BppInteger*>(node->getProperty(STATE)))->getValue();
289 }
290 
291 /******************************************************************************/
292 
294 {
295  BppInteger stateProperty(static_cast<int>(state));
296  node->setProperty(STATE, stateProperty);
297 }
298 
299 /******************************************************************************/
300 
302 {
303  auto nodes = tree.getAllInnerNodes();
304  for (auto& node:nodes)
305  {
306  if (!node->hasName())
307  node->setName("_baseInternal_" + TextTools::toString(tree.getNodeIndex(node)));
308  }
309 }
310 
311 /******************************************************************************/
312 
313 void StochasticMapping::setLeafsStates(std::shared_ptr<PhyloTree> mapping)
314 {
315  // auto leafsStates = likelihood_->getData();
316  auto leaves = mapping->getAllLeaves();
317 
318  for (auto& leaf: leaves)
319  {
320  string nodeName = leaf->getName();
321 // auto lstates = likelihoods_->getNode(mapping->getNodeIndex(leaf))
322 // size_t leafState = static_cast<size_t>(tl_->getAlphabetStateAsInt(leafsStates->getSequence(nodeName).getValue(0)));
323 // //note@Laurent (Julien on 17/06/20): I thing the above line is incorrect, in particulat the use of the getAlphabetStateAsInt function. It is supposed to take as input a state index (size_t) and return the corresponding character state as an integer. Here you give as input to the method already a sequence character (integer). In most cases that will still work as the characters states for resolved characters are usually 0..n, and there corresponding states 0..n. But it will fail for models with gaps (character state -1) and Markov modulated models (character states 0..n, but state index 0..k*n)
324 // setNodeState(node, leafState);
325 // }
326  }
327 }
328 
329 /******************************************************************************/
330 
331 
333 {
334  // // some auxiliiary variables
335  // size_t statesNum = tl_->getNumberOfStates();
336  // const TransitionModel* model = tl_->getModelForSite(0, 0); // this calls assumes that all the sites and all the branches are assoiacted with the same node
337  // const SiteContainer* leafsStates = tl_->getData();
338  // TreeTemplate<Node>* ttree = dynamic_cast<TreeTemplate<Node>*>(baseTree_);
339  // vector<Node*> nodes = ttree->getNodes();
340 
341  // // compute the fractional probabilities according to Felsenstein prunnig algorithm: for each node nodes[i] and state s compute: P(Data[leafs under node[i]]|node[i] has state s]
342  // for (size_t i = 0; i < nodes.size(); ++i) // traverse the tree in post-order
343  // {
344  // int nodeId = nodes[i]->getId();
345  // string nodeName = nodes[i]->getName();
346  // if (nodes[i]->isLeaf()) // if the node is a leaf, set the fractional probability of its state to 1, and the rest to 0
347  // {
348  // size_t leafState = static_cast<int>(tl_->getAlphabetStateAsInt(leafsStates->getSequence(nodeName).getValue(0)));
349  // for (size_t nodeState = 0; nodeState < statesNum; ++nodeState)
350  // {
351  // if (nodeState != leafState)
352  // {
353  // fractionalProbabilities_[nodeId][nodeState] = 0;
354  // }
355  // else
356  // {
357  // fractionalProbabilities_[nodeId][nodeState] = 1;
358  // }
359  // }
360  // }
361  // else // if the node is internal, follow the Felesenstein computation rule to compute the fractional probability
362  // {
363  // for (size_t nodeState = 0; nodeState < statesNum; ++nodeState)
364  // {
365  // double fullProb = 1;
366  // for (size_t j = 0; j < (nodes[i]->getNumberOfSons()); ++j) // for each son of the node, sum over the probabilities of all its assignments given its father's state (i.e, nodeState)
367  // {
368  // double sonProb = 0;
369  // double bl = nodes[i]->getSon(j)->getDistanceToFather();
370  // for (size_t sonState = 0; sonState < statesNum; ++sonState)
371  // {
372  // sonProb += model->Pij_t(nodeState, sonState, bl) * fractionalProbabilities_[nodes[i]->getSon(j)->getId()][sonState];
373  // }
374  // fullProb *= sonProb;
375  // }
376  // fractionalProbabilities_[nodeId][nodeState] = fullProb;
377  // }
378  // }
379  // }
380 }
381 
382 /******************************************************************************/
383 
385 {
386  // // some auxiliiary variables
387  // size_t statesNum = tl_->getNumberOfStates();
388  // VDouble rootProbabilities = tl_->getRootFrequencies(0);
389  // const TransitionModel* model = tl_->getModelForSite(0, 0);
390  // TreeTemplate<Node>* ttree = dynamic_cast<TreeTemplate<Node>*>(baseTree_);
391  // vector<Node*> nodes = ttree->getNodes(); // here - how many nodes? only 4! what happened?
392 
393  // /* compute the fractional probabilities for each node and state */
394  // fractionalProbabilities_.clear();
395  // fractionalProbabilities_.resize(nodes.size(), VDouble(statesNum));
396  // computeFractionals();
397 
398  // /* compute the conditional probabilities: for each combination of nodes son, father, compute Pr(son receives sonState | father has fatherState) */
399  // ConditionalProbabilities_.clear();
400  // ConditionalProbabilities_.resize((nodes.size()));
401 
402  // for (size_t i = 0; i < nodes.size(); ++i)
403  // {
404  // if (!nodes[i]->isLeaf() || !nodes[i]->hasFather()) // the second condition will catch the root even if it has a single child (in which case, isLeaf() returns true)
405  // {
406  // int nodeId = nodes[i]->getId();
407  // ConditionalProbabilities_[nodes[i]->getId()].resize(statesNum, VDouble(statesNum)); // use aux variable numOfStates
408  // if (!(nodes[i]->hasFather())) // if the node is the root -> set the conditional probability to be same for all "fatherStates"
409  // {
410  // double sum = 0.0;
411  // for (size_t sonState = 0; sonState < statesNum; ++sonState)
412  // {
413  // double stateConditionalNominator = fractionalProbabilities_[nodeId][sonState] * rootProbabilities[sonState];
414  // for (size_t fatherState = 0; fatherState < statesNum; ++fatherState)
415  // {
416  // ConditionalProbabilities_[nodeId][fatherState][sonState] = stateConditionalNominator;
417  // }
418  // sum += stateConditionalNominator;
419  // }
420  // for (size_t fatherState = 0; fatherState < statesNum; ++fatherState)
421  // {
422  // for (size_t sonState = 0; sonState < statesNum; ++sonState)
423  // {
424  // ConditionalProbabilities_[nodeId][fatherState][sonState] /= sum;
425  // }
426  // }
427  // }
428  // else // else -> follow equation (10) from the paper to compute the conditional assignment probabilities given the ones of his father
429  // {
430  // for (size_t fatherState = 0; fatherState < statesNum; ++fatherState)
431  // {
432  // double sum = 0.0;
433  // for (size_t sonState = 0; sonState < statesNum; ++sonState)
434  // {
435  // double stateConditionalNominator = fractionalProbabilities_[nodes[i]->getId()][sonState] * model->Pij_t(fatherState, sonState, nodes[i]->getDistanceToFather());
436  // ConditionalProbabilities_[nodeId][fatherState][sonState] = stateConditionalNominator;
437  // sum += stateConditionalNominator;
438  // }
439  // for (size_t sonState = 0; sonState < statesNum; ++sonState)
440  // {
441  // ConditionalProbabilities_[nodeId][fatherState][sonState] /= sum;
442  // }
443  // }
444  // }
445  // }
446  // }
447 }
448 
449 /******************************************************************************/
450 
451 void StochasticMapping::computeStatesFrequencies(VVDouble& ancestralStatesFrequencies, vector<shared_ptr<PhyloTree>>& mappings)
452 {
453  // // some auxiliiary variables
454  // size_t statesNum = tl_->getNumberOfStates();
455  // const SiteContainer* leafsStates = tl_->getData();
456  // TreeTemplate<Node>* ttree = dynamic_cast<TreeTemplate<Node>*>(baseTree_);
457  // vector<Node*> nodes = ttree->getNodes();
458 
459  // // compute the node assignment probabilities based on their frequency in the mappings
460  // for (size_t i = 0; i < nodes.size(); ++i)
461  // {
462  // Node* node = nodes[i];
463  // int nodeId = node->getId();
464  // string nodeName = node->getName();
465  // // in leafs - don't iterate to save time, as the frequency of a state is either 0 or 1 based on the known character data
466  // if (node->isLeaf())
467  // {
468  // size_t leafState = static_cast<int>(tl_->getAlphabetStateAsInt(leafsStates->getSequence(nodeName).getValue(0)));
469  // for (size_t nodeState = 0; nodeState < statesNum; ++nodeState)
470  // {
471  // if (nodeState != leafState)
472  // {
473  // ancestralStatesFrequencies[nodeId][nodeState] = 0;
474  // }
475  // else
476  // {
477  // ancestralStatesFrequencies[nodeId][nodeState] = 1;
478  // }
479  // }
480  // }
481  // else
482  // {
483  // // else, go over all the mappings and collect the number of states assignment per state
484  // fill(ancestralStatesFrequencies[nodeId].begin(), ancestralStatesFrequencies[nodeId].end(), 0); // reset all the values to 0
485  // for (size_t h = 0; h < mappings.size(); ++h)
486  // {
487  // Node* nodeInMapping = dynamic_cast<TreeTemplate<Node>*>(mappings[h])->getNode(nodeName);
488  // ancestralStatesFrequencies[nodeId][static_cast<size_t>(getNodeState(nodeInMapping))]++; //Note@Laurent (Julien 17/06/20): assuming node state is positive, is that so?
489  // }
490  // // now divide the vector entries by the number of mappings
491  // for (size_t nodeState = 0; nodeState < statesNum; ++nodeState)
492  // {
493  // ancestralStatesFrequencies[nodeId][nodeState] = ancestralStatesFrequencies[nodeId][nodeState] / static_cast<int>(mappings.size());
494  // }
495  // }
496  // }
497 }
498 
499 
500 /******************************************************************************/
501 
502 size_t StochasticMapping::sampleState(const VDouble& distibution)
503 {
504  size_t state = 0; // the default state is 0
505  // double prob = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.0);
506 
507  // for (size_t i = 0; i < distibution.size(); ++i)
508  // {
509  // prob -= distibution[i];
510  // if (prob < 0) // if the the sampled probability is smaller than the probability to choose state i -> set state to be i
511  // {
512  // state = i;
513  // break;
514  // }
515  // }
516  return state;
517 }
518 
519 /******************************************************************************/
520 
521 void StochasticMapping::sampleAncestrals(shared_ptr<PhyloTree> mapping)
522 {
523  // TreeTemplate<Node>* ttree = dynamic_cast<TreeTemplate<Node>*>(mapping);
524  // PreOrderTreeIterator* treeIt = new PreOrderTreeIterator(*ttree);
525  // for (Node* node = treeIt->begin(); node != treeIt->end(); node = treeIt->next())
526  // {
527  // if (!node->isLeaf())
528  // {
529  // int nodeId = node->getId();
530  // if (!node->hasFather())
531  // {
532  // size_t rootState = sampleState(ConditionalProbabilities_[nodeId][0]); // set father state to 0 (all the entries in the fatherState level are the same anyway)
533  // setNodeState(node, rootState);
534  // }
535  // else
536  // {
537  // int fatherState = getNodeState(node->getFather());
538  // size_t sonState = sampleState(ConditionalProbabilities_[nodeId][fatherState]);
539  // setNodeState(node, sonState); // in the case of a leaf, the assigned state must be sampled
540  // }
541  // }
542  // }
543  // delete(treeIt);
544 }
545 
546 /******************************************************************************/
547 
548 void StochasticMapping::sampleMutationsGivenAncestrals(shared_ptr<PhyloTree> mapping)
549 {
550 // TreeTemplate<Node>* ttree = dynamic_cast<TreeTemplate<Node>*>(mapping);
551 // vector<Node*> nodes = ttree->getNodes();
552 // nodesCounter_ = nodes.size() - 1; // initialize nodesCounter_ according to the number of nodes in the base tree
553 // for (size_t i = 0; i < nodes.size(); ++i)
554 // {
555 // Node* son = nodes[i];
556 // if (son->hasFather())
557 // {
558 // sampleMutationsGivenAncestralsPerBranch(son);
559 // }
560 // }
561 }
562 
563 /******************************************************************************/
564 
566 {
567  // const vector<size_t> states = branchMapping.getStates();
568  // const VDouble times = branchMapping.getTimes();
569  // Node* curNode = son;
570  // Node* nextNode;
571  // int eventsNum = static_cast<int>(branchMapping.getNumberOfEvents());
572 
573  // if (eventsNum == 0) // if there are no events >-return nothing
574  // return;
575  // else
576  // {
577  // for (int i = eventsNum - 1; i > -1; --i) // add a new node to represent the transition
578  // {
579  // nodesCounter_ = nodesCounter_ + 1;
580  // const string name = "_mappingInternal" + TextTools::toString(nodesCounter_) + "_";
581  // nextNode = new Node(static_cast<int>(nodesCounter_), name);
582  // setNodeState(nextNode, states[i]);
583  // nextNode->setDistanceToFather(times[i]);
584 
585  // // set the father to no longer be the father of curNode
586  // Node* originalFather = curNode->getFather();
587  // originalFather->removeSon(curNode); // also removes originalFather as the father of curNode
588  // // set nextNode to be the new father of curNode
589  // curNode->setFather(nextNode); // also adds curNode to the sons of nextNode
590  // // set curNode's original father to be the father of nextNode
591  // nextNode->setFather(originalFather); // also adds nextNode to the sons of originalFather - or not? make sure this is the father at all times
592  // curNode = nextNode;
593  // }
594  // return;
595  // }
596 }
597 
598 /******************************************************************************/
599 
601 {
602  // Node* father = son->getFather();
603  // double branchLength = son->getDistanceToFather();
604  // size_t fatherState = getNodeState(father);
605  // size_t sonState = getNodeState(son);
606 
607  // /* simulate mapping on a branch until you manage to finish at the son's state */
608  // for (size_t i = 0; i < maxIterNum; ++i)
609  // {
610  // double disFromNode = 0.0;
611  // size_t curState = fatherState;
612  // MutationPath tryMapping(mappingParameters_->getSubstitutionModel()->getAlphabet(), fatherState, branchLength);
613 
614  // double timeTillChange;
615  // // if the father's state is not the same as the son's state -> use the correction corresponding to equation (11) in the paper
616  // if (fatherState != sonState)
617  // { // sample timeTillChange conditional on it being smaller than branchLength
618  // double u = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.0);
619  // double waitingTimeParam = -1 * mappingParameters_->getSubstitutionModel()->Qij(fatherState, fatherState); // get the parameter for the exoponential distribution to draw the waiting time from
620  // double tmp = u * (1.0 - exp(branchLength * -waitingTimeParam));
621  // timeTillChange = -log(1.0 - tmp) / waitingTimeParam;
622  // assert (timeTillChange < branchLength);
623  // }
624  // else
625  // {
626  // timeTillChange = mappingParameters_->getTimeBeforeNextMutationEvent(fatherState); // draw the time until a transition from exponential distribution with the rate of leaving fatherState
627  // }
628 
629  // while (disFromNode + timeTillChange < branchLength) // a jump occurred but not passed the whole branch ->
630  // {
631  // tryMapping.addEvent(curState, timeTillChange); // add the current state and time to branch history
632 
633  // disFromNode += timeTillChange;
634  // timeTillChange = mappingParameters_->getTimeBeforeNextMutationEvent(curState); // draw the time until a transition from exponential distribution with the rate of leaving curState
635  // curState = mappingParameters_->mutate(curState); // draw the state to transition to after from initial state curState based on the relative transition rates distribution (see MutationProcess.cpp line 50)
636  // }
637  // // the last jump passed the length of the branch -> finish the simulation and check if it's successful (i.e, mapping is finished at the son's state)
638  // if (curState != sonState) // if the simulation failed, try again
639  // {
640  // continue;
641  // }
642  // else // if the simulation was successfully, add it to the build mapping
643  // {
644  // double timeOfJump = branchLength - disFromNode;
645  // son->setDistanceToFather(timeOfJump);
646  // updateBranchMapping(son, tryMapping); // add the successful simulation to the build mapping
647  // return;
648  // }
649  // }
650  // // if all simulations failed -> throw an exception
651  // throw Exception("could not produce simulations with father = " + TextTools::toString(fatherState) + " son " + TextTools::toString(sonState) + " branch length = " + TextTools::toString(branchLength));
652 }
653 
654 /******************************************************************************/
655 
656 void StochasticMapping::updateBranchByDwellingTimes(PhyloNode* node, VDouble& dwellingTimes, VVDouble& ancestralStatesFrequencies, size_t divMethod)
657 {
658  // /* first, convert the dwelling times vector to a mutation path of the branch */
659  // size_t statesNum = tl_->getNumberOfStates();
660  // size_t sonState = getNodeState(node);
661  // size_t fatherState = getNodeState(node->getFather());
662  // double branchLength = node->getDistanceToFather();
663  // double Pf = 1;
664  // double Ps = 1;
665  // double shareOfFather = 0;
666  // double shareOfSon = 0;
667  // MutationPath branchMapping(mappingParameters_->getSubstitutionModel()->getAlphabet(), fatherState, branchLength);
668  // // set the first event with the dwelling time that matches the state of the father
669  // if (fatherState == sonState)
670  // {
671  // if (divMethod == 0)
672  // {
673  // if (node->hasFather())
674  // {
675  // Pf = ancestralStatesFrequencies[node->getFather()->getId()][fatherState];
676  // }
677  // Ps = 1;
678  // if (!node->isLeaf())
679  // {
680  // Ps = ancestralStatesFrequencies[node->getId()][sonState];
681  // }
682  // shareOfFather = Pf / (Pf + Ps);
683  // branchMapping.addEvent(fatherState, dwellingTimes[fatherState] * shareOfFather);
684  // }
685  // else
686  // {
687  // branchMapping.addEvent(fatherState, 0);
688  // }
689  // }
690  // else
691  // {
692  // branchMapping.addEvent(fatherState, dwellingTimes[fatherState]);
693  // }
694  // // set all events except for the one entering the son
695  // for (size_t state = 0; state < statesNum; ++state)
696  // {
697  // if (state != fatherState && state != sonState && dwellingTimes[state] > 0) // if the state matches an event which is not the first or the last -> add it
698  // {
699  // branchMapping.addEvent(state, dwellingTimes[state]);
700  // }
701  // }
702  // // change the length of the branch whose bottom node is the son according to the dwelling time of the relevant state
703  // if (fatherState == sonState)
704  // {
705  // if (divMethod == 0)
706  // {
707  // shareOfSon = 1 - shareOfFather;
708  // node->setDistanceToFather(dwellingTimes[sonState] * shareOfSon);
709  // }
710  // else
711  // {
712  // node->setDistanceToFather(dwellingTimes[sonState]);
713  // }
714  // }
715  // else
716  // {
717  // node->setDistanceToFather(dwellingTimes[sonState]);
718  // }
719 
720  // /* secondly, update the expected history with the dwelling times-based mutation path */
721  // updateBranchMapping(node, branchMapping);
722 }
#define STATE
vector< vector< double > > VVDouble
vector< double > VDouble
virtual NodeIndex getNodeIndex(const std::shared_ptr< N > nodeObject) const=0
virtual std::vector< std::shared_ptr< N > > getAllInnerNodes() const=0
This class is used by MutationProcess to store detailed results of simulations.
Clonable * getProperty(const std::string &name)
Definition: PhyloNode.h:182
void setProperty(const std::string &name, const Clonable &property)
Set/add a node property.
Definition: PhyloNode.h:175
void giveNamesToInternalNodes(PhyloTree &tree)
void updateBranchMapping(PhyloNode *son, const MutationPath &branchMapping)
void setExpectedAncestrals(shared_ptr< PhyloTree > expectedMapping, VVDouble &posteriorProbabilities)
void setNodeState(PhyloNode *node, size_t state)
void sampleAncestrals(shared_ptr< PhyloTree > mapping)
void sampleMutationsGivenAncestralsPerBranch(PhyloNode *son, size_t maxIterNum=10000)
size_t sampleState(const VDouble &distibution)
int getNodeState(const PhyloNode *node) const
std::shared_ptr< PhyloTree > generateAnalyticExpectedMapping(size_t divMethod=0)
Creates a single expected (i.e, average) history based the rewards provided by the algorithm of Minin...
void updateBranchByDwellingTimes(PhyloNode *node, VDouble &dwellingTimes, VVDouble &posteriorProbabilities, size_t divMethod=0)
void computeStatesFrequencies(VVDouble &ancestralStatesFreuquencies, vector< shared_ptr< PhyloTree >> &mappings)
StochasticMapping(std::shared_ptr< LikelihoodCalculationSingleProcess > drl, size_t numOfMappings=10000)
std::shared_ptr< PhyloTree > generateExpectedMapping(std::vector< std::shared_ptr< PhyloTree >> &mappings, size_t divMethod=0)
Creates a single expected (i.e, average) history based on a given set of mappings steps....
void generateStochasticMapping(std::vector< std::shared_ptr< PhyloTree >> &mappings)
void setLeafsStates(std::shared_ptr< PhyloTree > mapping)
std::shared_ptr< PhyloTree > tree_
void sampleMutationsGivenAncestrals(shared_ptr< PhyloTree > mapping)
std::string toString(T t)
Defines the basic types of data flow nodes.