// bpp-phyl3 3.0.0 — SubstitutionMappingTools.cpp (legacy substitution mapping tools)
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 #include "../../Mapping/UniformizationSubstitutionCount.h"
7 #include "../../Mapping/DecompositionReward.h"
9 #include "RewardMappingTools.h"
10 #include "../Likelihood/DRTreeLikelihoodTools.h"
11 #include "../Likelihood/MarginalAncestralStateReconstruction.h"
12 
13 #include <Bpp/Text/TextTools.h>
16 #include <Bpp/Numeric/DataTable.h>
18 
19 using namespace bpp;
20 
21 // From the STL:
22 #include <iomanip>
23 
24 using namespace std;
25 
26 /******************************************************************************/
27 
28 unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectors(
29  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
30  const vector<int>& nodeIds,
31  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
32  bool verbose)
33 {
34  // Preamble:
35  if (!drtl->isInitialized())
36  throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
37 
38  // A few variables we'll need:
39 
40  const TreeTemplate<Node> tree(drtl->tree());
41  auto& sequences = drtl->data();
42  auto& rDist = drtl->rateDistribution();
43 
44  size_t nbSites = sequences.getNumberOfSites();
45  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
46  size_t nbStates = sequences.alphabet().getSize();
47  size_t nbClasses = rDist.getNumberOfCategories();
48  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
49  vector<const Node*> nodes = tree.getNodes();
50  const auto& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
51  nodes.pop_back(); // Remove root node.
52  size_t nbNodes = nodes.size();
53 
54  // We create a new ProbabilisticSubstitutionMapping object:
55  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
56 
57  // Store likelihood for each rate for each site:
58  VVVdouble lik;
59  drtl->computeLikelihoodAtNode(tree.getRootId(), lik);
60  Vdouble Lr(nbDistinctSites, 0);
61  Vdouble rcProbs = rDist.getProbabilities();
62  Vdouble rcRates = rDist.getCategories();
63  for (size_t i = 0; i < nbDistinctSites; i++)
64  {
65  VVdouble* lik_i = &lik[i];
66  for (size_t c = 0; c < nbClasses; c++)
67  {
68  Vdouble* lik_i_c = &(*lik_i)[c];
69  double rc = rDist.getProbability(c);
70  for (size_t s = 0; s < nbStates; s++)
71  {
72  Lr[i] += (*lik_i_c)[s] * rc;
73  }
74  }
75  }
76 
77  // Compute the number of substitutions for each class and each branch in the tree:
78  if (verbose)
79  ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);
80 
81  for (size_t l = 0; l < nbNodes; ++l)
82  {
83  // For each node,
84  const Node* currentNode = nodes[l];
85  if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
86  continue;
87 
88  const Node* father = currentNode->getFather();
89 
90  double d = currentNode->getDistanceToFather();
91 
92  if (verbose)
93  ApplicationTools::displayGauge(l, nbNodes - 1);
94  VVdouble substitutionsForCurrentNode(nbDistinctSites);
95  for (size_t i = 0; i < nbDistinctSites; ++i)
96  {
97  substitutionsForCurrentNode[i].resize(nbTypes);
98  }
99 
100  // Now we've got to compute likelihoods in a smart manner... ;)
101  VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
102  for (size_t i = 0; i < nbDistinctSites; i++)
103  {
104  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
105  likelihoodsFatherConstantPart_i->resize(nbClasses);
106  for (size_t c = 0; c < nbClasses; c++)
107  {
108  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
109  likelihoodsFatherConstantPart_i_c->resize(nbStates);
110  double rc = rDist.getProbability(c);
111  for (size_t s = 0; s < nbStates; s++)
112  {
113  // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
114  // freq is already accounted in the array
115  (*likelihoodsFatherConstantPart_i_c)[s] = rc;
116  }
117  }
118  }
119 
120  // First, what will remain constant:
121  size_t nbSons = father->getNumberOfSons();
122  for (size_t n = 0; n < nbSons; n++)
123  {
124  const Node* currentSon = father->getSon(n);
125  if (currentSon->getId() != currentNode->getId())
126  {
127  const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
128 
129  // Now iterate over all site partitions:
130  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->getId()));
131  VVVdouble pxy;
132  bool first;
133  while (mit->hasNext())
134  {
135  auto bmd = mit->next();
136  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
137  first = true;
138  while (sit->hasNext())
139  {
140  size_t i = sit->next();
141  // We retrieve the transition probabilities for this site partition:
142  if (first)
143  {
144  pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
145  first = false;
146  }
147  const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
148  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
149  for (size_t c = 0; c < nbClasses; c++)
150  {
151  const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
152  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
153  VVdouble& pxy_c = pxy[c];
154  for (size_t x = 0; x < nbStates; x++)
155  {
156  Vdouble& pxy_c_x = pxy_c[x];
157  double likelihood = 0.;
158  for (size_t y = 0; y < nbStates; y++)
159  {
160  likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
161  }
162  likelihoodsFatherConstantPart_i_c[x] *= likelihood;
163  }
164  }
165  }
166  }
167  }
168  }
169  if (father->hasFather())
170  {
171  const Node* currentSon = father->getFather();
172  const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
173  // Now iterate over all site partitions:
174  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->getId()));
175  VVVdouble pxy;
176  bool first;
177  while (mit->hasNext())
178  {
179  auto bmd = mit->next();
180  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
181  first = true;
182  while (sit->hasNext())
183  {
184  size_t i = sit->next();
185  // We retrieve the transition probabilities for this site partition:
186  if (first)
187  {
188  pxy = drtl->getTransitionProbabilitiesPerRateClass(father->getId(), i);
189  first = false;
190  }
191  const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
192  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
193  for (size_t c = 0; c < nbClasses; c++)
194  {
195  const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
196  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
197  VVdouble& pxy_c = pxy[c];
198  for (size_t x = 0; x < nbStates; x++)
199  {
200  double likelihood = 0.;
201  for (size_t y = 0; y < nbStates; y++)
202  {
203  Vdouble& pxy_c_x = pxy_c[y];
204  likelihood += pxy_c_x[x] * likelihoodsFather_son_i_c[y];
205  }
206  likelihoodsFatherConstantPart_i_c[x] *= likelihood;
207  }
208  }
209  }
210  }
211  }
212  else
213  {
214  // Account for root frequencies:
215  for (size_t i = 0; i < nbDistinctSites; i++)
216  {
217  vector<double> freqs = drtl->getRootFrequencies(i);
218  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
219  for (size_t c = 0; c < nbClasses; c++)
220  {
221  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
222  for (size_t x = 0; x < nbStates; x++)
223  {
224  likelihoodsFatherConstantPart_i_c[x] *= freqs[x];
225  }
226  }
227  }
228  }
229 
230 
231  // Then, we deal with the node of interest.
232  // We first average upon 'y' to save computations, and then upon 'x'.
233  // ('y' is the state at 'node' and 'x' the state at 'father'.)
234 
235  // Iterate over all site partitions:
236  const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentNode->getId());
237  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
238  VVVdouble pxy;
239  bool first;
240  while (mit->hasNext())
241  {
242  auto bmd = mit->next();
243  substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
244  // compute all nxy first:
245  VVVVdouble nxy(nbClasses);
246  for (size_t c = 0; c < nbClasses; ++c)
247  {
248  VVVdouble& nxy_c = nxy[c];
249  double rc = rcRates[c];
250  nxy_c.resize(nbTypes);
251  for (size_t t = 0; t < nbTypes; ++t)
252  {
253  VVdouble& nxy_c_t = nxy_c[t];
254  auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
255 
256  nxy_c_t.resize(nbStates);
257  for (size_t x = 0; x < nbStates; ++x)
258  {
259  Vdouble& nxy_c_t_x = nxy_c_t[x];
260  nxy_c_t_x.resize(nbStates);
261  for (size_t y = 0; y < nbStates; ++y)
262  {
263  nxy_c_t_x[y] = (*nijt)(x, y);
264  }
265  }
266  }
267  }
268 
269  // Now loop over sites:
270  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
271  first = true;
272  while (sit->hasNext())
273  {
274  size_t i = sit->next();
275  // We retrieve the transition probabilities and substitution counts for this site partition:
276  if (first)
277  {
278  pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
279  first = false;
280  }
281  const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
282  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
283  for (size_t c = 0; c < nbClasses; ++c)
284  {
285  const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
286  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
287  const VVdouble& pxy_c = pxy[c];
288  VVVdouble& nxy_c = nxy[c];
289  for (size_t x = 0; x < nbStates; ++x)
290  {
291  double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
292  const Vdouble& pxy_c_x = pxy_c[x];
293  for (size_t y = 0; y < nbStates; ++y)
294  {
295  double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
296  * pxy_c_x[y]
297  * likelihoodsFather_node_i_c[y];
298 
299  for (size_t t = 0; t < nbTypes; ++t)
300  {
301  // Now the vector computation:
302  substitutionsForCurrentNode[i][t] += likelihood_cxy * nxy_c[t][x][y];
303  // <------------> <------------>
304  // Posterior probability | |
305  // for site i and rate class c * | |
306  // likelihood for this site----------------+ |
307  // |
308  // Substitution function for site i and rate class c--------+
309  }
310  }
311  }
312  }
313  }
314  }
315 
316  // Now we just have to copy the substitutions into the result vector:
317  for (size_t i = 0; i < nbSites; ++i)
318  {
319  for (size_t t = 0; t < nbTypes; ++t)
320  {
321  (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t] / Lr[rootPatternLinks[i]];
322  }
323  }
324  }
325  if (verbose)
326  {
330  }
331 
332  return substitutions;
333 }
334 
335 /******************************************************************************/
336 
337 unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectors(
338  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
339  const SubstitutionModelSet& modelSet,
340  const vector<int>& nodeIds,
341  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
342  bool verbose)
343 {
344  // Preamble:
345  if (!drtl->isInitialized())
346  throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
347 
348  // A few variables we'll need:
349 
350  const TreeTemplate<Node> tree(drtl->tree());
351  auto& sequences = drtl->data();
352  auto& rDist = drtl->rateDistribution();
353 
354  size_t nbSites = sequences.getNumberOfSites();
355  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
356  size_t nbStates = sequences.alphabet().getSize();
357  size_t nbClasses = rDist.getNumberOfCategories();
358  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
359  vector<const Node*> nodes = tree.getNodes();
360  auto& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
361  nodes.pop_back(); // Remove root node.
362  size_t nbNodes = nodes.size();
363 
364  // We create a new ProbabilisticSubstitutionMapping object:
365  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
366 
367  // Store likelihood for each rate for each site:
368  VVVdouble lik;
369  drtl->computeLikelihoodAtNode(tree.getRootId(), lik);
370  Vdouble Lr(nbDistinctSites, 0);
371  Vdouble rcProbs = rDist.getProbabilities();
372  Vdouble rcRates = rDist.getCategories();
373  for (size_t i = 0; i < nbDistinctSites; i++)
374  {
375  VVdouble* lik_i = &lik[i];
376  for (size_t c = 0; c < nbClasses; c++)
377  {
378  Vdouble* lik_i_c = &(*lik_i)[c];
379  double rc = rDist.getProbability(c);
380  for (size_t s = 0; s < nbStates; s++)
381  {
382  Lr[i] += (*lik_i_c)[s] * rc;
383  }
384  }
385  }
386 
387  // Compute the number of substitutions for each class and each branch in the tree:
388  if (verbose)
389  ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);
390 
391  for (size_t l = 0; l < nbNodes; ++l)
392  {
393  // For each node,
394  const Node* currentNode = nodes[l];
395  if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
396  continue;
397 
398  const Node* father = currentNode->getFather();
399 
400  double d = currentNode->getDistanceToFather();
401 
402  if (verbose)
403  ApplicationTools::displayGauge(l, nbNodes - 1);
404  VVdouble substitutionsForCurrentNode(nbDistinctSites);
405  for (size_t i = 0; i < nbDistinctSites; ++i)
406  {
407  substitutionsForCurrentNode[i].resize(nbTypes);
408  }
409 
410  // Now we've got to compute likelihoods in a smart manner... ;)
411  VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
412  for (size_t i = 0; i < nbDistinctSites; i++)
413  {
414  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
415  likelihoodsFatherConstantPart_i.resize(nbClasses);
416  for (size_t c = 0; c < nbClasses; c++)
417  {
418  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
419  likelihoodsFatherConstantPart_i_c.resize(nbStates);
420  double rc = rDist.getProbability(c);
421  for (size_t s = 0; s < nbStates; s++)
422  {
423  // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
424  // freq is already accounted in the array
425  likelihoodsFatherConstantPart_i_c[s] = rc;
426  }
427  }
428  }
429 
430  // First, what will remain constant:
431  size_t nbSons = father->getNumberOfSons();
432  for (size_t n = 0; n < nbSons; n++)
433  {
434  const Node* currentSon = father->getSon(n);
435  if (currentSon->getId() != currentNode->getId())
436  {
437  const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
438 
439  // Now iterate over all site partitions:
440  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->getId()));
441  VVVdouble pxy;
442  bool first;
443  while (mit->hasNext())
444  {
445  auto bmd = mit->next();
446  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
447  first = true;
448  while (sit->hasNext())
449  {
450  size_t i = sit->next();
451  // We retrieve the transition probabilities for this site partition:
452  if (first)
453  {
454  pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
455  first = false;
456  }
457  const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
458  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
459  for (size_t c = 0; c < nbClasses; c++)
460  {
461  const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
462  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
463  VVdouble& pxy_c = pxy[c];
464  for (size_t x = 0; x < nbStates; x++)
465  {
466  Vdouble& pxy_c_x = pxy_c[x];
467  double likelihood = 0.;
468  for (size_t y = 0; y < nbStates; y++)
469  {
470  likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
471  }
472  likelihoodsFatherConstantPart_i_c[x] *= likelihood;
473  }
474  }
475  }
476  }
477  }
478  }
479  if (father->hasFather())
480  {
481  const Node* currentSon = father->getFather();
482  const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
483  // Now iterate over all site partitions:
484  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->getId()));
485  VVVdouble pxy;
486  bool first;
487  while (mit->hasNext())
488  {
489  auto bmd = mit->next();
490  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
491  first = true;
492  while (sit->hasNext())
493  {
494  size_t i = sit->next();
495  // We retrieve the transition probabilities for this site partition:
496  if (first)
497  {
498  pxy = drtl->getTransitionProbabilitiesPerRateClass(father->getId(), i);
499  first = false;
500  }
501  const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
502  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
503  for (size_t c = 0; c < nbClasses; c++)
504  {
505  const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
506  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
507  VVdouble& pxy_c = pxy[c];
508  for (size_t x = 0; x < nbStates; x++)
509  {
510  double likelihood = 0.;
511  for (size_t y = 0; y < nbStates; y++)
512  {
513  Vdouble& pxy_c_x = pxy_c[y];
514  likelihood += pxy_c_x[x] * likelihoodsFather_son_i_c[y];
515  }
516  likelihoodsFatherConstantPart_i_c[x] *= likelihood;
517  }
518  }
519  }
520  }
521  }
522  else
523  {
524  // Account for root frequencies:
525  for (size_t i = 0; i < nbDistinctSites; i++)
526  {
527  vector<double> freqs = drtl->getRootFrequencies(i);
528  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
529  for (size_t c = 0; c < nbClasses; c++)
530  {
531  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
532  for (size_t x = 0; x < nbStates; x++)
533  {
534  likelihoodsFatherConstantPart_i_c[x] *= freqs[x];
535  }
536  }
537  }
538  }
539 
540 
541  // Then, we deal with the node of interest.
542  // We first average upon 'y' to save computations, and then upon 'x'.
543  // ('y' is the state at 'node' and 'x' the state at 'father'.)
544 
545  // Iterate over all site partitions:
546  const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentNode->getId());
547  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
548  VVVdouble pxy;
549  bool first;
550  while (mit->hasNext())
551  {
552  auto bmd = mit->next();
553  substitutionCount->setSubstitutionModel(modelSet.getSubstitutionModelForNode(currentNode->getId()));
554 
555  // compute all nxy first:
556  VVVVdouble nxy(nbClasses);
557  for (size_t c = 0; c < nbClasses; ++c)
558  {
559  VVVdouble& nxy_c = nxy[c];
560  double rc = rcRates[c];
561  nxy_c.resize(nbTypes);
562  for (size_t t = 0; t < nbTypes; ++t)
563  {
564  VVdouble& nxy_c_t = nxy_c[t];
565  auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
566  nxy_c_t.resize(nbStates);
567  for (size_t x = 0; x < nbStates; ++x)
568  {
569  Vdouble& nxy_c_t_x = nxy_c_t[x];
570  nxy_c_t_x.resize(nbStates);
571  for (size_t y = 0; y < nbStates; ++y)
572  {
573  nxy_c_t_x[y] = (*nijt)(x, y);
574  }
575  }
576  }
577  }
578 
579  // Now loop over sites:
580  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
581  first = true;
582  while (sit->hasNext())
583  {
584  size_t i = sit->next();
585  // We retrieve the transition probabilities and substitution counts for this site partition:
586  if (first)
587  {
588  pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
589  first = false;
590  }
591  const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
592  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
593  for (size_t c = 0; c < nbClasses; ++c)
594  {
595  const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
596  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
597  const VVdouble& pxy_c = pxy[c];
598  VVVdouble& nxy_c = nxy[c];
599  for (size_t x = 0; x < nbStates; ++x)
600  {
601  double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
602  const Vdouble& pxy_c_x = pxy_c[x];
603  for (size_t y = 0; y < nbStates; ++y)
604  {
605  double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
606  * pxy_c_x[y]
607  * likelihoodsFather_node_i_c[y];
608 
609  for (size_t t = 0; t < nbTypes; ++t)
610  {
611  // Now the vector computation:
612  substitutionsForCurrentNode[i][t] += likelihood_cxy * nxy_c[t][x][y];
613  // <------------> <------------>
614  // Posterior probability | |
615  // for site i and rate class c * | |
616  // likelihood for this site----------------+ |
617  // |
618  // Substitution function for site i and rate class c---------+
619  }
620  }
621  }
622  }
623  }
624  }
625 
626  // Now we just have to copy the substitutions into the result vector:
627  for (size_t i = 0; i < nbSites; ++i)
628  {
629  for (size_t t = 0; t < nbTypes; ++t)
630  {
631  (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t] / Lr[rootPatternLinks[i]];
632  }
633  }
634  }
635  if (verbose)
636  {
640  }
641 
642  return substitutions;
643 }
644 
645 /**************************************************************************************************/
646 
647 unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectorsNoAveraging(
648  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
649  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
650  bool verbose)
651 {
652  // Preamble:
653  if (!drtl->isInitialized())
654  throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsNoAveraging(). Likelihood object is not initialized.");
655 
656  // A few variables we'll need:
657  const TreeTemplate<Node> tree(drtl->tree());
658  auto& sequences = drtl->data();
659  auto& rDist = drtl->rateDistribution();
660 
661  size_t nbSites = sequences.getNumberOfSites();
662  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
663  size_t nbStates = sequences.alphabet().getSize();
664  size_t nbClasses = rDist.getNumberOfCategories();
665  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
666  vector<const Node*> nodes = tree.getNodes();
667  const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
668  nodes.pop_back(); // Remove root node.
669  size_t nbNodes = nodes.size();
670 
671  // We create a new ProbabilisticSubstitutionMapping object:
672  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
673 
674  Vdouble rcRates = rDist.getCategories();
675 
676  // Compute the number of substitutions for each class and each branch in the tree:
677  if (verbose)
678  ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);
679 
680  for (size_t l = 0; l < nbNodes; ++l)
681  {
682  // For each node,
683  const Node* currentNode = nodes[l];
684 
685  const Node* father = currentNode->getFather();
686 
687  double d = currentNode->getDistanceToFather();
688 
689  if (verbose)
690  ApplicationTools::displayGauge(l, nbNodes - 1);
691  VVdouble substitutionsForCurrentNode(nbDistinctSites);
692  for (size_t i = 0; i < nbDistinctSites; ++i)
693  {
694  substitutionsForCurrentNode[i].resize(nbTypes);
695  }
696 
697  // Now we've got to compute likelihoods in a smart manner... ;)
698  VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
699  for (size_t i = 0; i < nbDistinctSites; ++i)
700  {
701  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
702  likelihoodsFatherConstantPart_i.resize(nbClasses);
703  for (size_t c = 0; c < nbClasses; ++c)
704  {
705  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
706  likelihoodsFatherConstantPart_i_c.resize(nbStates);
707  double rc = rDist.getProbability(c);
708  for (size_t s = 0; s < nbStates; ++s)
709  {
710  likelihoodsFatherConstantPart_i_c[s] = rc;
711  }
712  }
713  }
714 
715  // First, what will remain constant:
716  size_t nbSons = father->getNumberOfSons();
717  for (size_t n = 0; n < nbSons; ++n)
718  {
719  const Node* currentSon = father->getSon(n);
720  if (currentSon->getId() != currentNode->getId())
721  {
722  const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
723 
724  // Now iterate over all site partitions:
725  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->getId()));
726  VVVdouble pxy;
727  bool first;
728  while (mit->hasNext())
729  {
730  auto bmd = mit->next();
731  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
732  first = true;
733  while (sit->hasNext())
734  {
735  size_t i = sit->next();
736  // We retrieve the transition probabilities for this site partition:
737  if (first)
738  {
739  pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
740  first = false;
741  }
742  const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
743  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
744  for (size_t c = 0; c < nbClasses; ++c)
745  {
746  const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
747  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
748  VVdouble& pxy_c = pxy[c];
749  for (size_t x = 0; x < nbStates; ++x)
750  {
751  Vdouble& pxy_c_x = pxy_c[x];
752  double likelihood = 0.;
753  for (size_t y = 0; y < nbStates; ++y)
754  {
755  likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
756  }
757  likelihoodsFatherConstantPart_i_c[x] *= likelihood;
758  }
759  }
760  }
761  }
762  }
763  }
764  if (father->hasFather())
765  {
766  const Node* currentSon = father->getFather();
767  const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
768  // Now iterate over all site partitions:
769  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->getId()));
770  VVVdouble pxy;
771  bool first;
772  while (mit->hasNext())
773  {
774  auto bmd = mit->next();
775  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
776  first = true;
777  while (sit->hasNext())
778  {
779  size_t i = sit->next();
780  // We retrieve the transition probabilities for this site partition:
781  if (first)
782  {
783  pxy = drtl->getTransitionProbabilitiesPerRateClass(father->getId(), i);
784  first = false;
785  }
786  const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
787  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
788  for (size_t c = 0; c < nbClasses; ++c)
789  {
790  const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
791  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
792  VVdouble& pxy_c = pxy[c];
793  for (size_t x = 0; x < nbStates; ++x)
794  {
795  double likelihood = 0.;
796  for (size_t y = 0; y < nbStates; ++y)
797  {
798  Vdouble& pxy_c_x = pxy_c[y];
799  likelihood += pxy_c_x[x] * likelihoodsFather_son_i_c[y];
800  }
801  likelihoodsFatherConstantPart_i_c[x] *= likelihood;
802  }
803  }
804  }
805  }
806  }
807  else
808  {
809  // Account for root frequencies:
810  for (size_t i = 0; i < nbDistinctSites; ++i)
811  {
812  vector<double> freqs = drtl->getRootFrequencies(i);
813  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
814  for (size_t c = 0; c < nbClasses; ++c)
815  {
816  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
817  for (size_t x = 0; x < nbStates; ++x)
818  {
819  likelihoodsFatherConstantPart_i_c[x] *= freqs[x];
820  }
821  }
822  }
823  }
824 
825  // Then, we deal with the node of interest.
826  // We first average upon 'y' to save computations, and then upon 'x'.
827  // ('y' is the state at 'node' and 'x' the state at 'father'.)
828 
829  // Iterate over all site partitions:
830  const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentNode->getId());
831  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
832  VVVdouble pxy;
833  bool first;
834  while (mit->hasNext())
835  {
836  auto bmd = mit->next();
837  substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
838  // compute all nxy first:
839  VVVVdouble nxy(nbClasses);
840  for (size_t c = 0; c < nbClasses; ++c)
841  {
842  double rc = rcRates[c];
843  VVVdouble& nxy_c = nxy[c];
844  nxy_c.resize(nbTypes);
845  for (size_t t = 0; t < nbTypes; ++t)
846  {
847  VVdouble& nxy_c_t = nxy_c[t];
848  nxy_c_t.resize(nbStates);
849  auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
850  for (size_t x = 0; x < nbStates; ++x)
851  {
852  Vdouble& nxy_c_t_x = nxy_c_t[x];
853  nxy_c_t_x.resize(nbStates);
854  for (size_t y = 0; y < nbStates; ++y)
855  {
856  nxy_c_t_x[y] = (*nijt)(x, y);
857  }
858  }
859  }
860  }
861 
862  // Now loop over sites:
863  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
864  first = true;
865  while (sit->hasNext())
866  {
867  size_t i = sit->next();
868  // We retrieve the transition probabilities and substitution counts for this site partition:
869  if (first)
870  {
871  pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
872  first = false;
873  }
874  const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
875  VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
876  RowMatrix<double> pairProbabilities(nbStates, nbStates);
877  MatrixTools::fill(pairProbabilities, 0.);
878  VVVdouble subsCounts(nbStates);
879  for (size_t j = 0; j < nbStates; ++j)
880  {
881  subsCounts[j].resize(nbStates);
882  for (size_t k = 0; k < nbStates; ++k)
883  {
884  subsCounts[j][k].resize(nbTypes);
885  }
886  }
887  for (size_t c = 0; c < nbClasses; ++c)
888  {
889  const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
890  Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
891  const VVdouble& pxy_c = pxy[c];
892  VVVdouble& nxy_c = nxy[c];
893  for (size_t x = 0; x < nbStates; ++x)
894  {
895  double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
896  const Vdouble& pxy_c_x = pxy_c[x];
897  for (size_t y = 0; y < nbStates; ++y)
898  {
899  double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
900  * pxy_c_x[y]
901  * likelihoodsFather_node_i_c[y];
902  pairProbabilities(x, y) += likelihood_cxy; // Sum over all rate classes.
903  for (size_t t = 0; t < nbTypes; ++t)
904  {
905  subsCounts[x][y][t] += likelihood_cxy * nxy_c[t][x][y];
906  }
907  }
908  }
909  }
910  // Now the vector computation:
911  // Here we do not average over all possible pair of ancestral states,
912  // We only consider the one with max likelihood:
913  vector<size_t> xy = MatrixTools::whichMax(pairProbabilities);
914  for (size_t t = 0; t < nbTypes; ++t)
915  {
916  substitutionsForCurrentNode[i][t] += subsCounts[xy[0]][xy[1]][t] / pairProbabilities(xy[0], xy[1]);
917  }
918  }
919  }
920  // Now we just have to copy the substitutions into the result vector:
921  for (size_t i = 0; i < nbSites; i++)
922  {
923  for (size_t t = 0; t < nbTypes; t++)
924  {
925  (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t];
926  }
927  }
928  }
929  if (verbose)
930  {
934  }
935  return substitutions;
936 }
937 
938 /**************************************************************************************************/
939 
/**
 * @brief Compute per-branch, per-site substitution count vectors using
 * marginal ancestral state reconstruction, WITHOUT averaging over ancestral
 * states: for each site, the single reconstructed state at the node and at
 * its father are used directly to look up the expected number of
 * substitutions of each type on the connecting branch.
 *
 * NOTE(review): this extract looks like a doxygen-rendered copy of the file;
 * the embedded numeric prefixes are original line numbers and a few
 * statements are missing (see notes below) — verify against the real source.
 *
 * @param drtl              Initialized double-recursive likelihood object.
 * @param substitutionCount Substitution count object (defines the types).
 * @param verbose           Display progress messages if true.
 * @return A new LegacyProbabilisticSubstitutionMapping (branches x sites x types).
 * @throws Exception if the likelihood object is not initialized.
 */
940 unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectorsNoAveragingMarginal(
941  shared_ptr<const DRTreeLikelihoodInterface> drtl,
942  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
943  bool verbose)
944 {
945  // Preamble:
946  if (!drtl->isInitialized())
947  throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsNoAveragingMarginal(). Likelihood object is not initialized.");
948 
949  // A few variables we'll need:
950 
951  const TreeTemplate<Node> tree(drtl->tree());
952  auto& sequences = drtl->data();
953  auto& rDist = drtl->rateDistribution();
954  auto alpha = sequences.getAlphabet();
955 
956  size_t nbSites = sequences.getNumberOfSites();
957  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
958  size_t nbStates = alpha->getSize();
959  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
960  vector<const Node*> nodes = tree.getNodes();
961  const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
962  nodes.pop_back(); // Remove root node.
963  size_t nbNodes = nodes.size();
964 
965  // We create a new ProbabilisticSubstitutionMapping object:
966  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
967 
968  // Compute the whole likelihood of the tree according to the specified model:
969 
970  Vdouble rcRates = rDist.getCategories();
971 
972  // Compute the number of substitutions for each class and each branch in the tree:
973  if (verbose)
974  ApplicationTools::displayTask("Compute marginal ancestral states");
// NOTE(review): the declaration of `masr` (presumably a
// MarginalAncestralStateReconstruction built from `drtl`, original line 975)
// appears to be missing from this extract — TODO confirm against the source.
976  map<int, vector<size_t>> ancestors = masr.getAllAncestralStates();
977 
978  if (verbose)
// NOTE(review): the statement guarded by this `if` (original line 979,
// presumably ApplicationTools::displayTaskDone()) is missing in this extract.
980 
981  // Now we just have to compute the substitution vectors:
982  if (verbose)
983  ApplicationTools::displayTask("Compute substitution vectors", true);
984 
985  for (size_t l = 0; l < nbNodes; l++)
986  {
987  const Node* currentNode = nodes[l];
988 
989  const Node* father = currentNode->getFather();
990 
991  double d = currentNode->getDistanceToFather();
992 
993  vector<size_t> nodeStates = ancestors[currentNode->getId()]; // These are not 'true' ancestors ;)
994  vector<size_t> fatherStates = ancestors[father->getId()];
995 
996  // For each node,
997  if (verbose)
998  ApplicationTools::displayGauge(l, nbNodes - 1);
999  VVdouble substitutionsForCurrentNode(nbDistinctSites);
1000  for (size_t i = 0; i < nbDistinctSites; ++i)
1001  {
1002  substitutionsForCurrentNode[i].resize(nbTypes);
1003  }
1004 
1005  // Here, we have no likelihood computation to do!
1006 
1007  // Then, we deal with the node of interest.
1008  // ('y' is the state at 'node' and 'x' the state at 'father'.)
1009  // Iterate over all site partitions:
1010  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
1011  while (mit->hasNext())
1012  {
1013  auto bmd = mit->next();
1014  substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
1015  // compute all nxy first:
// nxyt[t][x][y] = expected number of type-(t+1) substitutions on a branch of
// length d, conditional on father state x and node state y.
1016  VVVdouble nxyt(nbTypes);
1017  for (size_t t = 0; t < nbTypes; ++t)
1018  {
1019  nxyt[t].resize(nbStates);
1020  auto nxy = substitutionCount->getAllNumbersOfSubstitutions(d, t + 1);
1021  for (size_t x = 0; x < nbStates; ++x)
1022  {
1023  nxyt[t][x].resize(nbStates);
1024  for (size_t y = 0; y < nbStates; ++y)
1025  {
1026  nxyt[t][x][y] = (*nxy)(x, y);
1027  }
1028  }
1029  }
1030  // Now loop over sites:
1031  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
1032  while (sit->hasNext())
1033  {
1034  size_t i = sit->next();
1035  size_t fatherState = fatherStates[i];
1036  size_t nodeState = nodeStates[i];
// Out-of-range states can occur with generic/ambiguous characters; in that
// case the count is conservatively set to zero.
1037  if (fatherState >= nbStates || nodeState >= nbStates)
1038  for (size_t t = 0; t < nbTypes; ++t)
1039  {
1040  substitutionsForCurrentNode[i][t] = 0;
1041  }// To be conservative! Only in case there are generic characters.
1042  else
1043  for (size_t t = 0; t < nbTypes; ++t)
1044  {
1045  substitutionsForCurrentNode[i][t] = nxyt[t][fatherState][nodeState];
1046  }
1047  }
1048  }
1049 
1050  // Now we just have to copy the substitutions into the result vector:
// Map each original site to its distinct-site pattern via rootPatternLinks.
1051  for (size_t i = 0; i < nbSites; i++)
1052  {
1053  for (size_t t = 0; t < nbTypes; t++)
1054  {
1055  (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t];
1056  }
1057  }
1058  }
1059  if (verbose)
1060  {
// NOTE(review): lines 1061 and 1063 (message flushing / displayTaskDone) are
// missing from this extract.
1062  *ApplicationTools::message << " ";
1064  }
1065  return substitutions;
1066 }
1067 
1068 /**************************************************************************************************/
1069 
/**
 * @brief Compute per-branch, per-site substitution vectors by averaging over
 * all pairs of ancestral states, weighted by their marginal posterior
 * probabilities at the node and at its father (for each rate class).
 *
 * NOTE(review): doxygen-extract artifact — the statements computing
 * `probsNode` and `probsFather` (original lines 1126-1127, presumably the
 * posterior state probabilities per site/class for the node and its father)
 * are missing from this extract; verify against the real source file.
 *
 * @param drtl              Initialized double-recursive likelihood object.
 * @param substitutionCount Substitution count object (defines the types).
 * @param verbose           Display progress messages if true.
 * @return A new LegacyProbabilisticSubstitutionMapping (branches x sites x types).
 * @throws Exception if the likelihood object is not initialized.
 */
1070 unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectorsMarginal(
1071  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
1072  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
1073  bool verbose)
1074 {
1075  // Preamble:
1076  if (!drtl->isInitialized())
1077  throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsMarginal(). Likelihood object is not initialized.");
1078 
1079  // A few variables we'll need:
1080 
1081  const TreeTemplate<Node> tree(drtl->tree());
1082  auto& sequences = drtl->data();
1083  auto& rDist = drtl->rateDistribution();
1084 
1085  size_t nbSites = sequences.getNumberOfSites();
1086  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
1087  size_t nbStates = sequences.alphabet().getSize();
1088  size_t nbClasses = rDist.getNumberOfCategories();
1089  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
1090  vector<const Node*> nodes = tree.getNodes();
1091  const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
1092  nodes.pop_back(); // Remove root node.
1093  size_t nbNodes = nodes.size();
1094 
1095  // We create a new ProbabilisticSubstitutionMapping object:
1096  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
1097 
1098  // Compute the whole likelihood of the tree according to the specified model:
1099 
1100  Vdouble rcProbs = rDist.getProbabilities();
1101  Vdouble rcRates = rDist.getCategories();
1102 
1103  // II) Compute the number of substitutions for each class and each branch in the tree:
1104  if (verbose)
1105  ApplicationTools::displayTask("Compute marginal node-pairs likelihoods", true);
1106 
1107  for (size_t l = 0; l < nbNodes; l++)
1108  {
1109  const Node* currentNode = nodes[l];
1110 
1111  const Node* father = currentNode->getFather();
1112 
1113  double d = currentNode->getDistanceToFather();
1114 
1115  // For each node,
1116  if (verbose)
1117  ApplicationTools::displayGauge(l, nbNodes - 1);
1118  VVdouble substitutionsForCurrentNode(nbDistinctSites);
1119  for (size_t i = 0; i < nbDistinctSites; ++i)
1120  {
1121  substitutionsForCurrentNode[i].resize(nbTypes);
1122  }
1123 
1124  // Then, we deal with the node of interest.
1125  // ('y' is the state at 'node' and 'x' the state at 'father'.)
// NOTE(review): original lines 1126-1127, which presumably define
// `probsNode` and `probsFather` used below, are missing from this extract.
1128 
1129  // Iterate over all site partitions:
1130  unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
1131  while (mit->hasNext())
1132  {
1133  auto bmd = mit->next();
1134  substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
1135  // compute all nxy first:
// nxy[c][t][x][y] = expected number of type-(t+1) substitutions for rate
// class c on a branch of scaled length d*rc, given end states x (father)
// and y (node).
1136  VVVVdouble nxy(nbClasses);
1137  for (size_t c = 0; c < nbClasses; ++c)
1138  {
1139  VVVdouble& nxy_c = nxy[c];
1140  double rc = rcRates[c];
1141  nxy_c.resize(nbTypes);
1142  for (size_t t = 0; t < nbTypes; ++t)
1143  {
1144  VVdouble& nxy_c_t = nxy_c[t];
1145  auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
1146  nxy_c_t.resize(nbStates);
1147  for (size_t x = 0; x < nbStates; ++x)
1148  {
1149  Vdouble& nxy_c_t_x = nxy_c_t[x];
1150  nxy_c_t_x.resize(nbStates);
1151  for (size_t y = 0; y < nbStates; ++y)
1152  {
1153  nxy_c_t_x[y] = (*nijt)(x, y);
1154  }
1155  }
1156  }
1157  }
1158 
1159  // Now loop over sites:
1160  unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
1161  while (sit->hasNext())
1162  {
1163  size_t i = sit->next();
1164  VVdouble& probsNode_i = probsNode[i];
1165  VVdouble& probsFather_i = probsFather[i];
1166  for (size_t c = 0; c < nbClasses; ++c)
1167  {
1168  Vdouble& probsNode_i_c = probsNode_i[c];
1169  Vdouble& probsFather_i_c = probsFather_i[c];
1170  VVVdouble& nxy_c = nxy[c];
1171  for (size_t x = 0; x < nbStates; ++x)
1172  {
1173  for (size_t y = 0; y < nbStates; ++y)
1174  {
1175  double prob_cxy = probsFather_i_c[x] * probsNode_i_c[y];
1176  // Now the vector computation:
1177  for (size_t t = 0; t < nbTypes; ++t)
1178  {
1179  substitutionsForCurrentNode[i][t] += prob_cxy * nxy_c[t][x][y];
1180  // <------> <------------>
1181  // Posterior probability | |
1182  // for site i and rate class c * | |
1183  // likelihood for this site--------------+ |
1184  // |
1185  // Substitution function for site i and rate class c---+
1186  }
1187  }
1188  }
1189  }
1190  }
1191  }
1192 
1193  // Now we just have to copy the substitutions into the result vector:
1194  for (size_t i = 0; i < nbSites; ++i)
1195  {
1196  for (size_t t = 0; t < nbTypes; ++t)
1197  {
1198  (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t];
1199  }
1200  }
1201  }
1202  if (verbose)
1203  {
// NOTE(review): lines 1204 and 1206 (message flushing / displayTaskDone) are
// missing from this extract.
1205  *ApplicationTools::message << " ";
1207  }
1208  return substitutions;
1209 }
1210 
1211 /**************************************************************************************************/
1212 
/**
 * @brief Write a substitution mapping to an output stream as a tab-delimited
 * table: one header row ("Branches", "Mean", then one "Site<coord>" column
 * per site), followed by one row per branch containing the node id, the
 * branch length, and the count of substitutions of the given type per site.
 *
 * NOTE(review): the declaration line (original line 1213, with return type
 * and function name — the exception message suggests `writeToFile` or a
 * stream-writing variant) is missing from this extract; verify against the
 * real source. Despite the column label "Mean", the second value written per
 * row is the branch length (getDistanceToFather).
 *
 * @throws IOException if the stream is not writable.
 */
1214  const LegacyProbabilisticSubstitutionMapping& substitutions,
1215  const SiteContainerInterface& sites,
1216  size_t type,
1217  ostream& out)
1218 {
1219  if (!out)
1220  throw IOException("LegacySubstitutionMappingTools::writeToFile. Can't write to stream.");
1221  out << "Branches";
1222  out << "\tMean";
1223  for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
1224  {
1225  out << "\tSite" << sites.site(i).getCoordinate();
1226  }
1227  out << endl;
1228 
1229  for (size_t j = 0; j < substitutions.getNumberOfBranches(); j++)
1230  {
1231  out << substitutions.getNode(j)->getId() << "\t" << substitutions.getNode(j)->getDistanceToFather();
1232  for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
1233  {
1234  out << "\t" << substitutions(j, i, type);
1235  }
1236  out << endl;
1237  }
1238 }
1239 
1240 /**************************************************************************************************/
1241 
/**
 * @brief Read a substitution mapping back from a tab-delimited stream in the
 * format produced by the writing function above: the first column (node ids)
 * and second column (means/branch lengths) are discarded, remaining columns
 * are per-site counts, and site positions are parsed from the "Site<coord>"
 * column headers.
 *
 * NOTE(review): the declaration line (original line 1242) and one parameter
 * line (original line 1244, presumably the
 * `LegacyProbabilisticSubstitutionMapping& substitutions` out-parameter) are
 * missing from this extract — verify against the real source.
 *
 * @throws IOException wrapping any parsing error.
 */
1243  istream& in,
1245  size_t type)
1246 {
1247  try
1248  {
1249  auto data = DataTable::read(in, "\t", true, -1);
1250  vector<string> ids = data->getColumn(0);
1251  data->deleteColumn(0); // Remove ids
1252  data->deleteColumn(0); // Remove means
1253  // Now parse the table:
1254  size_t nbSites = data->getNumberOfColumns();
1255  substitutions.setNumberOfSites(nbSites);
1256  size_t nbBranches = data->getNumberOfRows();
1257  for (size_t i = 0; i < nbBranches; i++)
1258  {
1259  int id = TextTools::toInt(ids[i]);
1260  size_t br = substitutions.getNodeIndex(id);
1261  for (size_t j = 0; j < nbSites; j++)
1262  {
1263  substitutions(br, j, type) = TextTools::toDouble((*data)(i, j));
1264  }
1265  }
1266  // Parse the header:
// Column names are either "Site<pos>" or a bare position number.
1267  for (size_t i = 0; i < nbSites; i++)
1268  {
1269  string siteTxt = data->getColumnName(i);
1270  int site = 0;
1271  if (siteTxt.substr(0, 4) == "Site")
1272  site = TextTools::to<int>(siteTxt.substr(4));
1273  else
1274  site = TextTools::to<int>(siteTxt);
1275  substitutions.setSitePosition(i, site);
1276  }
1277  }
1278  catch (Exception& e)
1279  {
1280  throw IOException(string("Bad input file. ") + e.what());
1281  }
1282 }
1283 
1284 /**************************************************************************************************/
1285 
/**
 * @brief For one site, return a vector with one entry per branch, each entry
 * being the sum of the mapped substitution counts over all substitution types.
 *
 * NOTE(review): the declaration line(s) (original lines 1286-1287, with the
 * function name and the mapping parameter `smap`) are missing from this
 * extract — verify against the real source.
 */
1288  size_t siteIndex)
1289 {
1290  size_t nbBranches = smap.getNumberOfBranches();
1291  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
1292  Vdouble v(nbBranches);
1293  for (size_t l = 0; l < nbBranches; ++l)
1294  {
1295  v[l] = 0;
1296  for (size_t t = 0; t < nbTypes; ++t)
1297  {
1298  v[l] += smap(l, siteIndex, t);
1299  }
1300  }
1301  return v;
1302 }
1303 
1304 /**************************************************************************************************/
1305 
/**
 * @brief For one site, return a vector with one entry per substitution type,
 * each entry being the sum of the mapped substitution counts over all branches.
 *
 * NOTE(review): the declaration line(s) (original lines 1306-1307) are
 * missing from this extract — verify against the real source.
 */
1308  size_t siteIndex)
1309 {
1310  size_t nbBranches = smap.getNumberOfBranches();
1311  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
1312  Vdouble v(nbTypes);
1313  for (size_t t = 0; t < nbTypes; ++t)
1314  {
1315  v[t] = 0;
1316  for (size_t l = 0; l < nbBranches; ++l)
1317  {
1318  v[t] += smap(l, siteIndex, t);
1319  }
1320  }
1321  return v;
1322 }
1323 
1324 /**************************************************************************************************/
1325 
/**
 * @brief Compute the Euclidean norm of a site's substitution vector: the
 * per-branch counts (summed over types) are squared, summed over branches,
 * and the square root of the total is returned.
 *
 * NOTE(review): the declaration line(s) (original lines 1326-1327) are
 * missing from this extract — verify against the real source.
 */
1330  double sumSquare = 0;
1331  for (size_t l = 0; l < smap.getNumberOfBranches(); ++l)
1332  {
1333  double sum = 0;
1334  for (size_t t = 0; t < smap.getNumberOfSubstitutionTypes(); ++t)
1335  {
1336  sum += smap(l, siteIndex, t);
1337  }
1338  sumSquare += sum * sum;
1339  }
1340  return sqrt(sumSquare);
1341 }
1342 
1343 /**************************************************************************************************/
1344 
/**
 * @brief For one branch, return a vector with one entry per substitution
 * type, each entry being the sum of the mapped counts over all sites.
 *
 * NOTE(review): the declaration line(s) (original lines 1345-1346) are
 * missing from this extract — verify against the real source.
 */
1347  size_t branchIndex)
1348 {
1349  size_t nbSites = smap.getNumberOfSites();
1350  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
1351  Vdouble v(nbTypes, 0);
1352  for (size_t i = 0; i < nbSites; ++i)
1353  {
1354  for (size_t t = 0; t < nbTypes; ++t)
1355  {
1356  v[t] += smap(branchIndex, i, t);
1357  }
1358  }
1359  return v;
1360 }
1361 
1362 /**************************************************************************************************/
1363 
/**
 * @brief For one site, return a vector with one entry per substitution type,
 * each entry being the sum of the mapped counts over all branches.
 *
 * NOTE(review): the declaration line(s) (original lines 1364-1365) are
 * missing from this extract — verify against the real source.
 */
1366  size_t siteIndex)
1367 {
1368  size_t nbBranches = smap.getNumberOfBranches();
1369  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
1370  Vdouble v(nbTypes, 0);
1371  for (size_t i = 0; i < nbBranches; ++i)
1372  {
1373  for (size_t t = 0; t < nbTypes; ++t)
1374  {
1375  v[t] += smap(i, siteIndex, t);
1376  }
1377  }
1378  return v;
1379 }
1380 
1381 /**************************************************************************************************/
1382 
/**
 * @brief Aggregate substitution counts per branch and per type for the
 * requested branch ids, using a uniformization-based substitution count built
 * from the given model. Sites whose total count exceeds `threshold` (when
 * threshold >= 0) are skipped as presumably saturated; branches where any
 * count is NaN get all-zero counts, with a warning.
 *
 * NOTE(review): the declaration line (original line 1383, with the function
 * name and return type `vector<vector<double>>`) is missing from this
 * extract — verify against the real source.
 */
1384  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1385  const vector<int>& ids,
1386  std::shared_ptr<SubstitutionModelInterface> model,
1387  std::shared_ptr<const SubstitutionRegisterInterface> reg,
1388  double threshold,
1389  bool verbose)
1390 {
1391  auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
1392  auto mapping = LegacySubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, count, false);
1393 
1394  vector< vector<double>> counts(ids.size());
1395  size_t nbSites = mapping->getNumberOfSites();
1396  size_t nbTypes = mapping->getNumberOfSubstitutionTypes();
1397 
1398  for (size_t k = 0; k < ids.size(); ++k)
1399  {
1400  vector<double> countsf(nbTypes, 0);
1401  vector<double> tmp(nbTypes, 0);
1402  size_t nbIgnored = 0;
1403  bool error = false;
1404  for (size_t i = 0; !error && i < nbSites; ++i)
1405  {
1406  double s = 0;
1407  for (size_t t = 0; t < nbTypes; ++t)
1408  {
1409  tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
// NaN counts can occur (e.g. on very small branches); abort this branch.
1410  error = std::isnan(tmp[t]);
1411  if (error)
1412  goto ERROR;
1413  s += tmp[t];
1414  }
1415  if (threshold >= 0)
1416  {
1417  if (s <= threshold)
1418  countsf += tmp;
1419  else
1420  {
1421  nbIgnored++;
1422  }
1423  }
1424  else
1425  {
1426  countsf += tmp;
1427  }
1428  }
1429 
1430 ERROR:
1431  if (error)
1432  {
1433  // We do nothing. This happens for small branches.
1434  if (verbose)
1435  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", counts could not be computed.");
1436  for (size_t t = 0; t < nbTypes; ++t)
1437  {
1438  countsf[t] = 0;
1439  }
1440  }
1441  else
1442  {
1443  if (nbIgnored > 0)
1444  {
1445  if (verbose)
1446  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", " + TextTools::toString(nbIgnored) + " sites (" + TextTools::toString(ceil(static_cast<double>(nbIgnored * 100) / static_cast<double>(nbSites))) + "%) have been ignored because they are presumably saturated.");
1447  }
1448  }
1449 
1450  counts[k].resize(countsf.size());
1451  for (size_t j = 0; j < countsf.size(); ++j)
1452  {
1453  counts[k][j] = countsf[j];
1454  }
1455  }
1456 
1457  return counts;
1458 }
1459 
1460 /**************************************************************************************************/
1461 
/**
 * @brief Model-set overload of the per-branch count aggregation: same logic
 * as the single-model version, but the uniformization count is built from the
 * model set's first substitution model and the mapping is computed with the
 * model set. Saturation thresholding and NaN handling are identical.
 *
 * NOTE(review): the declaration line (original line 1462) is missing from
 * this extract — verify against the real source. This body duplicates the
 * single-model overload almost line for line; factoring the shared
 * aggregation loop into a helper would be worthwhile in the real file.
 */
1463  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1464  const vector<int>& ids,
1465  const SubstitutionModelSet& modelSet,
1466  std::shared_ptr<const SubstitutionRegisterInterface> reg,
1467  double threshold,
1468  bool verbose)
1469 {
1470  auto count = make_shared<UniformizationSubstitutionCount>(modelSet.getSubstitutionModel(0), reg);
1471 
1472  auto mapping = LegacySubstitutionMappingTools::computeSubstitutionVectors(drtl, modelSet, ids, count, false);
1473 
1474  vector< vector<double>> counts(ids.size());
1475  size_t nbSites = mapping->getNumberOfSites();
1476  size_t nbTypes = mapping->getNumberOfSubstitutionTypes();
1477 
1478  for (size_t k = 0; k < ids.size(); ++k)
1479  {
1480  vector<double> countsf(nbTypes, 0);
1481  vector<double> tmp(nbTypes, 0);
1482  size_t nbIgnored = 0;
1483  bool error = false;
1484  for (size_t i = 0; !error && i < nbSites; ++i)
1485  {
1486  double s = 0;
1487  for (size_t t = 0; t < nbTypes; ++t)
1488  {
1489  tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
1490  error = std::isnan(tmp[t]);
1491  if (error)
1492  goto ERROR;
1493  s += tmp[t];
1494  }
1495  if (threshold >= 0)
1496  {
1497  if (s <= threshold)
1498  countsf += tmp;
1499  else
1500  {
1501  nbIgnored++;
1502  }
1503  }
1504  else
1505  {
1506  countsf += tmp;
1507  }
1508  }
1509 
1510 ERROR:
1511  if (error)
1512  {
1513  // We do nothing. This happens for small branches.
1514  if (verbose)
1515  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", counts could not be computed.");
1516  for (size_t t = 0; t < nbTypes; ++t)
1517  {
1518  countsf[t] = 0;
1519  }
1520  }
1521  else
1522  {
1523  if (nbIgnored > 0)
1524  {
1525  if (verbose)
1526  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", " + TextTools::toString(nbIgnored) + " sites (" + TextTools::toString(ceil(static_cast<double>(nbIgnored * 100) / static_cast<double>(nbSites))) + "%) have been ignored because they are presumably saturated.");
1527  }
1528  }
1529 
1530  counts[k].resize(countsf.size());
1531  for (size_t j = 0; j < countsf.size(); ++j)
1532  {
1533  counts[k][j] = countsf[j];
1534  }
1535  }
1536 
1537  return counts;
1538 }
1539 
1540 /**************************************************************************************************/
1541 
/**
 * @brief Compute normalization factors per branch and per substitution type
 * under a null model: for each type, an alphabet index accumulating the null
 * model's Qij rates toward that type is built, a DecompositionReward is run,
 * and the resulting per-site rewards are summed per branch. NaN rewards reset
 * the branch sum to zero, with a warning.
 *
 * NOTE(review): the declaration line (original line 1542) is missing from
 * this extract — verify against the real source.
 */
1543  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1544  const vector<int>& ids,
1545  std::shared_ptr<const SubstitutionModelInterface> nullModel,
1546  const SubstitutionRegisterInterface& reg,
1547  bool verbose)
1548 {
1549  size_t nbTypes = reg.getNumberOfSubstitutionTypes();
1550  size_t nbStates = nullModel->alphabet().getSize();
1551  size_t nbSites = drtl->getNumberOfSites();
1552  vector<int> supportedStates = nullModel->getAlphabetStates();
1553 
1554  // compute the AlphabetIndex for each substitutionType
1555  vector<shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
1556 
// Initialize every per-type index to zero for all states.
1557  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1558  {
1559  usai[nbt] = make_shared<UserAlphabetIndex1>(nullModel->getAlphabet());
1560  for (size_t i = 0; i < nbStates; i++)
1561  {
1562  usai[nbt]->setIndex(supportedStates[i], 0);
1563  }
1564  }
1565 
// For each state i, accumulate the null-model rates Qij of all transitions
// i->j belonging to each substitution type (type 0 = "not counted").
1566  for (size_t i = 0; i < nbStates; i++)
1567  {
1568  for (size_t j = 0; j < nbStates; j++)
1569  {
1570  if (i != j)
1571  {
1572  size_t nbt = reg.getType(i, j);
1573  if (nbt != 0)
1574  usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + nullModel->Qij(i, j));
1575  }
1576  }
1577  }
1578 
1579  // compute the normalization for each substitutionType
1580  vector<vector<double>> rewards(ids.size());
1581 
1582  for (size_t k = 0; k < ids.size(); ++k)
1583  {
1584  rewards[k].resize(nbTypes);
1585  }
1586 
1587  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1588  {
1589  auto reward = make_shared<DecompositionReward>(nullModel, usai[nbt]);
1590  auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, ids, reward, false);
1591 
1592  for (size_t k = 0; k < ids.size(); ++k)
1593  {
1594  double s = 0;
1595  for (size_t i = 0; i < nbSites; ++i)
1596  {
1597  double tmp = (*mapping)(k, i);
1598  if (std::isnan(tmp))
1599  {
1600  if (verbose)
1601  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", reward for type " + reg.getTypeName(nbt + 1) + " could not be computed.");
1602  s = 0;
1603  break;
1604  }
1605  s += tmp;
1606  }
1607  rewards[k][nbt] = s;
1608  }
1609  }
1610  return rewards;
1611 }
1612 
1613 /**************************************************************************************************/
1614 
/**
 * @brief Model-set overload of the normalization computation: branches are
 * grouped by the null model they use (via getNodesWithModel), the per-type
 * alphabet indices are rebuilt from that model's Qij rates, and rewards are
 * summed per branch per type. Results are placed back at the position of each
 * branch id in `ids`.
 *
 * NOTE(review): the declaration line (original line 1615) is missing from
 * this extract — verify against the real source.
 */
1616  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1617  const vector<int>& ids,
1618  std::shared_ptr<const SubstitutionModelSet> nullModelSet,
1619  const SubstitutionRegisterInterface& reg,
1620  bool verbose)
1621 {
1622  size_t nbTypes = reg.getNumberOfSubstitutionTypes();
1623  size_t nbStates = nullModelSet->alphabet().getSize();
1624  size_t nbSites = drtl->getNumberOfSites();
1625  size_t nbModels = nullModelSet->getNumberOfModels();
1626 
1627  // compute the AlphabetIndex for each substitutionType
1628  // compute the normalization for each substitutionType
1629  vector<vector<double>> rewards(ids.size());
1630 
1631  for (size_t k = 0; k < ids.size(); ++k)
1632  {
1633  rewards[k].resize(nbTypes);
1634  }
1635 
1636  vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
1637  for (size_t i = 0; i < nbTypes; ++i)
1638  {
1639  usai[i] = make_shared<UserAlphabetIndex1>(nullModelSet->getAlphabet());
1640  }
1641 
// Process each model of the set in turn, restricted to the requested ids
// that actually use that model.
1642  for (size_t nbm = 0; nbm < nbModels; nbm++)
1643  {
1644  vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));
1645 
1646  if (mids.size() > 0)
1647  {
1648  auto modn = nullModelSet->getSubstitutionModel(nbm);
1649  vector<int> supportedStates = modn->getAlphabetStates();
1650 
// Reset the per-type indices before accumulating this model's rates.
1651  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1652  {
1653  for (size_t i = 0; i < nbStates; i++)
1654  {
1655  usai[nbt]->setIndex(supportedStates[i], 0);
1656  }
1657  }
1658 
1659  for (size_t i = 0; i < nbStates; i++)
1660  {
1661  for (size_t j = 0; j < nbStates; j++)
1662  {
1663  if (i != j)
1664  {
1665  size_t nbt = reg.getType(i, j);
1666  if (nbt != 0)
1667  usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + modn->Qij(i, j));
1668  }
1669  }
1670  }
1671 
1672  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1673  {
1674  auto reward = make_shared<DecompositionReward>(nullModelSet->getSubstitutionModel(nbm), usai[nbt]);
1675  auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, mids, reward, false);
1676 
1677  for (size_t k = 0; k < mids.size(); k++)
1678  {
1679  double s = 0;
1680  for (size_t i = 0; i < nbSites; ++i)
1681  {
1682  double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
1683  if (std::isnan(tmp))
1684  {
1685  if (verbose)
1686  ApplicationTools::displayWarning("On branch " + TextTools::toString(mids[k]) + ", reward for type " + reg.getTypeName(nbt + 1) + " could not be computed.");
1687  s = 0;
1688  break;
1689  }
1690  else
1691  s += tmp;
1692  }
1693 
// Store at the branch's position in the original `ids` ordering.
1694  rewards[VectorTools::which(ids, mids[k])][nbt] = s;
1695  }
1696  }
1697  }
1698  }
1699 
1700  return rewards;
1701 }
1702 
1703 /**************************************************************************************************/
1704 
/**
 * @brief Fill `result` with per-branch, per-type counts normalized by the
 * null-model rewards: raw counts (no saturation threshold) are divided by
 * the matching normalization factor, optionally scaled by word length when
 * the alphabet is a word alphabet and `perWord` is false, and multiplied by
 * the branch length unless `perTime` is true.
 *
 * NOTE(review): the declaration line (original line 1705, with return type
 * and function name) is missing from this extract — verify against the real
 * source.
 */
1706  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1707  const vector<int>& ids,
1708  std::shared_ptr<SubstitutionModelInterface> model,
1709  std::shared_ptr<SubstitutionModelInterface> nullModel,
1710  std::shared_ptr<const SubstitutionRegisterInterface> reg,
1711  VVdouble& result,
1712  bool perTime,
1713  bool perWord,
1714  bool verbose)
1715 {
1716  vector<vector<double>> factors;
1717 
1718  result = getCountsPerBranch(drtl, ids, model, reg, -1, verbose);
1719  factors = getNormalizationsPerBranch(drtl, ids, nullModel, *reg, verbose);
1720 
1721 
1722  // check if perWord
1723 
1724  auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModel->getAlphabet());
1725 
// Normalize per word position unless the caller asked for whole-word rates.
1726  float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
1727 
1728 
1729  size_t nbTypes = result[0].size();
1730 
// Zero factors are left undivided to avoid division by zero.
1731  for (size_t k = 0; k < ids.size(); ++k)
1732  {
1733  for (size_t t = 0; t < nbTypes; ++t)
1734  {
1735  if (factors[k][t] != 0)
1736  result[k][t] /= (factors[k][t] * sizeWord);
1737  }
1738  }
1739 
1740  // Multiply by the lengths of the branches of the input tree
1741 
1742  if (!perTime)
1743  {
1744  const TreeTemplate<Node> tree(drtl->tree());
1745 
1746  for (size_t k = 0; k < ids.size(); ++k)
1747  {
1748  double l = tree.getNode(ids[k])->getDistanceToFather();
1749  for (size_t t = 0; t < nbTypes; ++t)
1750  {
1751  result[k][t] *= l;
1752  }
1753  }
1754  }
1755 }
1756 
1757 /**************************************************************************************************/
1758 
/**
 * @brief Model-set overload of the normalized per-branch counts: identical
 * normalization logic to the single-model version, but counts come from the
 * model-set mapping and normalizations from the null model set.
 *
 * NOTE(review): the declaration line (original line 1759) is missing from
 * this extract — verify against the real source.
 */
1760  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1761  const vector<int>& ids,
1762  std::shared_ptr<SubstitutionModelSet> modelSet,
1763  std::shared_ptr<SubstitutionModelSet> nullModelSet,
1764  std::shared_ptr<const SubstitutionRegisterInterface> reg,
1765  VVdouble& result,
1766  bool perTime,
1767  bool perWord,
1768  bool verbose)
1769 {
1770  vector<vector<double>> factors;
1771 
1772  result = getCountsPerBranch(drtl, ids, *modelSet, reg, -1, verbose);
1773  factors = getNormalizationsPerBranch(drtl, ids, nullModelSet, *reg, verbose);
1774 
1775  size_t nbTypes = result[0].size();
1776 
1777  // check if perWord
1778 
1779  auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModelSet->getAlphabet());
1780 
// Normalize per word position unless the caller asked for whole-word rates.
1781  float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
1782 
1783 
// Zero factors are left undivided to avoid division by zero.
1784  for (size_t k = 0; k < ids.size(); ++k)
1785  {
1786  for (size_t t = 0; t < nbTypes; ++t)
1787  {
1788  if (factors[k][t] != 0)
1789  result[k][t] /= (factors[k][t] * sizeWord);
1790  }
1791  }
1792 
1793  // Multiply by the lengths of the branches of the input tree
1794 
1795  if (!perTime)
1796  {
1797  const TreeTemplate<Node> tree(drtl->tree());
1798 
1799  for (size_t k = 0; k < ids.size(); ++k)
1800  {
1801  double l = tree.getNode(ids[k])->getDistanceToFather();
1802  for (size_t t = 0; t < nbTypes; ++t)
1803  {
1804  result[k][t] *= l;
1805  }
1806  }
1807  }
1808 }
1809 
1810 /**************************************************************************************************/
1811 
/**
 * @brief Fill `result` with per-branch counts; when the register is a
 * non-stationary CategorySubstitutionRegister, each branch's counts are
 * additionally corrected by the posterior state-category frequencies at the
 * branch and rescaled so the branch total is preserved.
 *
 * NOTE(review): the declaration line (original line 1812) is missing from
 * this extract — verify against the real source.
 */
1813  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1814  const vector<int>& ids,
1815  std::shared_ptr<SubstitutionModelInterface> model,
1816  std::shared_ptr<const SubstitutionRegisterInterface> reg,
1817  VVdouble& result,
1818  double threshold,
1819  bool verbose)
1820 {
1821  result = getCountsPerBranch(drtl, ids, model, reg, threshold, verbose);
1822 
// Frequency correction only applies to category registers that are not
// stationary.
1823  auto creg = dynamic_pointer_cast<const CategorySubstitutionRegister>(reg);
1824 
1825  if (creg && !creg->isStationary())
1826  {
1827  size_t nbTypes = result[0].size();
1828 
1829  for (size_t k = 0; k < ids.size(); ++k)
1830  {
1831  vector<double> freqs = DRTreeLikelihoodTools::getPosteriorStateFrequencies(*drtl, ids[k]);
1832  // Compute frequencies for types:
1833  vector<double> freqsTypes(creg->getNumberOfCategories());
1834  for (size_t i = 0; i < freqs.size(); ++i)
1835  {
1836  size_t c = creg->getCategory(i);
1837  freqsTypes[c - 1] += freqs[i];
1838  }
1839 
1840  // We divide the counts by the frequencies and rescale:
1841  double s = VectorTools::sum(result[k]);
1842  for (size_t t = 0; t < nbTypes; ++t)
1843  {
1844  result[k][t] /= freqsTypes[creg->getCategoryFrom(t + 1) - 1];
1845  }
1846 
1847  double s2 = VectorTools::sum(result[k]);
1848  // Scale:
// Rescale so the branch's total count before and after correction match.
1849  result[k] = (result[k] / s2) * s;
1850  }
1851  }
1852 }
1853 
1854 /**************************************************************************************************/
1855 
/**
 * @brief Fill `result` (sites x branches) with, for each site, the total
 * substitution count (summed over types) on each requested branch.
 *
 * NOTE(review): doxygen-extract artifact — the declaration line (original
 * line 1856) and the statements at original lines 1864 (presumably the
 * computation of the mapping `smap` via computeSubstitutionVectors) and 1879
 * (presumably `countsf = computeTotalSubstitutionVectorForSitePerBranch(*smap, k)`)
 * are missing; verify against the real source.
 */
1857  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1858  const vector<int>& ids,
1859  std::shared_ptr<SubstitutionModelInterface> model,
1860  std::shared_ptr<const SubstitutionRegisterInterface> reg,
1861  VVdouble& result)
1862 {
1863  auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
1865 
1866  size_t nbSites = smap->getNumberOfSites();
1867  size_t nbBr = ids.size();
1868 
1869  VectorTools::resize2(result, nbSites, nbBr);
1870 
// Precompute each requested id's index in the mapping.
1871  vector<size_t> sdi(nbBr); // reverse of ids
1872  for (size_t i = 0; i < nbBr; ++i)
1873  {
1874  sdi[i] = smap->getNodeIndex(ids[i]);
1875  }
1876 
1877  for (size_t k = 0; k < nbSites; ++k)
1878  {
1880  Vdouble* resS = &result[k];
1881 
1882  for (size_t i = 0; i < nbBr; ++i)
1883  {
1884  (*resS)[i] = countsf[sdi[i]];
1885  }
1886  }
1887 }
1888 
1889 
1890 /**************************************************************************************************/
1891 
/**
 * @brief Fill `result` (sites x types) with, for each site, the substitution
 * count of each type summed over the requested branches.
 *
 * NOTE(review): doxygen-extract artifact — the declaration line (original
 * line 1892) and the statements at original lines 1900 (presumably the
 * computation of the mapping `smap`) and 1911 (presumably
 * `countsf = computeTotalSubstitutionVectorForSitePerType(*smap, k)`) are
 * missing; the locally declared `rmap` vector is never used in the visible
 * code — verify all of this against the real source.
 */
1893  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1894  const vector<int>& ids,
1895  std::shared_ptr<SubstitutionModelInterface> model,
1896  std::shared_ptr<const SubstitutionRegisterInterface> reg,
1897  VVdouble& result)
1898 {
1899  auto count = make_shared<UniformizationSubstitutionCount>(model, reg)
1901 
1902  size_t nbSites = smap->getNumberOfSites();
1903  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
1904 
1905  VectorTools::resize2(result, nbSites, nbTypes);
1906 
1907  vector<unique_ptr<LegacyProbabilisticRewardMapping>> rmap;
1908 
1909  for (size_t k = 0; k < nbSites; ++k)
1910  {
1912 
1913  Vdouble* resS = &result[k];
1914 
1915  for (size_t i = 0; i < nbTypes; ++i)
1916  {
1917  (*resS)[i] = countsf[i];
1918  }
1919  }
1920 }
1921 
1922 
1923 /**************************************************************************************************/
1924 
// LegacySubstitutionMappingTools::computeCountsPerSitePerType, normalized
// overload — the signature line (original line 1925) was dropped by the
// documentation extraction.
// Fills `result[site][type]` with substitution counts under `model`, each
// divided by the expected number of such substitutions ("reward") under
// `nullModel`, then rescaled by the total length of the branches in `ids`
// (or by 1 if `perTime`) and divided by the word length (unless `perWord`).
1926  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1927  const vector<int>& ids,
1928  std::shared_ptr<SubstitutionModelInterface> model,
1929  std::shared_ptr<SubstitutionModelInterface> nullModel,
1930  std::shared_ptr<const SubstitutionRegisterInterface> reg,
1931  VVdouble& result,
1932  bool perTime,
1933  bool perWord)
1934 {
1935  auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
// NOTE(review): original line 1936 is missing; presumably it built `smap` via
// computeSubstitutionVectors(drtl, ids, count, false) — confirm.
1937 
1938  size_t nbSites = smap->getNumberOfSites();
1939  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
1940  size_t nbStates = nullModel->alphabet().getSize();
1941  vector<int> supportedStates = nullModel->getAlphabetStates();
1942 
1943  VectorTools::resize2(result, nbSites, nbTypes);
1944 
1945  // compute the AlphabetIndex for each substitutionType
// One user-defined alphabet index per substitution type, initialized to 0 for
// every state of the null model's alphabet.
1946  vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
1947 
1948  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1949  {
1950  usai[nbt] = make_shared<UserAlphabetIndex1>(nullModel->getAlphabet());
1951  for (size_t i = 0; i < nbStates; i++)
1952  {
1953  usai[nbt]->setIndex(supportedStates[i], 0);
1954  }
1955  }
1956 
// For each typed substitution i->j, add the null-model rate Qij to the index of
// the source state i for that type (reg types are 1-based; 0 means untyped).
1957  for (size_t i = 0; i < nbStates; i++)
1958  {
1959  for (size_t j = 0; j < nbStates; j++)
1960  {
1961  if (i != j)
1962  {
1963  size_t nbt = reg->getType(i, j);
1964  if (nbt != 0)
1965  usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + nullModel->Qij(i, j));
1966  }
1967  }
1968  }
1969 
1970  // compute the normalization for each site for each substitutionType
1971  vector<vector<double>> rewards(nbSites);
1972 
1973  for (size_t k = 0; k < nbSites; ++k)
1974  {
1975  rewards[k].resize(nbTypes);
1976  }
1977 
1978  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1979  {
1980  auto reward = make_shared<DecompositionReward>(nullModel, usai[nbt]);
1981  auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, ids, reward, false);
1982 
// Sum rewards over the requested branches; any NaN forces the whole sum to 0
// for that site/type.
1983  for (size_t i = 0; i < nbSites; ++i)
1984  {
1985  double s = 0;
1986  for (size_t k = 0; k < ids.size(); ++k)
1987  {
1988  double tmp = (*mapping)(mapping->getNodeIndex(ids[k]), i);
1989  if (std::isnan(tmp))
1990  {
1991  s = 0;
1992  break;
1993  }
1994  s += tmp;
1995  }
1996  rewards[i][nbt] = s;
1997  }
1998  }
1999 
2000  // Compute the sum of lengths of concerned branches
2001 
2002  double brlen = 0;
2003 
2004  if (!perTime)
2005  {
2006  const TreeTemplate<Node> tree(drtl->tree());
2007  for (size_t k = 0; k < ids.size(); ++k)
2008  {
2009  brlen += tree.getNode(ids[k])->getDistanceToFather();
2010  }
2011  }
2012  else
2013  brlen = 1;
2014 
2015  // check if perWord
2016 
// For word alphabets (e.g. codons) the normalization is divided by the word
// length unless per-word counts were requested.
2017  auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModel->getAlphabet());
2018 
2019  float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2020 
2021  // output
2022 
2023  for (size_t k = 0; k < nbSites; ++k)
2024  {
// NOTE(review): original line 2025 is missing; presumably it computed `countsf`
// for site k from `smap`.
2026 
2027  Vdouble& resS = result[k];
2028  for (size_t i = 0; i < nbTypes; ++i)
2029  {
// NOTE(review): if rewards[k][i] is 0 (including the NaN-reset case above),
// this division yields inf/NaN — confirm callers expect that.
2030  resS[i] = countsf[i] / rewards[k][i] * brlen / sizeWord;
2031  }
2032  }
2033 }
2034 
2035 
2036 /**************************************************************************************************/
2037 
// LegacySubstitutionMappingTools::computeCountsPerSitePerType, non-homogeneous
// overload (model sets) — the signature line (original line 2038) was dropped
// by the documentation extraction.
// Same normalized per-site/per-type counts as the single-model overload, but
// the null-model normalization is computed per model of `nullModelSet`, each
// restricted to the branches it governs, and accumulated over models.
2039  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2040  const vector<int>& ids,
2041  std::shared_ptr<SubstitutionModelSet> modelSet,
2042  std::shared_ptr<SubstitutionModelSet> nullModelSet,
2043  std::shared_ptr<const SubstitutionRegisterInterface> reg,
2044  VVdouble& result,
2045  bool perTime,
2046  bool perWord)
2047 {
// The counting process is parameterized by the first model of the set only.
2048  auto count = make_shared<UniformizationSubstitutionCount>(modelSet->getSubstitutionModel(0), reg);
// NOTE(review): original line 2049 is missing; presumably it built `smap` via
// computeSubstitutionVectors(drtl, ids, count, false) — confirm.
2050 
2051  size_t nbSites = smap->getNumberOfSites();
2052  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2053  size_t nbStates = nullModelSet->alphabet().getSize();
2054  size_t nbModels = nullModelSet->getNumberOfModels();
2055 
2056  VectorTools::resize2(result, nbSites, nbTypes);
2057 
2058  // compute the normalization for each site for each substitutionType
2059  vector< vector<double>> rewards(nbSites);
2060 
2061  for (size_t k = 0; k < nbSites; ++k)
2062  {
2063  rewards[k].resize(nbTypes);
2064  }
2065 
2066  vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
2067 
2068  for (size_t nbm = 0; nbm < nbModels; nbm++)
2069  {
// Only the branches in `ids` that this null model actually governs.
2070  vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));
2071 
2072  if (mids.size() > 0)
2073  {
2074  const std::shared_ptr<SubstitutionModelInterface> modn = nullModelSet->getSubstitutionModel(nbm);
2075  vector<int> supportedStates = modn->getAlphabetStates();
2076 
// Reset the per-type user indices for this model.
2077  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2078  {
2079  usai[nbt] = make_shared<UserAlphabetIndex1>(nullModelSet->getAlphabet());
2080  for (size_t i = 0; i < nbStates; i++)
2081  {
2082  usai[nbt]->setIndex(supportedStates[i], 0);
2083  }
2084  }
2085 
// Accumulate this null model's rates Qij into the index of the source state,
// per substitution type (reg types are 1-based; 0 means untyped).
2086  for (size_t i = 0; i < nbStates; i++)
2087  {
2088  for (size_t j = 0; j < nbStates; j++)
2089  {
2090  if (i != j)
2091  {
2092  size_t nbt = reg->getType(i, j);
2093  if (nbt != 0)
2094  usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + modn->Qij(i, j));
2095  }
2096  }
2097  }
2098 
2099  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2100  {
2101  auto reward = make_shared<DecompositionReward>(nullModelSet->getSubstitutionModel(nbm), usai[nbt]);
2102  auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, mids, reward, false);
2103 
// Sum this model's rewards over its branches and accumulate into the per-site
// normalization; a NaN zeroes this model's contribution for that site/type.
2104  for (size_t i = 0; i < nbSites; ++i)
2105  {
2106  double s = 0;
2107  for (size_t k = 0; k < mids.size(); k++)
2108  {
2109  double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
2110 
2111  if (std::isnan(tmp))
2112  {
2113  s = 0;
2114  break;
2115  }
2116  else
2117  s += tmp;
2118  }
2119  rewards[i][nbt] += s;
2120  }
2121  }
2122  }
2123  }
2124 
2125  // Compute the sum of lengths of concerned branches
2126 
2127 
2128  double brlen = 0;
2129 
2130  if (!perTime)
2131  {
2132  const TreeTemplate<Node> tree(drtl->tree());
2133  for (size_t k = 0; k < ids.size(); ++k)
2134  {
2135  brlen += tree.getNode(ids[k])->getDistanceToFather();
2136  }
2137  }
2138  else
2139  brlen = 1.;
2140 
2141 
2142  // check if perWord
2143  auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModelSet->getAlphabet());
2144  float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2145 
2146  // output
2147 
2148  for (size_t k = 0; k < nbSites; ++k)
2149  {
// NOTE(review): original line 2150 is missing; presumably it computed `countsf`
// for site k from `smap`.
2151 
2152  Vdouble& resS = result[k];
2153 
2154  for (size_t i = 0; i < nbTypes; ++i)
2155  {
// NOTE(review): rewards[k][i] == 0 yields inf/NaN here — confirm intended.
2156  resS[i] = countsf[i] / rewards[k][i] * brlen / sizeWord;
2157  }
2158  }
2159 }
2160 
2161 
2162 /**************************************************************************************************/
2163 
// LegacySubstitutionMappingTools::computeCountsPerSitePerBranchPerType — the
// signature line (original line 2164) was dropped by the documentation
// extraction.
// Fills `result[site][branch][type]` with the raw substitution counts from the
// mapping, for each branch in `ids`, using a uniformization-based count under
// `model` and the typing given by `reg`.
2165  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2166  const vector<int>& ids,
2167  std::shared_ptr<SubstitutionModelInterface> model,
2168  std::shared_ptr<const SubstitutionRegisterInterface> reg,
2169  VVVdouble& result)
2170 {
2171  auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
// NOTE(review): original line 2172 is missing; presumably it built `smap` via
// computeSubstitutionVectors(drtl, ids, count, false) — confirm.
2173 
2174  size_t nbSites = smap->getNumberOfSites();
2175  size_t nbBr = ids.size();
2176  size_t nbTypes = reg->getNumberOfSubstitutionTypes();
2177 
2178  VectorTools::resize3(result, nbSites, nbBr, nbTypes);
2179 
// Copy counts out of the mapping; the type loop is outermost, writing one
// [site][branch] slice per type.
2180  for (size_t i = 0; i < nbTypes; ++i)
2181  {
2182  for (size_t j = 0; j < nbSites; ++j)
2183  {
2184  VVdouble& resS = result[j];
2185 
2186  for (size_t k = 0; k < nbBr; ++k)
2187  {
2188  resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i);
2189  }
2190  }
2191  }
2192 }
2193 
2194 /**************************************************************************************************/
2195 
// LegacySubstitutionMappingTools::computeCountsPerSitePerBranchPerType,
// normalized overload — the signature line (original line 2196) was dropped by
// the documentation extraction.
// Fills `result[site][branch][type]` with counts under `model` divided by the
// per-branch expected counts ("rewards") under `nullModel`, rescaled by each
// branch's length (or 1 if `perTime`) and by the word length (unless
// `perWord`).
2197  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2198  const vector<int>& ids,
2199  std::shared_ptr<SubstitutionModelInterface> model,
2200  std::shared_ptr<SubstitutionModelInterface> nullModel,
2201  std::shared_ptr<const SubstitutionRegisterInterface> reg,
2202  VVVdouble& result,
2203  bool perTime,
2204  bool perWord)
2205 {
2206  auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
// NOTE(review): original line 2207 is missing; presumably it built `smap` via
// computeSubstitutionVectors(drtl, ids, count, false) — confirm.
2208 
2209  size_t nbSites = smap->getNumberOfSites();
2210  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2211  size_t nbStates = nullModel->alphabet().getSize();
2212  size_t nbBr = ids.size();
2213 
2214  VectorTools::resize3(result, nbSites, nbBr, nbTypes);
2215 
2216  // compute the normalization for each site for each substitutionType
// Per-branch normalization table: node id -> [site][type].
2217  map<int, vector< vector<double>>> rewards;
2218 
2219  for (auto id : ids)
2220  {
2221  VectorTools::resize2(rewards[id], nbSites, nbTypes);
2222  }
2223 
2224  vector<int> supportedStates = nullModel->getAlphabetStates();
2225 
2226  // compute the AlphabetIndex for each substitutionType
2227  vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
2228 
// One user index per substitution type, all states initialized to 0.
2229  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2230  {
2231  usai[nbt] = make_shared<UserAlphabetIndex1>(nullModel->getAlphabet());
2232  for (size_t i = 0; i < nbStates; i++)
2233  {
2234  usai[nbt]->setIndex(supportedStates[i], 0);
2235  }
2236  }
2237 
// Accumulate null-model rates Qij into the index of the source state, per
// substitution type (reg types are 1-based; 0 means untyped).
2238  for (size_t i = 0; i < nbStates; i++)
2239  {
2240  for (size_t j = 0; j < nbStates; j++)
2241  {
2242  if (i != j)
2243  {
2244  size_t nbt = reg->getType(i, j);
2245  if (nbt != 0)
2246  usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + nullModel->Qij(i, j));
2247  }
2248  }
2249  }
2250 
// Per-branch rewards; NaN entries are replaced by 0 (which later divides to
// inf — see the review note at the output loop).
2251  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2252  {
2253  auto reward = make_shared<DecompositionReward>(nullModel, usai[nbt]);
2254  auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, ids, reward, false);
2255 
2256  for (size_t i = 0; i < nbSites; ++i)
2257  {
2258  for (size_t k = 0; k < ids.size(); ++k)
2259  {
2260  double tmp = (*mapping)(mapping->getNodeIndex(ids[k]), i);
2261 
2262  if (std::isnan(tmp))
2263  tmp = 0;
2264 
2265  rewards[ids[k]][i][nbt] = tmp;
2266  }
2267  }
2268  }
2269 
2270  // check if perWord
2271  auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModel->getAlphabet());
2272  float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2273 
2274 
2275  // output
2276  const TreeTemplate<Node>& tree = drtl->tree();
2277 
2278  for (size_t i = 0; i < nbTypes; ++i)
2279  {
2280  for (size_t j = 0; j < nbSites; ++j)
2281  {
2282  VVdouble& resS = result[j];
2283 
2284  for (size_t k = 0; k < nbBr; ++k)
2285  {
// NOTE(review): a zero reward (including the NaN-replaced case) makes this
// division produce inf/NaN — confirm callers expect that.
2286  resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i) / rewards[ids[k]][j][i] * (perTime ? 1 : tree.getNode(ids[k])->getDistanceToFather()) / sizeWord;
2287  }
2288  }
2289  }
2290 }
2291 
2292 /**************************************************************************************************/
2293 
// LegacySubstitutionMappingTools::computeCountsPerSitePerBranchPerType,
// non-homogeneous overload (model sets) — the signature line (original line
// 2294) was dropped by the documentation extraction.
// Same normalized per-site/per-branch/per-type counts as the single-model
// overload, with the null-model normalization computed per model of
// `nullModelSet`, each restricted to the branches it governs.
2295  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2296  const vector<int>& ids,
2297  std::shared_ptr<SubstitutionModelSet> modelSet,
2298  std::shared_ptr<SubstitutionModelSet> nullModelSet,
2299  std::shared_ptr<const SubstitutionRegisterInterface> reg,
2300  VVVdouble& result,
2301  bool perTime,
2302  bool perWord)
2303 {
// The counting process is parameterized by the first model of the set only.
2304  auto count = make_shared<UniformizationSubstitutionCount>(modelSet->getSubstitutionModel(0), reg);
// NOTE(review): original line 2305 is missing; presumably it built `smap` via
// computeSubstitutionVectors(drtl, ids, count, false) — confirm.
2306 
2307  size_t nbSites = smap->getNumberOfSites();
2308  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2309  size_t nbStates = nullModelSet->alphabet().getSize();
2310  size_t nbModels = nullModelSet->getNumberOfModels();
2311  size_t nbBr = ids.size();
2312 
2313  VectorTools::resize3(result, nbSites, nbBr, nbTypes);
2314 
2315  // compute the normalization for each site for each substitutionType
// Per-branch normalization table: node id -> [site][type].
2316  map<int, vector<vector<double>>> rewards;
2317 
2318  for (auto id : ids)
2319  {
2320  VectorTools::resize2(rewards[id], nbSites, nbTypes);
2321  }
2322 
2323  vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
2324  for (size_t i = 0; i < nbTypes; ++i)
2325  {
2326  usai[i] = make_shared<UserAlphabetIndex1>(nullModelSet->getAlphabet());
2327  }
2328 
2329  for (size_t nbm = 0; nbm < nbModels; nbm++)
2330  {
// Only the branches in `ids` that this null model actually governs.
2331  vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));
2332 
2333  if (mids.size() > 0)
2334  {
2335  auto modn = nullModelSet->getSubstitutionModel(nbm);
2336  vector<int> supportedStates = modn->getAlphabetStates();
2337 
// Reset the per-type user indices before accumulating this model's rates.
2338  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2339  {
2340  for (size_t i = 0; i < nbStates; i++)
2341  {
2342  usai[nbt]->setIndex(supportedStates[i], 0);
2343  }
2344  }
2345 
// Accumulate this null model's rates Qij into the index of the source state,
// per substitution type (reg types are 1-based; 0 means untyped).
2346  for (size_t i = 0; i < nbStates; i++)
2347  {
2348  for (size_t j = 0; j < nbStates; j++)
2349  {
2350  if (i != j)
2351  {
2352  size_t nbt = reg->getType(i, j);
2353  if (nbt != 0)
2354  usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + modn->Qij(i, j));
2355  }
2356  }
2357  }
2358 
// Per-branch rewards for the branches of this model; NaN is replaced by 0.
2359  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2360  {
2361  auto reward = make_shared<DecompositionReward>(nullModelSet->getSubstitutionModel(nbm), usai[nbt]);
2362  auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, mids, reward, false);
2363 
2364  for (size_t i = 0; i < nbSites; ++i)
2365  {
2366  for (size_t k = 0; k < mids.size(); k++)
2367  {
2368  double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
2369 
2370  if (std::isnan(tmp))
2371  tmp = 0;
2372 
2373  rewards[mids[k]][i][nbt] = tmp;
2374  }
2375  }
2376  }
2377  }
2378  }
2379 
2380  // check if perWord
2381  auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModelSet->getAlphabet());
2382  float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2383 
2384 
2385  // output
2386  const TreeTemplate<Node>& tree = drtl->tree();
2387 
2388  for (size_t i = 0; i < nbTypes; ++i)
2389  {
2390  for (size_t j = 0; j < nbSites; ++j)
2391  {
2392  VVdouble& resS = result[j];
2393 
2394  for (size_t k = 0; k < nbBr; ++k)
2395  {
// NOTE(review): a zero reward (including the NaN-replaced case) makes this
// division produce inf/NaN — confirm callers expect that.
2396  resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i) / rewards[ids[k]][j][i] * (perTime ? 1 : tree.getNode(ids[k])->getDistanceToFather()) / sizeWord;
2397  }
2398  }
2399  }
2400 }
2401 
2402 
2403 /**************************************************************************************************/
2404 
// LegacySubstitutionMappingTools::outputPerSitePerBranch — the signature line
// (original line 2405) was dropped by the documentation extraction.
// Writes `counts[site][branch]` as a tab-separated table to `filename`:
// a header row "sites\t<id>..." followed by one row per site (index, then one
// column per branch id). Returns silently if `counts` is empty.
2406  const string& filename,
2407  const vector<int>& ids,
2408  const VVdouble& counts)
2409 {
2410  size_t nbSites = counts.size();
2411  if (nbSites == 0)
2412  return;
2413  size_t nbBr = counts[0].size();
2414 
// NOTE(review): the stream is never checked after open(); a bad path fails
// silently.
2415  ofstream file;
2416  file.open(filename.c_str());
2417 
2418  file << "sites";
2419  for (size_t i = 0; i < nbBr; ++i)
2420  {
2421  file << "\t" << ids[i];
2422  }
2423  file << endl;
2424 
2425  for (size_t k = 0; k < nbSites; ++k)
2426  {
2427  const Vdouble& countS = counts[k];
2428  file << k;
2429  for (size_t i = 0; i < nbBr; ++i)
2430  {
2431  file << "\t" << countS[i];
2432  }
2433  file << endl;
2434  }
2435  file.close();
2436 }
2437 
2438 
2439 /**************************************************************************************************/
2440 
// LegacySubstitutionMappingTools::outputPerSitePerType — the signature line
// (original line 2441) was dropped by the documentation extraction.
// Writes `counts[site][type]` as a tab-separated table to `filename`:
// a header row with the register's type names (types are 1-based in `reg`),
// then one row per site. Returns silently if `counts` is empty.
2442  const string& filename,
2443  const SubstitutionRegisterInterface& reg,
2444  const VVdouble& counts)
2445 {
2446  size_t nbSites = counts.size();
2447  if (nbSites == 0)
2448  return;
2449  size_t nbTypes = counts[0].size();
2450 
2451  ofstream file;
2452  file.open(filename.c_str());
2453 
2454  file << "sites";
2455  for (size_t i = 0; i < nbTypes; ++i)
2456  {
2457  file << "\t" << reg.getTypeName(i + 1);
2458  }
2459  file << endl;
2460 
2461  for (size_t k = 0; k < nbSites; ++k)
2462  {
2463  file << k;
2464  const Vdouble& resS = counts[k];
2465  for (size_t i = 0; i < nbTypes; ++i)
2466  {
2467  file << "\t" << resS[i];
2468  }
2469  file << endl;
2470  }
2471  file.close();
2472 }
2473 
2474 
2475 /**************************************************************************************************/
2476 
// LegacySubstitutionMappingTools::outputPerSitePerBranchPerType — the
// signature line (original line 2477) was dropped by the documentation
// extraction.
// Writes one tab-separated file per substitution type, named
// "<filenamePrefix><typeName>.count" (falling back to the 1-based type number
// when the register gives an empty name). Each file holds the
// `counts[site][branch][type]` slice for that type, with a "sites\t<id>..."
// header. Returns silently if `counts` has no sites or no branches.
2478  const string& filenamePrefix,
2479  const vector<int>& ids,
2480  const SubstitutionRegisterInterface& reg,
2481  const VVVdouble& counts)
2482 {
2483  size_t nbSites = counts.size();
2484  if (nbSites == 0)
2485  return;
2486  size_t nbBr = counts[0].size();
2487  if (nbBr == 0)
2488  return;
2489  size_t nbTypes = counts[0][0].size();
2490 
// The same ofstream is reused across types: open/close once per type.
2491  ofstream file;
2492 
2493  for (size_t i = 0; i < nbTypes; ++i)
2494  {
2495  string name = reg.getTypeName(i + 1);
2496  if (name == "")
2497  name = TextTools::toString(i + 1);
2498 
2499  string path = filenamePrefix + name + string(".count");
2500 
2501  ApplicationTools::displayResult(string("Output counts of type ") + TextTools::toString(i + 1) + string(" to file"), path);
2502  file.open(path.c_str());
2503 
2504  file << "sites";
2505  for (size_t k = 0; k < nbBr; ++k)
2506  {
2507  file << "\t" << ids[k];
2508  }
2509  file << endl;
2510 
2511  for (size_t j = 0; j < nbSites; ++j)
2512  {
2513  const VVdouble& resS = counts[j];
2514 
2515  file << j;
2516  for (size_t k = 0; k < nbBr; ++k)
2517  {
2518  file << "\t" << resS[k][i];
2519  }
2520  file << endl;
2521  }
2522  file.close();
2523  }
2524 }
int getCoordinate() const override
static void displayTask(const std::string &text, bool eof=false)
static std::shared_ptr< OutputStream > message
static void displayTaskDone()
static void displayWarning(const std::string &text)
static void displayResult(const std::string &text, const T &result)
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
static VVVdouble getPosteriorProbabilitiesPerStatePerRate(const DRTreeLikelihoodInterface &drl, int nodeId)
Compute the posterior probabilities for each state and each rate of each distinct site.
static Vdouble getPosteriorStateFrequencies(const DRTreeLikelihoodInterface &drl, int nodeId)
Compute the posterior probabilities for each state for a given node.
static std::unique_ptr< DataTable > read(std::istream &in, const std::string &sep="\t", bool header=true, int rowNames=-1)
const char * what() const noexcept override
size_t getNumberOfSites() const override
Definition: Mapping.h:158
void setSitePosition(size_t index, int position) override
Set the position of a given site.
Definition: Mapping.h:152
virtual size_t getNodeIndex(int nodeId) const override
Definition: Mapping.h:184
size_t getNumberOfBranches() const override
Definition: Mapping.h:160
virtual const Node * getNode(size_t nodeIndex) const
Definition: Mapping.h:162
virtual size_t getNumberOfBranches() const =0
virtual size_t getNumberOfSites() const =0
Likelihood ancestral states reconstruction: marginal method.
std::map< int, std::vector< size_t > > getAllAncestralStates() const override
Get all ancestral states for all nodes.
Legacy data storage class for probabilistic substitution mappings.
virtual void setNumberOfSites(size_t numberOfSites) override
static std::unique_ptr< LegacyProbabilisticRewardMapping > computeRewardVectors(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, const std::vector< int > &nodeIds, std::shared_ptr< Reward > reward, bool verbose=true)
Compute the reward vectors for a particular dataset using the double-recursive likelihood computation...
Legacy interface for storing mapping data.
virtual size_t getNumberOfSubstitutionTypes() const =0
static void outputPerSitePerBranch(const std::string &filename, const std::vector< int > &ids, const VVdouble &counts)
Outputs of counts.
static void outputPerSitePerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const VVdouble &counts)
Output Per Site Per Type.
static std::vector< double > computeTotalSubstitutionVectorForSitePerBranch(const LegacySubstitutionMappingInterface &smap, size_t siteIndex)
Sum all type of substitutions for each branch of a given position (specified by its index).
static std::vector< double > computeSumForSite(const LegacySubstitutionMappingInterface &smap, size_t siteIndex)
Sum all substitutions for each type of a given site (specified by its index).
static void readFromStream(std::istream &in, LegacyProbabilisticSubstitutionMapping &substitutions, size_t type)
Read the substitutions vectors from a stream.
static std::unique_ptr< LegacyProbabilisticSubstitutionMapping > computeSubstitutionVectors(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, std::shared_ptr< SubstitutionCountInterface > substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static void computeCountsPerSitePerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, VVdouble &array)
Per Branch Per Site methods.
static std::vector< std::vector< double > > getNormalizationsPerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< const SubstitutionModelInterface > nullModel, const SubstitutionRegisterInterface &reg, bool verbose=true)
Returns the normalization factors due to the null model on each branch, for each register.
static void computeCountsPerSitePerType(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, VVdouble &result)
Per Type Per Site methods.
static std::vector< double > computeSumForBranch(const LegacySubstitutionMappingInterface &smap, size_t branchIndex)
Sum all substitutions for each type of a given branch (specified by its index).
static std::unique_ptr< LegacyProbabilisticSubstitutionMapping > computeSubstitutionVectorsNoAveragingMarginal(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, std::shared_ptr< SubstitutionCountInterface > substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static void outputPerSitePerBranchPerType(const std::string &filenamePrefix, const std::vector< int > &ids, const SubstitutionRegisterInterface &reg, const VVVdouble &counts)
Output Per Site Per Branch Per Type.
static void writeToStream(const LegacyProbabilisticSubstitutionMapping &substitutions, const SiteContainerInterface &sites, size_t type, std::ostream &out)
Write the substitutions vectors to a stream.
static std::vector< double > computeTotalSubstitutionVectorForSitePerType(const LegacySubstitutionMappingInterface &smap, size_t siteIndex)
Sum all type of substitutions for each type of a given position (specified by its index).
static std::vector< std::vector< double > > getCountsPerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, double threshold=-1, bool verbose=true)
Per Branch methods.
static std::unique_ptr< LegacyProbabilisticSubstitutionMapping > computeSubstitutionVectorsNoAveraging(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, std::shared_ptr< SubstitutionCountInterface > substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static void computeCountsPerSitePerBranchPerType(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, VVVdouble &result)
Per Branch Per Site Per Type methods.
static std::unique_ptr< LegacyProbabilisticSubstitutionMapping > computeSubstitutionVectorsMarginal(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, std::shared_ptr< SubstitutionCountInterface > substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static double computeNormForSite(const LegacySubstitutionMappingInterface &smap, size_t siteIndex)
Compute the norm of a substitution vector for a given position (specified by its index).
static void computeCountsPerTypePerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, VVdouble &result, double threshold=-1, bool verbose=true)
Per Type Per Branch methods.
static std::vector< size_t > whichMax(const Matrix &m)
static void fill(Matrix &M, Scalar x)
The phylogenetic node class.
Definition: Node.h:59
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:306
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:346
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
Definition: Node.h:250
virtual size_t getNumberOfSons() const
Definition: Node.h:355
Substitution models manager for non-homogeneous / non-reversible models of evolution.
std::shared_ptr< const SubstitutionModelInterface > getSubstitutionModelForNode(int nodeId) const
std::shared_ptr< const SubstitutionModelInterface > getSubstitutionModel(size_t i) const
Return a markovian substitution model (or null)
The SubstitutionRegister interface.
virtual std::string getTypeName(size_t type) const =0
Get the name of a given substitution type.
virtual size_t getNumberOfSubstitutionTypes() const =0
virtual size_t getType(size_t fromState, size_t toState) const =0
Get the substitution type far a given pair of model states.
virtual const Site & site(size_t sitePosition) const override=0
The phylogenetic tree class.
Definition: TreeTemplate.h:59
virtual std::vector< const N * > getNodes() const
Definition: TreeTemplate.h:397
int getRootId() const
Definition: TreeTemplate.h:127
virtual N * getNode(int id, bool checkId=false)
Definition: TreeTemplate.h:405
static T sum(const std::vector< T > &v1)
static void resize3(VVVdouble &vvv, size_t n1, size_t n2, size_t n3)
static size_t which(const std::vector< T > &v, const T &which)
static bool contains(const std::vector< T > &vec, T el)
static void resize2(VVdouble &vv, size_t n1, size_t n2)
static std::vector< T > vectorIntersection(const std::vector< T > &vec1, const std::vector< T > &vec2)
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVVdouble > VVVVdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble