bpp-phyl3 3.0.0
SubstitutionMappingTools.cpp
// SPDX-FileCopyrightText: The Bio++ Development Group
//
// SPDX-License-Identifier: CECILL-2.1

#include "SubstitutionMappingTools.h"
#include "RewardMappingTools.h"
#include "../../Mapping/UniformizationSubstitutionCount.h"
#include "../../Mapping/DecompositionReward.h"
#include "../Likelihood/DRTreeLikelihoodTools.h"
#include "../Likelihood/MarginalAncestralStateReconstruction.h"

#include <Bpp/Text/TextTools.h>
#include <Bpp/App/ApplicationTools.h>
#include <Bpp/Numeric/VectorTools.h>
#include <Bpp/Numeric/Matrix/MatrixTools.h>
#include <Bpp/Numeric/DataTable.h>
#include <Bpp/Seq/AlphabetIndex/UserAlphabetIndex1.h>

using namespace bpp;

// From the STL:
#include <iomanip>
#include <cmath>

using namespace std;
/******************************************************************************/

unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectors(
  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
  const vector<int>& nodeIds,
  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl->isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl->tree());
  auto& sequences = drtl->data();
  auto& rDist = drtl->rateDistribution();

  size_t nbSites = sequences.getNumberOfSites();
  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
  size_t nbStates = sequences.alphabet().getSize();
  size_t nbClasses = rDist.getNumberOfCategories();
  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  const auto& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);

  // Store likelihood for each rate for each site:
  VVVdouble lik;
  drtl->computeLikelihoodAtNode(tree.getRootId(), lik);
  Vdouble Lr(nbDistinctSites, 0);
  Vdouble rcProbs = rDist.getProbabilities();
  Vdouble rcRates = rDist.getCategories();
  for (size_t i = 0; i < nbDistinctSites; i++)
  {
    VVdouble* lik_i = &lik[i];
    for (size_t c = 0; c < nbClasses; c++)
    {
      Vdouble* lik_i_c = &(*lik_i)[c];
      double rc = rDist.getProbability(c);
      for (size_t s = 0; s < nbStates; s++)
      {
        Lr[i] += (*lik_i_c)[s] * rc;
      }
    }
  }
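  // At this point Lr[i] holds the full likelihood of distinct site i, i.e. the
  // per-class conditional likelihoods averaged over rate classes:
  //   Lr[i] = sum_c Pr(class c) * sum_s L(site i, state s | c).
  // It is used below to turn joint likelihoods into posterior expectations.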

  // Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];
    if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
      continue;

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Now we've got to compute likelihoods in a smart manner... ;)
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; i++)
    {
      VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i->resize(nbClasses);
      for (size_t c = 0; c < nbClasses; c++)
      {
        Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
        likelihoodsFatherConstantPart_i_c->resize(nbStates);
        double rc = rDist.getProbability(c);
        for (size_t s = 0; s < nbStates; s++)
        {
          // (*likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
          // Frequencies are already accounted for in the array.
          (*likelihoodsFatherConstantPart_i_c)[s] = rc;
        }
      }
    }

    // First, what will remain constant:
    size_t nbSons = father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; n++)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          auto bmd = mit->next();
          unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition:
            if (first)
            {
              pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
            VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; c++)
            {
              const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
              Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
              VVdouble& pxy_c = pxy[c];
              for (size_t x = 0; x < nbStates; x++)
              {
                Vdouble& pxy_c_x = pxy_c[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; y++)
                {
                  likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
                }
                likelihoodsFatherConstantPart_i_c[x] *= likelihood;
              }
            }
          }
        }
      }
    }
    if (father->hasFather())
    {
      const Node* currentSon = father->getFather();
      const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
      // Now iterate over all site partitions:
      unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        auto bmd = mit->next();
        unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl->getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
          VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; c++)
          {
            const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
            Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
            VVdouble& pxy_c = pxy[c];
            for (size_t x = 0; x < nbStates; x++)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; y++)
              {
                // Note the transposed indexing: the transition goes from the
                // grandfather (y) towards the father (x).
                Vdouble& pxy_c_y = pxy_c[y];
                likelihood += pxy_c_y[x] * likelihoodsFather_son_i_c[y];
              }
              likelihoodsFatherConstantPart_i_c[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; i++)
      {
        vector<double> freqs = drtl->getRootFrequencies(i);
        VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; c++)
        {
          Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
          for (size_t x = 0; x < nbStates; x++)
          {
            likelihoodsFatherConstantPart_i_c[x] *= freqs[x];
          }
        }
      }
    }

    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentNode->getId());
    unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      auto bmd = mit->next();
      substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
      // compute all nxy first:
      VVVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVVdouble& nxy_c = nxy[c];
        double rc = rcRates[c];
        nxy_c.resize(nbTypes);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          VVdouble& nxy_c_t = nxy_c[t];
          auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);

          nxy_c_t.resize(nbStates);
          for (size_t x = 0; x < nbStates; ++x)
          {
            Vdouble& nxy_c_t_x = nxy_c_t[x];
            nxy_c_t_x.resize(nbStates);
            for (size_t y = 0; y < nbStates; ++y)
            {
              nxy_c_t_x[y] = (*nijt)(x, y);
            }
          }
        }
      }

      // Now loop over sites:
      unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
        VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
          Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
          const VVdouble& pxy_c = pxy[c];
          VVVdouble& nxy_c = nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
            const Vdouble& pxy_c_x = pxy_c[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
                  * pxy_c_x[y]
                  * likelihoodsFather_node_i_c[y];

              for (size_t t = 0; t < nbTypes; ++t)
              {
                // Now the vector computation:
                // likelihood_cxy is the posterior probability for site i and
                // rate class c times the likelihood for this site, and
                // nxy_c[t][x][y] is the substitution function for site i and
                // rate class c.
                substitutionsForCurrentNode[i][t] += likelihood_cxy * nxy_c[t][x][y];
              }
            }
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; ++i)
    {
      for (size_t t = 0; t < nbTypes; ++t)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t] / Lr[rootPatternLinks[i]];
      }
    }
  }
  if (verbose)
  {
    ApplicationTools::displayTaskDone();
  }

  return substitutions;
}
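
/*
 * Example usage (a minimal sketch; 'tree', 'sites', 'model', 'rDist' and 'reg' are
 * hypothetical objects assumed to be built elsewhere):
 *
 *   auto drtl = make_shared<DRHomogeneousTreeLikelihood>(*tree, *sites, model, rDist, false, false);
 *   drtl->initialize();
 *   auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
 *   auto mapping = LegacySubstitutionMappingTools::computeSubstitutionVectors(
 *       drtl, vector<int>(), count, true); // an empty id list selects all branches
 *   // (*mapping)(branchIndex, siteIndex, type) is then the posterior expected
 *   // number of substitutions of the given type on that branch at that site.
 */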

/******************************************************************************/

unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectors(
  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
  const SubstitutionModelSet& modelSet,
  const vector<int>& nodeIds,
  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl->isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl->tree());
  auto& sequences = drtl->data();
  auto& rDist = drtl->rateDistribution();

  size_t nbSites = sequences.getNumberOfSites();
  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
  size_t nbStates = sequences.alphabet().getSize();
  size_t nbClasses = rDist.getNumberOfCategories();
  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  auto& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);

  // Store likelihood for each rate for each site:
  VVVdouble lik;
  drtl->computeLikelihoodAtNode(tree.getRootId(), lik);
  Vdouble Lr(nbDistinctSites, 0);
  Vdouble rcProbs = rDist.getProbabilities();
  Vdouble rcRates = rDist.getCategories();
  for (size_t i = 0; i < nbDistinctSites; i++)
  {
    VVdouble* lik_i = &lik[i];
    for (size_t c = 0; c < nbClasses; c++)
    {
      Vdouble* lik_i_c = &(*lik_i)[c];
      double rc = rDist.getProbability(c);
      for (size_t s = 0; s < nbStates; s++)
      {
        Lr[i] += (*lik_i_c)[s] * rc;
      }
    }
  }

  // Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];
    if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
      continue;

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Now we've got to compute likelihoods in a smart manner... ;)
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; i++)
    {
      VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i.resize(nbClasses);
      for (size_t c = 0; c < nbClasses; c++)
      {
        Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
        likelihoodsFatherConstantPart_i_c.resize(nbStates);
        double rc = rDist.getProbability(c);
        for (size_t s = 0; s < nbStates; s++)
        {
          // likelihoodsFatherConstantPart_i_c[s] = rc * model->freq(s);
          // Frequencies are already accounted for in the array.
          likelihoodsFatherConstantPart_i_c[s] = rc;
        }
      }
    }

    // First, what will remain constant:
    size_t nbSons = father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; n++)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          auto bmd = mit->next();
          unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition:
            if (first)
            {
              pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
            VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; c++)
            {
              const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
              Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
              VVdouble& pxy_c = pxy[c];
              for (size_t x = 0; x < nbStates; x++)
              {
                Vdouble& pxy_c_x = pxy_c[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; y++)
                {
                  likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
                }
                likelihoodsFatherConstantPart_i_c[x] *= likelihood;
              }
            }
          }
        }
      }
    }
    if (father->hasFather())
    {
      const Node* currentSon = father->getFather();
      const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
      // Now iterate over all site partitions:
      unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        auto bmd = mit->next();
        unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl->getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
          VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; c++)
          {
            const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
            Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
            VVdouble& pxy_c = pxy[c];
            for (size_t x = 0; x < nbStates; x++)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; y++)
              {
                // Note the transposed indexing: the transition goes from the
                // grandfather (y) towards the father (x).
                Vdouble& pxy_c_y = pxy_c[y];
                likelihood += pxy_c_y[x] * likelihoodsFather_son_i_c[y];
              }
              likelihoodsFatherConstantPart_i_c[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; i++)
      {
        vector<double> freqs = drtl->getRootFrequencies(i);
        VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; c++)
        {
          Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
          for (size_t x = 0; x < nbStates; x++)
          {
            likelihoodsFatherConstantPart_i_c[x] *= freqs[x];
          }
        }
      }
    }

    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentNode->getId());
    unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      auto bmd = mit->next();
      substitutionCount->setSubstitutionModel(modelSet.getSubstitutionModelForNode(currentNode->getId()));

      // compute all nxy first:
      VVVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVVdouble& nxy_c = nxy[c];
        double rc = rcRates[c];
        nxy_c.resize(nbTypes);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          VVdouble& nxy_c_t = nxy_c[t];
          auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
          nxy_c_t.resize(nbStates);
          for (size_t x = 0; x < nbStates; ++x)
          {
            Vdouble& nxy_c_t_x = nxy_c_t[x];
            nxy_c_t_x.resize(nbStates);
            for (size_t y = 0; y < nbStates; ++y)
            {
              nxy_c_t_x[y] = (*nijt)(x, y);
            }
          }
        }
      }

      // Now loop over sites:
      unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
        VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
          Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
          const VVdouble& pxy_c = pxy[c];
          VVVdouble& nxy_c = nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
            const Vdouble& pxy_c_x = pxy_c[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
                  * pxy_c_x[y]
                  * likelihoodsFather_node_i_c[y];

              for (size_t t = 0; t < nbTypes; ++t)
              {
                // Now the vector computation:
                // likelihood_cxy is the posterior probability for site i and
                // rate class c times the likelihood for this site, and
                // nxy_c[t][x][y] is the substitution function for site i and
                // rate class c.
                substitutionsForCurrentNode[i][t] += likelihood_cxy * nxy_c[t][x][y];
              }
            }
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; ++i)
    {
      for (size_t t = 0; t < nbTypes; ++t)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t] / Lr[rootPatternLinks[i]];
      }
    }
  }
  if (verbose)
  {
    ApplicationTools::displayTaskDone();
  }

  return substitutions;
}
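
/*
 * This overload behaves exactly like the homogeneous version above, except that
 * the model handed to the substitution counter is taken from 'modelSet' for the
 * branch being processed (getSubstitutionModelForNode), so that non-homogeneous
 * models are mapped with the correct per-branch generator.
 */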

/**************************************************************************************************/

unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectorsNoAveraging(
  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl->isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsNoAveraging(). Likelihood object is not initialized.");

  // A few variables we'll need:
  const TreeTemplate<Node> tree(drtl->tree());
  auto& sequences = drtl->data();
  auto& rDist = drtl->rateDistribution();

  size_t nbSites = sequences.getNumberOfSites();
  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
  size_t nbStates = sequences.alphabet().getSize();
  size_t nbClasses = rDist.getNumberOfCategories();
  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);

  Vdouble rcRates = rDist.getCategories();

  // Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Now we've got to compute likelihoods in a smart manner... ;)
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i.resize(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
        likelihoodsFatherConstantPart_i_c.resize(nbStates);
        double rc = rDist.getProbability(c);
        for (size_t s = 0; s < nbStates; ++s)
        {
          likelihoodsFatherConstantPart_i_c[s] = rc;
        }
      }
    }

    // First, what will remain constant:
    size_t nbSons = father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; ++n)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          auto bmd = mit->next();
          unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition:
            if (first)
            {
              pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
            VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; ++c)
            {
              const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
              Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
              VVdouble& pxy_c = pxy[c];
              for (size_t x = 0; x < nbStates; ++x)
              {
                Vdouble& pxy_c_x = pxy_c[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; ++y)
                {
                  likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
                }
                likelihoodsFatherConstantPart_i_c[x] *= likelihood;
              }
            }
          }
        }
      }
    }
    if (father->hasFather())
    {
      const Node* currentSon = father->getFather();
      const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
      // Now iterate over all site partitions:
      unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        auto bmd = mit->next();
        unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl->getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
          VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; ++c)
          {
            const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
            Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
            VVdouble& pxy_c = pxy[c];
            for (size_t x = 0; x < nbStates; ++x)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; ++y)
              {
                // Note the transposed indexing: the transition goes from the
                // grandfather (y) towards the father (x).
                Vdouble& pxy_c_y = pxy_c[y];
                likelihood += pxy_c_y[x] * likelihoodsFather_son_i_c[y];
              }
              likelihoodsFatherConstantPart_i_c[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; ++i)
      {
        vector<double> freqs = drtl->getRootFrequencies(i);
        VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            likelihoodsFatherConstantPart_i_c[x] *= freqs[x];
          }
        }
      }
    }

    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentNode->getId());
    unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      auto bmd = mit->next();
      substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
      // compute all nxy first:
      VVVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        double rc = rcRates[c];
        VVVdouble& nxy_c = nxy[c];
        nxy_c.resize(nbTypes);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          VVdouble& nxy_c_t = nxy_c[t];
          nxy_c_t.resize(nbStates);
          auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
          for (size_t x = 0; x < nbStates; ++x)
          {
            Vdouble& nxy_c_t_x = nxy_c_t[x];
            nxy_c_t_x.resize(nbStates);
            for (size_t y = 0; y < nbStates; ++y)
            {
              nxy_c_t_x[y] = (*nijt)(x, y);
            }
          }
        }
      }

      // Now loop over sites:
      unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
        VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
        RowMatrix<double> pairProbabilities(nbStates, nbStates);
        MatrixTools::fill(pairProbabilities, 0.);
        VVVdouble subsCounts(nbStates);
        for (size_t j = 0; j < nbStates; ++j)
        {
          subsCounts[j].resize(nbStates);
          for (size_t k = 0; k < nbStates; ++k)
          {
            subsCounts[j][k].resize(nbTypes);
          }
        }
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
          Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
          const VVdouble& pxy_c = pxy[c];
          VVVdouble& nxy_c = nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
            const Vdouble& pxy_c_x = pxy_c[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
                  * pxy_c_x[y]
                  * likelihoodsFather_node_i_c[y];
              pairProbabilities(x, y) += likelihood_cxy; // Sum over all rate classes.
              for (size_t t = 0; t < nbTypes; ++t)
              {
                subsCounts[x][y][t] += likelihood_cxy * nxy_c[t][x][y];
              }
            }
          }
        }
        // Now the vector computation:
        // Here we do not average over all possible pairs of ancestral states;
        // we only consider the one with maximum likelihood:
        vector<size_t> xy = MatrixTools::whichMax(pairProbabilities);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          substitutionsForCurrentNode[i][t] += subsCounts[xy[0]][xy[1]][t] / pairProbabilities(xy[0], xy[1]);
        }
      }
    }
    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; i++)
    {
      for (size_t t = 0; t < nbTypes; t++)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t];
      }
    }
  }
  if (verbose)
  {
    ApplicationTools::displayTaskDone();
  }
  return substitutions;
}
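
/*
 * Unlike computeSubstitutionVectors(), this "NoAveraging" variant does not average
 * the counts over all ancestral state pairs. For each site it keeps only the pair
 * (x*, y*) maximizing the joint posterior probability and reports
 *   subsCounts[x*][y*][t] / pairProbabilities(x*, y*),
 * i.e. the expected count conditional on the single best ancestral reconstruction.
 */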

/**************************************************************************************************/

unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectorsNoAveragingMarginal(
  shared_ptr<const DRTreeLikelihoodInterface> drtl,
  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl->isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsNoAveragingMarginal(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl->tree());
  auto& sequences = drtl->data();
  auto& rDist = drtl->rateDistribution();
  auto alpha = sequences.getAlphabet();

  size_t nbSites = sequences.getNumberOfSites();
  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
  size_t nbStates = alpha->getSize();
  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);

  // Compute the whole likelihood of the tree according to the specified model:

  Vdouble rcRates = rDist.getCategories();

  // Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute marginal ancestral states");
  LegacyMarginalAncestralStateReconstruction masr(drtl);
  map<int, vector<size_t>> ancestors = masr.getAllAncestralStates();

  if (verbose)
    ApplicationTools::displayTaskDone();

  // Now we just have to compute the substitution vectors:
  if (verbose)
    ApplicationTools::displayTask("Compute substitution vectors", true);

  for (size_t l = 0; l < nbNodes; l++)
  {
    const Node* currentNode = nodes[l];

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    vector<size_t> nodeStates = ancestors[currentNode->getId()]; // These are not 'true' ancestors ;)
    vector<size_t> fatherStates = ancestors[father->getId()];

    // For each node,
    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Here, we have no likelihood computation to do!

    // Then, we deal with the node of interest.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)
    // Iterate over all site partitions:
    unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
    while (mit->hasNext())
    {
      auto bmd = mit->next();
      substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
      // compute all nxy first:
      VVVdouble nxyt(nbTypes);
      for (size_t t = 0; t < nbTypes; ++t)
      {
        nxyt[t].resize(nbStates);
        auto nxy = substitutionCount->getAllNumbersOfSubstitutions(d, t + 1);
        for (size_t x = 0; x < nbStates; ++x)
        {
          nxyt[t][x].resize(nbStates);
          for (size_t y = 0; y < nbStates; ++y)
          {
            nxyt[t][x][y] = (*nxy)(x, y);
          }
        }
      }
      // Now loop over sites:
      unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
      while (sit->hasNext())
      {
        size_t i = sit->next();
        size_t fatherState = fatherStates[i];
        size_t nodeState = nodeStates[i];
        if (fatherState >= nbStates || nodeState >= nbStates)
        {
          // To be conservative! Only in case there are generic characters.
          for (size_t t = 0; t < nbTypes; ++t)
          {
            substitutionsForCurrentNode[i][t] = 0;
          }
        }
        else
        {
          for (size_t t = 0; t < nbTypes; ++t)
          {
            substitutionsForCurrentNode[i][t] = nxyt[t][fatherState][nodeState];
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; i++)
    {
      for (size_t t = 0; t < nbTypes; t++)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t];
      }
    }
  }
  if (verbose)
  {
    ApplicationTools::displayTaskDone();
  }
  return substitutions;
}

/**************************************************************************************************/

unique_ptr<LegacyProbabilisticSubstitutionMapping> LegacySubstitutionMappingTools::computeSubstitutionVectorsMarginal(
  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl->isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsMarginal(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl->tree());
  auto& sequences = drtl->data();
  auto& rDist = drtl->rateDistribution();

  size_t nbSites = sequences.getNumberOfSites();
  size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
  size_t nbStates = sequences.alphabet().getSize();
  size_t nbClasses = rDist.getNumberOfCategories();
  size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);

  // Compute the whole likelihood of the tree according to the specified model:

  Vdouble rcProbs = rDist.getProbabilities();
  Vdouble rcRates = rDist.getCategories();

  // II) Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute marginal node-pairs likelihoods", true);

  for (size_t l = 0; l < nbNodes; l++)
  {
    const Node* currentNode = nodes[l];

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    // For each node,
    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Then, we deal with the node of interest.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)
    // Marginal posterior probabilities of each state, per site and rate class:
    VVVdouble probsNode = DRTreeLikelihoodTools::getPosteriorProbabilitiesPerStatePerRate(*drtl, currentNode->getId());
    VVVdouble probsFather = DRTreeLikelihoodTools::getPosteriorProbabilitiesPerStatePerRate(*drtl, father->getId());

    // Iterate over all site partitions:
    unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
    while (mit->hasNext())
    {
      auto bmd = mit->next();
      substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
      // compute all nxy first:
      VVVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVVdouble& nxy_c = nxy[c];
        double rc = rcRates[c];
        nxy_c.resize(nbTypes);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          VVdouble& nxy_c_t = nxy_c[t];
          auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
          nxy_c_t.resize(nbStates);
          for (size_t x = 0; x < nbStates; ++x)
          {
            Vdouble& nxy_c_t_x = nxy_c_t[x];
            nxy_c_t_x.resize(nbStates);
            for (size_t y = 0; y < nbStates; ++y)
            {
              nxy_c_t_x[y] = (*nijt)(x, y);
            }
          }
        }
      }

      // Now loop over sites:
      unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
      while (sit->hasNext())
      {
        size_t i = sit->next();
        VVdouble& probsNode_i = probsNode[i];
        VVdouble& probsFather_i = probsFather[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          Vdouble& probsNode_i_c = probsNode_i[c];
          Vdouble& probsFather_i_c = probsFather_i[c];
          VVVdouble& nxy_c = nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            for (size_t y = 0; y < nbStates; ++y)
            {
              double prob_cxy = probsFather_i_c[x] * probsNode_i_c[y];
              // Now the vector computation:
              // prob_cxy is the marginal posterior probability of the state
              // pair (x, y) for site i and rate class c, and nxy_c[t][x][y]
              // is the substitution function for site i and rate class c.
              for (size_t t = 0; t < nbTypes; ++t)
              {
                substitutionsForCurrentNode[i][t] += prob_cxy * nxy_c[t][x][y];
              }
            }
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; ++i)
    {
      for (size_t t = 0; t < nbTypes; ++t)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t];
      }
    }
  }
  if (verbose)
  {
    ApplicationTools::displayTaskDone();
  }
  return substitutions;
}

/**************************************************************************************************/

void LegacySubstitutionMappingTools::writeToStream(
  const LegacyProbabilisticSubstitutionMapping& substitutions,
  const SiteContainerInterface& sites,
  size_t type,
  ostream& out)
{
  if (!out)
    throw IOException("LegacySubstitutionMappingTools::writeToFile. Can't write to stream.");
  out << "Branches";
  out << "\tMean";
  for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
  {
    out << "\tSite" << sites.site(i).getCoordinate();
  }
  out << endl;

  for (size_t j = 0; j < substitutions.getNumberOfBranches(); j++)
  {
    out << substitutions.getNode(j)->getId() << "\t" << substitutions.getNode(j)->getDistanceToFather();
    for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
    {
      out << "\t" << substitutions(j, i, type);
    }
    out << endl;
  }
}

/**************************************************************************************************/

void LegacySubstitutionMappingTools::readFromStream(
  istream& in,
  LegacyProbabilisticSubstitutionMapping& substitutions,
  size_t type)
{
  try
  {
    auto data = DataTable::read(in, "\t", true, -1);
    vector<string> ids = data->getColumn(0);
    data->deleteColumn(0); // Remove ids
    data->deleteColumn(0); // Remove means
    // Now parse the table:
    size_t nbSites = data->getNumberOfColumns();
    substitutions.setNumberOfSites(nbSites);
    size_t nbBranches = data->getNumberOfRows();
    for (size_t i = 0; i < nbBranches; i++)
    {
      int id = TextTools::toInt(ids[i]);
      size_t br = substitutions.getNodeIndex(id);
      for (size_t j = 0; j < nbSites; j++)
      {
        substitutions(br, j, type) = TextTools::toDouble((*data)(i, j));
      }
    }
    // Parse the header:
    for (size_t i = 0; i < nbSites; i++)
    {
      string siteTxt = data->getColumnName(i);
      int site = 0;
      if (siteTxt.substr(0, 4) == "Site")
        site = TextTools::to<int>(siteTxt.substr(4));
      else
        site = TextTools::to<int>(siteTxt);
      substitutions.setSitePosition(i, site);
    }
  }
  catch (Exception& e)
  {
    throw IOException(string("Bad input file. ") + e.what());
  }
}
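
/*
 * The two functions above use a simple tab-separated format: a header line with
 * "Branches", "Mean" and one "Site<coordinate>" column per site, then one row per
 * branch starting with the node id and the branch length. A round-trip sketch
 * (file name hypothetical; requires <fstream>):
 *
 *   ofstream out("mapping.tsv");
 *   LegacySubstitutionMappingTools::writeToStream(*mapping, *sites, 0, out);
 *   out.close();
 *
 *   ifstream in("mapping.tsv");
 *   LegacySubstitutionMappingTools::readFromStream(in, *mapping, 0);
 *   in.close();
 */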

/**************************************************************************************************/

Vdouble LegacySubstitutionMappingTools::computeTotalSubstitutionVectorForSitePerBranch(
  const LegacySubstitutionMappingInterface& smap,
  size_t siteIndex)
{
  size_t nbBranches = smap.getNumberOfBranches();
  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
  Vdouble v(nbBranches);
  for (size_t l = 0; l < nbBranches; ++l)
  {
    v[l] = 0;
    for (size_t t = 0; t < nbTypes; ++t)
    {
      v[l] += smap(l, siteIndex, t);
    }
  }
  return v;
}

/**************************************************************************************************/

Vdouble LegacySubstitutionMappingTools::computeTotalSubstitutionVectorForSitePerType(
  const LegacySubstitutionMappingInterface& smap,
  size_t siteIndex)
{
  size_t nbBranches = smap.getNumberOfBranches();
  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
  Vdouble v(nbTypes);
  for (size_t t = 0; t < nbTypes; ++t)
  {
    v[t] = 0;
    for (size_t l = 0; l < nbBranches; ++l)
    {
      v[t] += smap(l, siteIndex, t);
    }
  }
  return v;
}

/**************************************************************************************************/

double LegacySubstitutionMappingTools::computeNormForSite(
  const LegacySubstitutionMappingInterface& smap,
  size_t siteIndex)
{
  double sumSquare = 0;
  for (size_t l = 0; l < smap.getNumberOfBranches(); ++l)
  {
    double sum = 0;
    for (size_t t = 0; t < smap.getNumberOfSubstitutionTypes(); ++t)
    {
      sum += smap(l, siteIndex, t);
    }
    sumSquare += sum * sum;
  }
  return sqrt(sumSquare);
}

/**************************************************************************************************/

Vdouble LegacySubstitutionMappingTools::computeSumForBranch(
  const LegacySubstitutionMappingInterface& smap,
  size_t branchIndex)
{
  size_t nbSites = smap.getNumberOfSites();
  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
  Vdouble v(nbTypes, 0);
  for (size_t i = 0; i < nbSites; ++i)
  {
    for (size_t t = 0; t < nbTypes; ++t)
    {
      v[t] += smap(branchIndex, i, t);
    }
  }
  return v;
}

/**************************************************************************************************/

Vdouble LegacySubstitutionMappingTools::computeSumForSite(
  const LegacySubstitutionMappingInterface& smap,
  size_t siteIndex)
{
  size_t nbBranches = smap.getNumberOfBranches();
  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
  Vdouble v(nbTypes, 0);
  for (size_t i = 0; i < nbBranches; ++i)
  {
    for (size_t t = 0; t < nbTypes; ++t)
    {
      v[t] += smap(i, siteIndex, t);
    }
  }
  return v;
}
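
/*
 * These helpers aggregate a mapping along one dimension. For instance, to get the
 * total number of substitutions on each branch, summed over sites and types
 * (a sketch; 'mapping' is assumed to come from computeSubstitutionVectors above):
 *
 *   for (size_t l = 0; l < mapping->getNumberOfBranches(); ++l)
 *   {
 *     Vdouble perType = LegacySubstitutionMappingTools::computeSumForBranch(*mapping, l);
 *     double total = VectorTools::sum(perType);
 *   }
 */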

/**************************************************************************************************/

vector<vector<double>> LegacySubstitutionMappingTools::getCountsPerBranch(
  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
  const vector<int>& ids,
  std::shared_ptr<SubstitutionModelInterface> model,
  std::shared_ptr<const SubstitutionRegisterInterface> reg,
  double threshold,
  bool verbose)
{
  auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
  auto mapping = LegacySubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, count, false);

  vector<vector<double>> counts(ids.size());
  size_t nbSites = mapping->getNumberOfSites();
  size_t nbTypes = mapping->getNumberOfSubstitutionTypes();

  for (size_t k = 0; k < ids.size(); ++k)
  {
    vector<double> countsf(nbTypes, 0);
    vector<double> tmp(nbTypes, 0);
    size_t nbIgnored = 0;
    bool error = false;
    for (size_t i = 0; !error && i < nbSites; ++i)
    {
      double s = 0;
      for (size_t t = 0; t < nbTypes; ++t)
      {
        tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
        error = std::isnan(tmp[t]);
        if (error)
          goto ERROR;
        s += tmp[t];
      }
      if (threshold >= 0)
      {
        if (s <= threshold)
          countsf += tmp;
        else
        {
          nbIgnored++;
        }
      }
      else
      {
        countsf += tmp;
      }
    }

ERROR:
    if (error)
    {
      // We do nothing. This happens for small branches.
      if (verbose)
        ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", counts could not be computed.");
      for (size_t t = 0; t < nbTypes; ++t)
      {
        countsf[t] = 0;
      }
    }
    else
    {
      if (nbIgnored > 0)
      {
        if (verbose)
          ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", " + TextTools::toString(nbIgnored) + " sites (" + TextTools::toString(ceil(static_cast<double>(nbIgnored * 100) / static_cast<double>(nbSites))) + "%) have been ignored because they are presumably saturated.");
      }
    }

    counts[k].resize(countsf.size());
    for (size_t j = 0; j < countsf.size(); ++j)
    {
      counts[k][j] = countsf[j];
    }
  }

  return counts;
}
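
/*
 * Typical call (a sketch; 'drtl', 'model' and 'reg' as above, and 'ids' a
 * hypothetical vector of non-root node ids): counts[k][t] is the total count of
 * type t on branch ids[k], summed over sites. Sites whose summed count exceeds
 * 'threshold' are treated as saturated and skipped; pass a negative threshold to
 * keep every site.
 *
 *   auto counts = LegacySubstitutionMappingTools::getCountsPerBranch(
 *       drtl, ids, model, reg, 3.0, true);
 */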

/**************************************************************************************************/

vector<vector<double>> LegacySubstitutionMappingTools::getCountsPerBranch(
  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
  const vector<int>& ids,
  const SubstitutionModelSet& modelSet,
  std::shared_ptr<const SubstitutionRegisterInterface> reg,
  double threshold,
  bool verbose)
{
  auto count = make_shared<UniformizationSubstitutionCount>(modelSet.getSubstitutionModel(0), reg);

  auto mapping = LegacySubstitutionMappingTools::computeSubstitutionVectors(drtl, modelSet, ids, count, false);

  vector<vector<double>> counts(ids.size());
  size_t nbSites = mapping->getNumberOfSites();
  size_t nbTypes = mapping->getNumberOfSubstitutionTypes();

  for (size_t k = 0; k < ids.size(); ++k)
  {
    vector<double> countsf(nbTypes, 0);
    vector<double> tmp(nbTypes, 0);
    size_t nbIgnored = 0;
    bool error = false;
    for (size_t i = 0; !error && i < nbSites; ++i)
    {
      double s = 0;
      for (size_t t = 0; t < nbTypes; ++t)
      {
        tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
        error = std::isnan(tmp[t]);
        if (error)
          goto ERROR;
        s += tmp[t];
      }
      if (threshold >= 0)
      {
        if (s <= threshold)
          countsf += tmp;
        else
        {
          nbIgnored++;
        }
      }
      else
      {
        countsf += tmp;
      }
    }

ERROR:
    if (error)
    {
      // We do nothing. This happens for small branches.
      if (verbose)
        ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", counts could not be computed.");
      for (size_t t = 0; t < nbTypes; ++t)
      {
        countsf[t] = 0;
      }
    }
    else
    {
      if (nbIgnored > 0)
      {
        if (verbose)
          ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", " + TextTools::toString(nbIgnored) + " sites (" + TextTools::toString(ceil(static_cast<double>(nbIgnored * 100) / static_cast<double>(nbSites))) + "%) have been ignored because they are presumably saturated.");
      }
    }

    counts[k].resize(countsf.size());
    for (size_t j = 0; j < countsf.size(); ++j)
    {
      counts[k][j] = countsf[j];
    }
  }

  return counts;
}

/**************************************************************************************************/

vector<vector<double>> LegacySubstitutionMappingTools::getNormalizationsPerBranch(
  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
  const vector<int>& ids,
  std::shared_ptr<const SubstitutionModelInterface> nullModel,
  const SubstitutionRegisterInterface& reg,
  bool verbose)
{
  size_t nbTypes = reg.getNumberOfSubstitutionTypes();
  size_t nbStates = nullModel->alphabet().getSize();
  size_t nbSites = drtl->getNumberOfSites();
  vector<int> supportedStates = nullModel->getAlphabetStates();

  // compute the AlphabetIndex for each substitutionType
  vector<shared_ptr<UserAlphabetIndex1>> usai(nbTypes);

  for (size_t nbt = 0; nbt < nbTypes; nbt++)
  {
    usai[nbt] = make_shared<UserAlphabetIndex1>(nullModel->getAlphabet());
    for (size_t i = 0; i < nbStates; i++)
    {
      usai[nbt]->setIndex(supportedStates[i], 0);
    }
  }

  for (size_t i = 0; i < nbStates; i++)
  {
    for (size_t j = 0; j < nbStates; j++)
    {
      if (i != j)
      {
        size_t nbt = reg.getType(i, j);
        if (nbt != 0)
          usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + nullModel->Qij(i, j));
      }
    }
  }

  // compute the normalization for each substitutionType
  vector<vector<double>> rewards(ids.size());

  for (size_t k = 0; k < ids.size(); ++k)
  {
    rewards[k].resize(nbTypes);
  }

  for (size_t nbt = 0; nbt < nbTypes; nbt++)
  {
    auto reward = make_shared<DecompositionReward>(nullModel, usai[nbt]);
    auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, ids, reward, false);

    for (size_t k = 0; k < ids.size(); ++k)
    {
      double s = 0;
      for (size_t i = 0; i < nbSites; ++i)
      {
        double tmp = (*mapping)(k, i);
        if (std::isnan(tmp))
        {
          if (verbose)
            ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", reward for type " + reg.getTypeName(nbt + 1) + " could not be computed.");
          s = 0;
          break;
        }
        s += tmp;
      }
      rewards[k][nbt] = s;
    }
  }
  return rewards;
}
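
/*
 * The normalization works by reward mapping: for each substitution type t, a
 * UserAlphabetIndex1 is loaded with the total rate of type-t change out of each
 * state x under the null model,
 *   a_t(x) = sum_{y != x, type(x->y) = t} Q(x, y),
 * and the expected reward of a_t over a branch is then the expected number of
 * type-t substitutions under that null model. These expectations are the
 * per-branch normalizing factors returned in 'rewards'.
 */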

/**************************************************************************************************/

vector<vector<double>> LegacySubstitutionMappingTools::getNormalizationsPerBranch(
  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
  const vector<int>& ids,
  std::shared_ptr<const SubstitutionModelSet> nullModelSet,
  const SubstitutionRegisterInterface& reg,
  bool verbose)
{
  size_t nbTypes = reg.getNumberOfSubstitutionTypes();
  size_t nbStates = nullModelSet->alphabet().getSize();
  size_t nbSites = drtl->getNumberOfSites();
  size_t nbModels = nullModelSet->getNumberOfModels();

  // compute the AlphabetIndex for each substitutionType
  // compute the normalization for each substitutionType
  vector<vector<double>> rewards(ids.size());

  for (size_t k = 0; k < ids.size(); ++k)
  {
    rewards[k].resize(nbTypes);
  }

  vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
  for (size_t i = 0; i < nbTypes; ++i)
  {
    usai[i] = make_shared<UserAlphabetIndex1>(nullModelSet->getAlphabet());
  }

  for (size_t nbm = 0; nbm < nbModels; nbm++)
  {
    vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));

    if (mids.size() > 0)
    {
      auto modn = nullModelSet->getSubstitutionModel(nbm);
      vector<int> supportedStates = modn->getAlphabetStates();

      for (size_t nbt = 0; nbt < nbTypes; nbt++)
      {
        for (size_t i = 0; i < nbStates; i++)
        {
          usai[nbt]->setIndex(supportedStates[i], 0);
        }
      }

      for (size_t i = 0; i < nbStates; i++)
      {
        for (size_t j = 0; j < nbStates; j++)
        {
          if (i != j)
          {
            size_t nbt = reg.getType(i, j);
            if (nbt != 0)
              usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + modn->Qij(i, j));
          }
        }
      }

      for (size_t nbt = 0; nbt < nbTypes; nbt++)
      {
        auto reward = make_shared<DecompositionReward>(nullModelSet->getSubstitutionModel(nbm), usai[nbt]);
        auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, mids, reward, false);

        for (size_t k = 0; k < mids.size(); k++)
        {
          double s = 0;
          for (size_t i = 0; i < nbSites; ++i)
          {
            double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
            if (std::isnan(tmp))
            {
              if (verbose)
                ApplicationTools::displayWarning("On branch " + TextTools::toString(mids[k]) + ", reward for type " + reg.getTypeName(nbt + 1) + " could not be computed.");
              s = 0;
              break;
            }
            else
              s += tmp;
          }

          rewards[VectorTools::which(ids, mids[k])][nbt] = s;
        }
      }
    }
  }

  return rewards;
}

/**************************************************************************************************/

  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
  const vector<int>& ids,
  std::shared_ptr<SubstitutionModelInterface> model,
  std::shared_ptr<SubstitutionModelInterface> nullModel,
  std::shared_ptr<const SubstitutionRegisterInterface> reg,
  VVdouble& result,
  bool perTime,
  bool perWord,
  bool verbose)
{
  vector<vector<double>> factors;

  result = getCountsPerBranch(drtl, ids, model, reg, -1, verbose);
  factors = getNormalizationsPerBranch(drtl, ids, nullModel, *reg, verbose);

  // check if perWord
  auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModel->getAlphabet());

  float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);

  size_t nbTypes = result[0].size();

  for (size_t k = 0; k < ids.size(); ++k)
  {
    for (size_t t = 0; t < nbTypes; ++t)
    {
      if (factors[k][t] != 0)
        result[k][t] /= (factors[k][t] * sizeWord);
    }
  }

  // Multiply by the lengths of the branches of the input tree

  if (!perTime)
  {
    const TreeTemplate<Node> tree(drtl->tree());

    for (size_t k = 0; k < ids.size(); ++k)
    {
      double l = tree.getNode(ids[k])->getDistanceToFather();
      for (size_t t = 0; t < nbTypes; ++t)
      {
        result[k][t] *= l;
      }
    }
  }
}
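
/*
 * In summary, the function above computes, for branch k and type t,
 *   result[k][t] = count[k][t] / (factor[k][t] * sizeWord) * l_k,
 * where 'factor' is the null-model normalization computed by
 * getNormalizationsPerBranch(), 'sizeWord' divides out the word length for word
 * alphabets (unless perWord is set), and the branch length l_k is applied only
 * when perTime is false.
 */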
1756
1757/**************************************************************************************************/
1758
1760 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1761 const vector<int>& ids,
1762 std::shared_ptr<SubstitutionModelSet> modelSet,
1763 std::shared_ptr<SubstitutionModelSet> nullModelSet,
1764 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1765 VVdouble& result,
1766 bool perTime,
1767 bool perWord,
1768 bool verbose)
1769{
1770 vector<vector<double>> factors;
1771
1772 result = getCountsPerBranch(drtl, ids, *modelSet, reg, -1, verbose);
1773 factors = getNormalizationsPerBranch(drtl, ids, nullModelSet, *reg, verbose);
1774
1775 size_t nbTypes = result[0].size();
1776
1777 // Word alphabets: unless perWord is set, also divide by the word length, so counts are per word position.
1778
1779 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModelSet->getAlphabet());
1780
1781 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
1782
1783
1784 for (size_t k = 0; k < ids.size(); ++k)
1785 {
1786 for (size_t t = 0; t < nbTypes; ++t)
1787 {
1788 if (factors[k][t] != 0)
1789 result[k][t] /= (factors[k][t] * sizeWord);
1790 }
1791 }
1792
1793 // Multiply by the lengths of the branches of the input tree
1794
1795 if (!perTime)
1796 {
1797 const TreeTemplate<Node> tree(drtl->tree());
1798
1799 for (size_t k = 0; k < ids.size(); ++k)
1800 {
1801 double l = tree.getNode(ids[k])->getDistanceToFather();
1802 for (size_t t = 0; t < nbTypes; ++t)
1803 {
1804 result[k][t] *= l;
1805 }
1806 }
1807 }
1808}
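// Editorial note: this overload mirrors the single-model version above but
// draws, for every branch, the null model assigned to it by nullModelSet, so
// non-homogeneous null hypotheses are normalized branch by branch.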
1809
1810/**************************************************************************************************/
1811
1812void LegacySubstitutionMappingTools::computeCountsPerTypePerBranch(
1813 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1814 const vector<int>& ids,
1815 std::shared_ptr<SubstitutionModelInterface> model,
1816 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1817 VVdouble& result,
1818 double threshold,
1819 bool verbose)
1820{
1821 result = getCountsPerBranch(drtl, ids, model, reg, threshold, verbose);
1822
1823 auto creg = dynamic_pointer_cast<const CategorySubstitutionRegister>(reg);
1824
1825 if (creg && !creg->isStationary())
1826 {
1827 size_t nbTypes = result[0].size();
1828
1829 for (size_t k = 0; k < ids.size(); ++k)
1830 {
1831 vector<double> freqs = DRTreeLikelihoodTools::getPosteriorStateFrequencies(*drtl, ids[k]);
1832 // Compute frequencies for types:
1833 vector<double> freqsTypes(creg->getNumberOfCategories());
1834 for (size_t i = 0; i < freqs.size(); ++i)
1835 {
1836 size_t c = creg->getCategory(i);
1837 freqsTypes[c - 1] += freqs[i];
1838 }
1839
1840 // We divide the counts by the frequencies and rescale:
1841 double s = VectorTools::sum(result[k]);
1842 for (size_t t = 0; t < nbTypes; ++t)
1843 {
1844 result[k][t] /= freqsTypes[creg->getCategoryFrom(t + 1) - 1];
1845 }
1846
1847 double s2 = VectorTools::sum(result[k]);
1848 // Scale:
1849 result[k] = (result[k] / s2) * s;
1850 }
1851 }
1852}
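// Worked micro-example (editorial, hypothetical numbers): with a
// non-stationary CategorySubstitutionRegister, raw counts {4, 1} on a branch
// with posterior category frequencies {0.8, 0.2} become {4/0.8, 1/0.2} =
// {5, 5} after the frequency correction, then are rescaled by s/s2 = 5/10,
// giving {2.5, 2.5}: the per-type balance changes, the branch total does not.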
1853
1854/**************************************************************************************************/
1855
1856void LegacySubstitutionMappingTools::computeCountsPerSitePerBranch(
1857 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1858 const vector<int>& ids,
1859 std::shared_ptr<SubstitutionModelInterface> model,
1860 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1861 VVdouble& result)
1862{
1863 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
1864 auto smap = computeSubstitutionVectors(drtl, ids, count, false);
1865
1866 size_t nbSites = smap->getNumberOfSites();
1867 size_t nbBr = ids.size();
1868
1869 VectorTools::resize2(result, nbSites, nbBr);
1870
1871 vector<size_t> sdi(nbBr); // mapping node index for each entry of ids
1872 for (size_t i = 0; i < nbBr; ++i)
1873 {
1874 sdi[i] = smap->getNodeIndex(ids[i]);
1875 }
1876
1877 for (size_t k = 0; k < nbSites; ++k)
1878 {
1879 Vdouble countsf = computeTotalSubstitutionVectorForSitePerBranch(*smap, k);
1880 Vdouble* resS = &result[k];
1881
1882 for (size_t i = 0; i < nbBr; ++i)
1883 {
1884 (*resS)[i] = countsf[sdi[i]];
1885 }
1886 }
1887}
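// Usage sketch (editorial addition; `drtl`, `ids`, `model` and `reg` assumed
// prepared by the caller):
//
//   VVdouble bySiteAndBranch;
//   LegacySubstitutionMappingTools::computeCountsPerSitePerBranch(
//       drtl, ids, model, reg, bySiteAndBranch);
//   LegacySubstitutionMappingTools::outputPerSitePerBranch(
//       "counts_per_site_per_branch.tsv", ids, bySiteAndBranch);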
1888
1889
1890/**************************************************************************************************/
1891
1892void LegacySubstitutionMappingTools::computeCountsPerSitePerType(
1893 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1894 const vector<int>& ids,
1895 std::shared_ptr<SubstitutionModelInterface> model,
1896 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1897 VVdouble& result)
1898{
1899 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
1900 auto smap = computeSubstitutionVectors(drtl, ids, count, false);
1901
1902 size_t nbSites = smap->getNumberOfSites();
1903 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
1904
1905 VectorTools::resize2(result, nbSites, nbTypes);
1906
1907 vector<unique_ptr<LegacyProbabilisticRewardMapping>> rmap;
1908
1909 for (size_t k = 0; k < nbSites; ++k)
1910 {
1911 Vdouble countsf = computeTotalSubstitutionVectorForSitePerType(*smap, k);
1912
1913 Vdouble* resS = &result[k];
1914
1915 for (size_t i = 0; i < nbTypes; ++i)
1916 {
1917 (*resS)[i] = countsf[i];
1918 }
1919 }
1920}
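// Usage sketch (editorial addition; same assumed setup as above):
//
//   VVdouble bySiteAndType;
//   LegacySubstitutionMappingTools::computeCountsPerSitePerType(
//       drtl, ids, model, reg, bySiteAndType);
//   LegacySubstitutionMappingTools::outputPerSitePerType(
//       "counts_per_site_per_type.tsv", *reg, bySiteAndType);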
1921
1922
1923/**************************************************************************************************/
1924
1925void LegacySubstitutionMappingTools::computeCountsPerSitePerType(
1926 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1927 const vector<int>& ids,
1928 std::shared_ptr<SubstitutionModelInterface> model,
1929 std::shared_ptr<SubstitutionModelInterface> nullModel,
1930 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1931 VVdouble& result,
1932 bool perTime,
1933 bool perWord)
1934{
1935 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
1936 auto smap = computeSubstitutionVectors(drtl, ids, count, false);
1937
1938 size_t nbSites = smap->getNumberOfSites();
1939 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
1940 size_t nbStates = nullModel->alphabet().getSize();
1941 vector<int> supportedStates = nullModel->getAlphabetStates();
1942
1943 VectorTools::resize2(result, nbSites, nbTypes);
1944
1945 // compute the AlphabetIndex for each substitutionType
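// (Editorial clarification: usai[nbt] will assign to each state the total
// null-model rate out of that state through substitutions of type nbt + 1;
// it is filled in the two loops below.)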
1946 vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
1947
1948 for (size_t nbt = 0; nbt < nbTypes; nbt++)
1949 {
1950 usai[nbt] = make_shared<UserAlphabetIndex1>(nullModel->getAlphabet());
1951 for (size_t i = 0; i < nbStates; i++)
1952 {
1953 usai[nbt]->setIndex(supportedStates[i], 0);
1954 }
1955 }
1956
1957 for (size_t i = 0; i < nbStates; i++)
1958 {
1959 for (size_t j = 0; j < nbStates; j++)
1960 {
1961 if (i != j)
1962 {
1963 size_t nbt = reg->getType(i, j);
1964 if (nbt != 0)
1965 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + nullModel->Qij(i, j));
1966 }
1967 }
1968 }
1969
1970 // compute the normalization for each site for each substitutionType
1971 vector<vector<double>> rewards(nbSites);
1972
1973 for (size_t k = 0; k < nbSites; ++k)
1974 {
1975 rewards[k].resize(nbTypes);
1976 }
1977
1978 for (size_t nbt = 0; nbt < nbTypes; nbt++)
1979 {
1980 auto reward = make_shared<DecompositionReward>(nullModel, usai[nbt]);
1981 auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, ids, reward, false);
1982
1983 for (size_t i = 0; i < nbSites; ++i)
1984 {
1985 double s = 0;
1986 for (size_t k = 0; k < ids.size(); ++k)
1987 {
1988 double tmp = (*mapping)(mapping->getNodeIndex(ids[k]), i);
1989 if (std::isnan(tmp))
1990 {
1991 s = 0;
1992 break;
1993 }
1994 s += tmp;
1995 }
1996 rewards[i][nbt] = s;
1997 }
1998 }
1999
2000 // Compute the sum of lengths of concerned branches
2001
2002 double brlen = 0;
2003
2004 if (!perTime)
2005 {
2006 const TreeTemplate<Node> tree(drtl->tree());
2007 for (size_t k = 0; k < ids.size(); ++k)
2008 {
2009 brlen += tree.getNode(ids[k])->getDistanceToFather();
2010 }
2011 }
2012 else
2013 brlen = 1;
2014
2015 // check if perWord
2016
2017 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModel->getAlphabet());
2018
2019 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2020
2021 // output
2022
2023 for (size_t k = 0; k < nbSites; ++k)
2024 {
2025 Vdouble countsf = computeTotalSubstitutionVectorForSitePerType(*smap, k);
2026
2027 Vdouble& resS = result[k];
2028 for (size_t i = 0; i < nbTypes; ++i)
2029 {
2030 resS[i] = countsf[i] / rewards[k][i] * brlen / sizeWord;
2031 }
2032 }
2033}
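// Editorial caveat: when a reward cannot be computed for a site, the NaN
// branch above leaves rewards[k][i] == 0, and the division then produces an
// infinite or undefined normalized count; callers may want to screen the
// output for such sites.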
2034
2035
2036/**************************************************************************************************/
2037
2038void LegacySubstitutionMappingTools::computeCountsPerSitePerType(
2039 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2040 const vector<int>& ids,
2041 std::shared_ptr<SubstitutionModelSet> modelSet,
2042 std::shared_ptr<SubstitutionModelSet> nullModelSet,
2043 std::shared_ptr<const SubstitutionRegisterInterface> reg,
2044 VVdouble& result,
2045 bool perTime,
2046 bool perWord)
2047{
2048 auto count = make_shared<UniformizationSubstitutionCount>(modelSet->getSubstitutionModel(0), reg);
2049 auto smap = computeSubstitutionVectors(drtl, ids, count, false);
2050
2051 size_t nbSites = smap->getNumberOfSites();
2052 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2053 size_t nbStates = nullModelSet->alphabet().getSize();
2054 size_t nbModels = nullModelSet->getNumberOfModels();
2055
2056 VectorTools::resize2(result, nbSites, nbTypes);
2057
2058 // compute the normalization for each site for each substitutionType
2059 vector<vector<double>> rewards(nbSites);
2060
2061 for (size_t k = 0; k < nbSites; ++k)
2062 {
2063 rewards[k].resize(nbTypes);
2064 }
2065
2066 vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
2067
2068 for (size_t nbm = 0; nbm < nbModels; nbm++)
2069 {
2070 vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));
2071
2072 if (mids.size() > 0)
2073 {
2074 auto modn = nullModelSet->getSubstitutionModel(nbm);
2075 vector<int> supportedStates = modn->getAlphabetStates();
2076
2077 for (size_t nbt = 0; nbt < nbTypes; nbt++)
2078 {
2079 usai[nbt] = make_shared<UserAlphabetIndex1>(nullModelSet->getAlphabet());
2080 for (size_t i = 0; i < nbStates; i++)
2081 {
2082 usai[nbt]->setIndex(supportedStates[i], 0);
2083 }
2084 }
2085
2086 for (size_t i = 0; i < nbStates; i++)
2087 {
2088 for (size_t j = 0; j < nbStates; j++)
2089 {
2090 if (i != j)
2091 {
2092 size_t nbt = reg->getType(i, j);
2093 if (nbt != 0)
2094 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + modn->Qij(i, j));
2095 }
2096 }
2097 }
2098
2099 for (size_t nbt = 0; nbt < nbTypes; nbt++)
2100 {
2101 auto reward = make_shared<DecompositionReward>(nullModelSet->getSubstitutionModel(nbm), usai[nbt]);
2102 auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, mids, reward, false);
2103
2104 for (size_t i = 0; i < nbSites; ++i)
2105 {
2106 double s = 0;
2107 for (size_t k = 0; k < mids.size(); k++)
2108 {
2109 double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
2110
2111 if (std::isnan(tmp))
2112 {
2113 s = 0;
2114 break;
2115 }
2116 else
2117 s += tmp;
2118 }
2119 rewards[i][nbt] += s;
2120 }
2121 }
2122 }
2123 }
2124
2125 // Compute the sum of lengths of concerned branches
2126
2127
2128 double brlen = 0;
2129
2130 if (!perTime)
2131 {
2132 const TreeTemplate<Node> tree(drtl->tree());
2133 for (size_t k = 0; k < ids.size(); ++k)
2134 {
2135 brlen += tree.getNode(ids[k])->getDistanceToFather();
2136 }
2137 }
2138 else
2139 brlen = 1.;
2140
2141
2142 // Word alphabets: unless perWord is set, also divide by the word length, so counts are per word position.
2143 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModelSet->getAlphabet());
2144 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2145
2146 // output
2147
2148 for (size_t k = 0; k < nbSites; ++k)
2149 {
2150 Vdouble countsf = computeTotalSubstitutionVectorForSitePerType(*smap, k);
2151
2152 Vdouble& resS = result[k];
2153
2154 for (size_t i = 0; i < nbTypes; ++i)
2155 {
2156 resS[i] = countsf[i] / rewards[k][i] * brlen / sizeWord;
2157 }
2158 }
2159}
2160
2161
2162/**************************************************************************************************/
2163
2164void LegacySubstitutionMappingTools::computeCountsPerSitePerBranchPerType(
2165 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2166 const vector<int>& ids,
2167 std::shared_ptr<SubstitutionModelInterface> model,
2168 std::shared_ptr<const SubstitutionRegisterInterface> reg,
2169 VVVdouble& result)
2170{
2171 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
2172 auto smap = computeSubstitutionVectors(drtl, ids, count, false);
2173
2174 size_t nbSites = smap->getNumberOfSites();
2175 size_t nbBr = ids.size();
2176 size_t nbTypes = reg->getNumberOfSubstitutionTypes();
2177
2178 VectorTools::resize3(result, nbSites, nbBr, nbTypes);
2179
2180 for (size_t i = 0; i < nbTypes; ++i)
2181 {
2182 for (size_t j = 0; j < nbSites; ++j)
2183 {
2184 VVdouble& resS = result[j];
2185
2186 for (size_t k = 0; k < nbBr; ++k)
2187 {
2188 resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i);
2189 }
2190 }
2191 }
2192}
2193
2194/**************************************************************************************************/
2195
2196void LegacySubstitutionMappingTools::computeCountsPerSitePerBranchPerType(
2197 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2198 const vector<int>& ids,
2199 std::shared_ptr<SubstitutionModelInterface> model,
2200 std::shared_ptr<SubstitutionModelInterface> nullModel,
2201 std::shared_ptr<const SubstitutionRegisterInterface> reg,
2202 VVVdouble& result,
2203 bool perTime,
2204 bool perWord)
2205{
2206 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
2207 auto smap = computeSubstitutionVectors(drtl, ids, count, false);
2208
2209 size_t nbSites = smap->getNumberOfSites();
2210 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2211 size_t nbStates = nullModel->alphabet().getSize();
2212 size_t nbBr = ids.size();
2213
2214 VectorTools::resize3(result, nbSites, nbBr, nbTypes);
2215
2216 // compute the normalization for each site for each substitutionType
2217 map<int, vector<vector<double>>> rewards;
2218
2219 for (auto id : ids)
2220 {
2221 VectorTools::resize2(rewards[id], nbSites, nbTypes);
2222 }
2223
2224 vector<int> supportedStates = nullModel->getAlphabetStates();
2225
2226 // compute the AlphabetIndex for each substitutionType
2227 vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
2228
2229 for (size_t nbt = 0; nbt < nbTypes; nbt++)
2230 {
2231 usai[nbt] = make_shared<UserAlphabetIndex1>(nullModel->getAlphabet());
2232 for (size_t i = 0; i < nbStates; i++)
2233 {
2234 usai[nbt]->setIndex(supportedStates[i], 0);
2235 }
2236 }
2237
2238 for (size_t i = 0; i < nbStates; i++)
2239 {
2240 for (size_t j = 0; j < nbStates; j++)
2241 {
2242 if (i != j)
2243 {
2244 size_t nbt = reg->getType(i, j);
2245 if (nbt != 0)
2246 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + nullModel->Qij(i, j));
2247 }
2248 }
2249 }
2250
2251 for (size_t nbt = 0; nbt < nbTypes; nbt++)
2252 {
2253 auto reward = make_shared<DecompositionReward>(nullModel, usai[nbt]);
2254 auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, ids, reward, false);
2255
2256 for (size_t i = 0; i < nbSites; ++i)
2257 {
2258 for (size_t k = 0; k < ids.size(); ++k)
2259 {
2260 double tmp = (*mapping)(mapping->getNodeIndex(ids[k]), i);
2261
2262 if (std::isnan(tmp))
2263 tmp = 0;
2264
2265 rewards[ids[k]][i][nbt] = tmp;
2266 }
2267 }
2268 }
2269
2270 // Word alphabets: unless perWord is set, also divide by the word length, so counts are per word position.
2271 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModel->getAlphabet());
2272 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2273
2274
2275 // output
2276 const TreeTemplate<Node>& tree = drtl->tree();
2277
2278 for (size_t i = 0; i < nbTypes; ++i)
2279 {
2280 for (size_t j = 0; j < nbSites; ++j)
2281 {
2282 VVdouble& resS = result[j];
2283
2284 for (size_t k = 0; k < nbBr; ++k)
2285 {
2286 resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i) / rewards[ids[k]][j][i] * (perTime ? 1 : tree.getNode(ids[k])->getDistanceToFather()) / sizeWord;
2287 }
2288 }
2289 }
2290}
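// Editorial caveat: here a NaN reward is stored as 0 (see above), so the
// corresponding entry of result becomes infinite or undefined after the
// division; such entries flag site/branch pairs where the null normalization
// failed rather than genuine counts.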
2291
2292/**************************************************************************************************/
2293
2294void LegacySubstitutionMappingTools::computeCountsPerSitePerBranchPerType(
2295 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2296 const vector<int>& ids,
2297 std::shared_ptr<SubstitutionModelSet> modelSet,
2298 std::shared_ptr<SubstitutionModelSet> nullModelSet,
2299 std::shared_ptr<const SubstitutionRegisterInterface> reg,
2300 VVVdouble& result,
2301 bool perTime,
2302 bool perWord)
2303{
2304 auto count = make_shared<UniformizationSubstitutionCount>(modelSet->getSubstitutionModel(0), reg);
2305 auto smap = computeSubstitutionVectors(drtl, ids, count, false);
2306
2307 size_t nbSites = smap->getNumberOfSites();
2308 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2309 size_t nbStates = nullModelSet->alphabet().getSize();
2310 size_t nbModels = nullModelSet->getNumberOfModels();
2311 size_t nbBr = ids.size();
2312
2313 VectorTools::resize3(result, nbSites, nbBr, nbTypes);
2314
2315 // compute the normalization for each site for each substitutionType
2316 map<int, vector<vector<double>>> rewards;
2317
2318 for (auto id : ids)
2319 {
2320 VectorTools::resize2(rewards[id], nbSites, nbTypes);
2321 }
2322
2323 vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
2324 for (size_t i = 0; i < nbTypes; ++i)
2325 {
2326 usai[i] = make_shared<UserAlphabetIndex1>(nullModelSet->getAlphabet());
2327 }
2328
2329 for (size_t nbm = 0; nbm < nbModels; nbm++)
2330 {
2331 vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));
2332
2333 if (mids.size() > 0)
2334 {
2335 auto modn = nullModelSet->getSubstitutionModel(nbm);
2336 vector<int> supportedStates = modn->getAlphabetStates();
2337
2338 for (size_t nbt = 0; nbt < nbTypes; nbt++)
2339 {
2340 for (size_t i = 0; i < nbStates; i++)
2341 {
2342 usai[nbt]->setIndex(supportedStates[i], 0);
2343 }
2344 }
2345
2346 for (size_t i = 0; i < nbStates; i++)
2347 {
2348 for (size_t j = 0; j < nbStates; j++)
2349 {
2350 if (i != j)
2351 {
2352 size_t nbt = reg->getType(i, j);
2353 if (nbt != 0)
2354 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + modn->Qij(i, j));
2355 }
2356 }
2357 }
2358
2359 for (size_t nbt = 0; nbt < nbTypes; nbt++)
2360 {
2361 auto reward = make_shared<DecompositionReward>(nullModelSet->getSubstitutionModel(nbm), usai[nbt]);
2362 auto mapping = LegacyRewardMappingTools::computeRewardVectors(drtl, mids, reward, false);
2363
2364 for (size_t i = 0; i < nbSites; ++i)
2365 {
2366 for (size_t k = 0; k < mids.size(); k++)
2367 {
2368 double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
2369
2370 if (std::isnan(tmp))
2371 tmp = 0;
2372
2373 rewards[mids[k]][i][nbt] = tmp;
2374 }
2375 }
2376 }
2377 }
2378 }
2379
2380 // Word alphabets: unless perWord is set, also divide by the word length, so counts are per word position.
2381 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModelSet->getAlphabet());
2382 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2383
2384
2385 // output
2386 const TreeTemplate<Node>& tree = drtl->tree();
2387
2388 for (size_t i = 0; i < nbTypes; ++i)
2389 {
2390 for (size_t j = 0; j < nbSites; ++j)
2391 {
2392 VVdouble& resS = result[j];
2393
2394 for (size_t k = 0; k < nbBr; ++k)
2395 {
2396 resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i) / rewards[ids[k]][j][i] * (perTime ? 1 : tree.getNode(ids[k])->getDistanceToFather()) / sizeWord;
2397 }
2398 }
2399 }
2400}
2401
2402
2403/**************************************************************************************************/
2404
2405void LegacySubstitutionMappingTools::outputPerSitePerBranch(
2406 const string& filename,
2407 const vector<int>& ids,
2408 const VVdouble& counts)
2409{
2410 size_t nbSites = counts.size();
2411 if (nbSites == 0)
2412 return;
2413 size_t nbBr = counts[0].size();
2414
2415 ofstream file;
2416 file.open(filename.c_str());
2417
2418 file << "sites";
2419 for (size_t i = 0; i < nbBr; ++i)
2420 {
2421 file << "\t" << ids[i];
2422 }
2423 file << endl;
2424
2425 for (size_t k = 0; k < nbSites; ++k)
2426 {
2427 const Vdouble& countS = counts[k];
2428 file << k;
2429 for (size_t i = 0; i < nbBr; ++i)
2430 {
2431 file << "\t" << countS[i];
2432 }
2433 file << endl;
2434 }
2435 file.close();
2436}
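// Output format (editorial illustration with made-up values): a tab-separated
// table with one row per site index and one column per branch id, e.g.
//
//   sites   0       1       5
//   0       0.12    0.03    0.40
//   1       0.00    0.17    0.08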
2437
2438
2439/**************************************************************************************************/
2440
2441void LegacySubstitutionMappingTools::outputPerSitePerType(
2442 const string& filename,
2443 const SubstitutionRegisterInterface& reg,
2444 const VVdouble& counts)
2445{
2446 size_t nbSites = counts.size();
2447 if (nbSites == 0)
2448 return;
2449 size_t nbTypes = counts[0].size();
2450
2451 ofstream file;
2452 file.open(filename.c_str());
2453
2454 file << "sites";
2455 for (size_t i = 0; i < nbTypes; ++i)
2456 {
2457 file << "\t" << reg.getTypeName(i + 1);
2458 }
2459 file << endl;
2460
2461 for (size_t k = 0; k < nbSites; ++k)
2462 {
2463 file << k;
2464 const Vdouble& resS = counts[k];
2465 for (size_t i = 0; i < nbTypes; ++i)
2466 {
2467 file << "\t" << resS[i];
2468 }
2469 file << endl;
2470 }
2471 file.close();
2472}
2473
2474
2475/**************************************************************************************************/
2476
2477void LegacySubstitutionMappingTools::outputPerSitePerBranchPerType(
2478 const string& filenamePrefix,
2479 const vector<int>& ids,
2480 const SubstitutionRegisterInterface& reg,
2481 const VVVdouble& counts)
2482{
2483 size_t nbSites = counts.size();
2484 if (nbSites == 0)
2485 return;
2486 size_t nbBr = counts[0].size();
2487 if (nbBr == 0)
2488 return;
2489 size_t nbTypes = counts[0][0].size();
2490
2491 ofstream file;
2492
2493 for (size_t i = 0; i < nbTypes; ++i)
2494 {
2495 string name = reg.getTypeName(i + 1);
2496 if (name == "")
2497 name = TextTools::toString(i + 1);
2498
2499 string path = filenamePrefix + name + string(".count");
2500
2501 ApplicationTools::displayResult(string("Output counts of type ") + TextTools::toString(i + 1) + string(" to file"), path);
2502 file.open(path.c_str());
2503
2504 file << "sites";
2505 for (size_t k = 0; k < nbBr; ++k)
2506 {
2507 file << "\t" << ids[k];
2508 }
2509 file << endl;
2510
2511 for (size_t j = 0; j < nbSites; ++j)
2512 {
2513 const VVdouble& resS = counts[j];
2514
2515 file << j;
2516 for (size_t k = 0; k < nbBr; ++k)
2517 {
2518 file << "\t" << resS[k][i];
2519 }
2520 file << endl;
2521 }
2522 file.close();
2523 }
2524}
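// End-to-end sketch (editorial addition; all objects assumed prepared by the
// caller as in the functions above):
//
//   VVVdouble bySiteBranchType;
//   LegacySubstitutionMappingTools::computeCountsPerSitePerBranchPerType(
//       drtl, ids, model, reg, bySiteBranchType);
//   LegacySubstitutionMappingTools::outputPerSitePerBranchPerType(
//       "counts_", ids, *reg, bySiteBranchType);
//
// This writes one <prefix><typeName>.count file per substitution type, each
// holding the per-site, per-branch counts for that type.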