6 #include "../../Mapping/UniformizationSubstitutionCount.h"
7 #include "../../Mapping/DecompositionReward.h"
10 #include "../Likelihood/DRTreeLikelihoodTools.h"
11 #include "../Likelihood/MarginalAncestralStateReconstruction.h"
29 std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
30 const vector<int>& nodeIds,
31 std::shared_ptr<SubstitutionCountInterface> substitutionCount,
35 if (!drtl->isInitialized())
36 throw Exception(
"SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
41 auto& sequences = drtl->data();
42 auto& rDist = drtl->rateDistribution();
44 size_t nbSites = sequences.getNumberOfSites();
45 size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
46 size_t nbStates = sequences.alphabet().getSize();
47 size_t nbClasses = rDist.getNumberOfCategories();
48 size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
49 vector<const Node*> nodes = tree.
getNodes();
50 const auto& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
52 size_t nbNodes = nodes.size();
55 auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
59 drtl->computeLikelihoodAtNode(tree.
getRootId(), lik);
61 Vdouble rcProbs = rDist.getProbabilities();
62 Vdouble rcRates = rDist.getCategories();
63 for (
size_t i = 0; i < nbDistinctSites; i++)
66 for (
size_t c = 0; c < nbClasses; c++)
68 Vdouble* lik_i_c = &(*lik_i)[c];
69 double rc = rDist.getProbability(c);
70 for (
size_t s = 0; s < nbStates; s++)
72 Lr[i] += (*lik_i_c)[s] * rc;
81 for (
size_t l = 0; l < nbNodes; ++l)
84 const Node* currentNode = nodes[l];
94 VVdouble substitutionsForCurrentNode(nbDistinctSites);
95 for (
size_t i = 0; i < nbDistinctSites; ++i)
97 substitutionsForCurrentNode[i].resize(nbTypes);
101 VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
102 for (
size_t i = 0; i < nbDistinctSites; i++)
104 VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
105 likelihoodsFatherConstantPart_i->resize(nbClasses);
106 for (
size_t c = 0; c < nbClasses; c++)
108 Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
109 likelihoodsFatherConstantPart_i_c->resize(nbStates);
110 double rc = rDist.getProbability(c);
111 for (
size_t s = 0; s < nbStates; s++)
115 (*likelihoodsFatherConstantPart_i_c)[s] = rc;
122 for (
size_t n = 0; n < nbSons; n++)
125 if (currentSon->
getId() != currentNode->
getId())
127 const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->
getId(), currentSon->
getId());
130 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->
getId()));
133 while (mit->hasNext())
135 auto bmd = mit->next();
136 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
138 while (sit->hasNext())
140 size_t i = sit->next();
144 pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->
getId(), i);
147 const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
148 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
149 for (
size_t c = 0; c < nbClasses; c++)
151 const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
152 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
154 for (
size_t x = 0; x < nbStates; x++)
157 double likelihood = 0.;
158 for (
size_t y = 0; y < nbStates; y++)
160 likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
162 likelihoodsFatherConstantPart_i_c[x] *= likelihood;
172 const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->
getId(), currentSon->
getId());
174 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->
getId()));
177 while (mit->hasNext())
179 auto bmd = mit->next();
180 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
182 while (sit->hasNext())
184 size_t i = sit->next();
188 pxy = drtl->getTransitionProbabilitiesPerRateClass(father->
getId(), i);
191 const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
192 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
193 for (
size_t c = 0; c < nbClasses; c++)
195 const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
196 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
198 for (
size_t x = 0; x < nbStates; x++)
200 double likelihood = 0.;
201 for (
size_t y = 0; y < nbStates; y++)
204 likelihood += pxy_c_x[x] * likelihoodsFather_son_i_c[y];
206 likelihoodsFatherConstantPart_i_c[x] *= likelihood;
215 for (
size_t i = 0; i < nbDistinctSites; i++)
217 vector<double> freqs = drtl->getRootFrequencies(i);
218 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
219 for (
size_t c = 0; c < nbClasses; c++)
221 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
222 for (
size_t x = 0; x < nbStates; x++)
224 likelihoodsFatherConstantPart_i_c[x] *= freqs[x];
236 const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->
getId(), currentNode->
getId());
237 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->
getId()));
240 while (mit->hasNext())
242 auto bmd = mit->next();
243 substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
246 for (
size_t c = 0; c < nbClasses; ++c)
249 double rc = rcRates[c];
250 nxy_c.resize(nbTypes);
251 for (
size_t t = 0; t < nbTypes; ++t)
254 auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
256 nxy_c_t.resize(nbStates);
257 for (
size_t x = 0; x < nbStates; ++x)
259 Vdouble& nxy_c_t_x = nxy_c_t[x];
260 nxy_c_t_x.resize(nbStates);
261 for (
size_t y = 0; y < nbStates; ++y)
263 nxy_c_t_x[y] = (*nijt)(x, y);
270 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
272 while (sit->hasNext())
274 size_t i = sit->next();
278 pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->
getId(), i);
281 const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
282 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
283 for (
size_t c = 0; c < nbClasses; ++c)
285 const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
286 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
289 for (
size_t x = 0; x < nbStates; ++x)
291 double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
292 const Vdouble& pxy_c_x = pxy_c[x];
293 for (
size_t y = 0; y < nbStates; ++y)
295 double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
297 * likelihoodsFather_node_i_c[y];
299 for (
size_t t = 0; t < nbTypes; ++t)
302 substitutionsForCurrentNode[i][t] += likelihood_cxy * nxy_c[t][x][y];
317 for (
size_t i = 0; i < nbSites; ++i)
319 for (
size_t t = 0; t < nbTypes; ++t)
321 (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t] / Lr[rootPatternLinks[i]];
332 return substitutions;
338 std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
340 const vector<int>& nodeIds,
341 std::shared_ptr<SubstitutionCountInterface> substitutionCount,
345 if (!drtl->isInitialized())
346 throw Exception(
"SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
351 auto& sequences = drtl->data();
352 auto& rDist = drtl->rateDistribution();
354 size_t nbSites = sequences.getNumberOfSites();
355 size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
356 size_t nbStates = sequences.alphabet().getSize();
357 size_t nbClasses = rDist.getNumberOfCategories();
358 size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
359 vector<const Node*> nodes = tree.
getNodes();
360 auto& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
362 size_t nbNodes = nodes.size();
365 auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
369 drtl->computeLikelihoodAtNode(tree.
getRootId(), lik);
370 Vdouble Lr(nbDistinctSites, 0);
371 Vdouble rcProbs = rDist.getProbabilities();
372 Vdouble rcRates = rDist.getCategories();
373 for (
size_t i = 0; i < nbDistinctSites; i++)
376 for (
size_t c = 0; c < nbClasses; c++)
378 Vdouble* lik_i_c = &(*lik_i)[c];
379 double rc = rDist.getProbability(c);
380 for (
size_t s = 0; s < nbStates; s++)
382 Lr[i] += (*lik_i_c)[s] * rc;
391 for (
size_t l = 0; l < nbNodes; ++l)
394 const Node* currentNode = nodes[l];
404 VVdouble substitutionsForCurrentNode(nbDistinctSites);
405 for (
size_t i = 0; i < nbDistinctSites; ++i)
407 substitutionsForCurrentNode[i].resize(nbTypes);
411 VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
412 for (
size_t i = 0; i < nbDistinctSites; i++)
414 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
415 likelihoodsFatherConstantPart_i.resize(nbClasses);
416 for (
size_t c = 0; c < nbClasses; c++)
418 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
419 likelihoodsFatherConstantPart_i_c.resize(nbStates);
420 double rc = rDist.getProbability(c);
421 for (
size_t s = 0; s < nbStates; s++)
425 likelihoodsFatherConstantPart_i_c[s] = rc;
432 for (
size_t n = 0; n < nbSons; n++)
435 if (currentSon->
getId() != currentNode->
getId())
437 const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->
getId(), currentSon->
getId());
440 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->
getId()));
443 while (mit->hasNext())
445 auto bmd = mit->next();
446 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
448 while (sit->hasNext())
450 size_t i = sit->next();
454 pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->
getId(), i);
457 const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
458 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
459 for (
size_t c = 0; c < nbClasses; c++)
461 const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
462 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
464 for (
size_t x = 0; x < nbStates; x++)
467 double likelihood = 0.;
468 for (
size_t y = 0; y < nbStates; y++)
470 likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
472 likelihoodsFatherConstantPart_i_c[x] *= likelihood;
482 const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->
getId(), currentSon->
getId());
484 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->
getId()));
487 while (mit->hasNext())
489 auto bmd = mit->next();
490 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
492 while (sit->hasNext())
494 size_t i = sit->next();
498 pxy = drtl->getTransitionProbabilitiesPerRateClass(father->
getId(), i);
501 const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
502 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
503 for (
size_t c = 0; c < nbClasses; c++)
505 const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
506 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
508 for (
size_t x = 0; x < nbStates; x++)
510 double likelihood = 0.;
511 for (
size_t y = 0; y < nbStates; y++)
514 likelihood += pxy_c_x[x] * likelihoodsFather_son_i_c[y];
516 likelihoodsFatherConstantPart_i_c[x] *= likelihood;
525 for (
size_t i = 0; i < nbDistinctSites; i++)
527 vector<double> freqs = drtl->getRootFrequencies(i);
528 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
529 for (
size_t c = 0; c < nbClasses; c++)
531 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
532 for (
size_t x = 0; x < nbStates; x++)
534 likelihoodsFatherConstantPart_i_c[x] *= freqs[x];
546 const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->
getId(), currentNode->
getId());
547 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->
getId()));
550 while (mit->hasNext())
552 auto bmd = mit->next();
557 for (
size_t c = 0; c < nbClasses; ++c)
560 double rc = rcRates[c];
561 nxy_c.resize(nbTypes);
562 for (
size_t t = 0; t < nbTypes; ++t)
565 auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
566 nxy_c_t.resize(nbStates);
567 for (
size_t x = 0; x < nbStates; ++x)
569 Vdouble& nxy_c_t_x = nxy_c_t[x];
570 nxy_c_t_x.resize(nbStates);
571 for (
size_t y = 0; y < nbStates; ++y)
573 nxy_c_t_x[y] = (*nijt)(x, y);
580 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
582 while (sit->hasNext())
584 size_t i = sit->next();
588 pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->
getId(), i);
591 const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
592 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
593 for (
size_t c = 0; c < nbClasses; ++c)
595 const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
596 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
599 for (
size_t x = 0; x < nbStates; ++x)
601 double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
602 const Vdouble& pxy_c_x = pxy_c[x];
603 for (
size_t y = 0; y < nbStates; ++y)
605 double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
607 * likelihoodsFather_node_i_c[y];
609 for (
size_t t = 0; t < nbTypes; ++t)
612 substitutionsForCurrentNode[i][t] += likelihood_cxy * nxy_c[t][x][y];
627 for (
size_t i = 0; i < nbSites; ++i)
629 for (
size_t t = 0; t < nbTypes; ++t)
631 (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t] / Lr[rootPatternLinks[i]];
642 return substitutions;
648 std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
649 std::shared_ptr<SubstitutionCountInterface> substitutionCount,
653 if (!drtl->isInitialized())
654 throw Exception(
"SubstitutionMappingTools::computeSubstitutionVectorsNoAveraging(). Likelihood object is not initialized.");
658 auto& sequences = drtl->data();
659 auto& rDist = drtl->rateDistribution();
661 size_t nbSites = sequences.getNumberOfSites();
662 size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
663 size_t nbStates = sequences.alphabet().getSize();
664 size_t nbClasses = rDist.getNumberOfCategories();
665 size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
666 vector<const Node*> nodes = tree.
getNodes();
667 const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
669 size_t nbNodes = nodes.size();
672 auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
674 Vdouble rcRates = rDist.getCategories();
680 for (
size_t l = 0; l < nbNodes; ++l)
683 const Node* currentNode = nodes[l];
691 VVdouble substitutionsForCurrentNode(nbDistinctSites);
692 for (
size_t i = 0; i < nbDistinctSites; ++i)
694 substitutionsForCurrentNode[i].resize(nbTypes);
698 VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
699 for (
size_t i = 0; i < nbDistinctSites; ++i)
701 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
702 likelihoodsFatherConstantPart_i.resize(nbClasses);
703 for (
size_t c = 0; c < nbClasses; ++c)
705 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
706 likelihoodsFatherConstantPart_i_c.resize(nbStates);
707 double rc = rDist.getProbability(c);
708 for (
size_t s = 0; s < nbStates; ++s)
710 likelihoodsFatherConstantPart_i_c[s] = rc;
717 for (
size_t n = 0; n < nbSons; ++n)
720 if (currentSon->
getId() != currentNode->
getId())
722 const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->
getId(), currentSon->
getId());
725 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->
getId()));
728 while (mit->hasNext())
730 auto bmd = mit->next();
731 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
733 while (sit->hasNext())
735 size_t i = sit->next();
739 pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->
getId(), i);
742 const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
743 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
744 for (
size_t c = 0; c < nbClasses; ++c)
746 const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
747 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
749 for (
size_t x = 0; x < nbStates; ++x)
752 double likelihood = 0.;
753 for (
size_t y = 0; y < nbStates; ++y)
755 likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
757 likelihoodsFatherConstantPart_i_c[x] *= likelihood;
767 const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->
getId(), currentSon->
getId());
769 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->
getId()));
772 while (mit->hasNext())
774 auto bmd = mit->next();
775 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
777 while (sit->hasNext())
779 size_t i = sit->next();
783 pxy = drtl->getTransitionProbabilitiesPerRateClass(father->
getId(), i);
786 const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
787 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
788 for (
size_t c = 0; c < nbClasses; ++c)
790 const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
791 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
793 for (
size_t x = 0; x < nbStates; ++x)
795 double likelihood = 0.;
796 for (
size_t y = 0; y < nbStates; ++y)
799 likelihood += pxy_c_x[x] * likelihoodsFather_son_i_c[y];
801 likelihoodsFatherConstantPart_i_c[x] *= likelihood;
810 for (
size_t i = 0; i < nbDistinctSites; ++i)
812 vector<double> freqs = drtl->getRootFrequencies(i);
813 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
814 for (
size_t c = 0; c < nbClasses; ++c)
816 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
817 for (
size_t x = 0; x < nbStates; ++x)
819 likelihoodsFatherConstantPart_i_c[x] *= freqs[x];
830 const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->
getId(), currentNode->
getId());
831 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->
getId()));
834 while (mit->hasNext())
836 auto bmd = mit->next();
837 substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
840 for (
size_t c = 0; c < nbClasses; ++c)
842 double rc = rcRates[c];
844 nxy_c.resize(nbTypes);
845 for (
size_t t = 0; t < nbTypes; ++t)
848 nxy_c_t.resize(nbStates);
849 auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
850 for (
size_t x = 0; x < nbStates; ++x)
852 Vdouble& nxy_c_t_x = nxy_c_t[x];
853 nxy_c_t_x.resize(nbStates);
854 for (
size_t y = 0; y < nbStates; ++y)
856 nxy_c_t_x[y] = (*nijt)(x, y);
863 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
865 while (sit->hasNext())
867 size_t i = sit->next();
871 pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->
getId(), i);
874 const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
875 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
879 for (
size_t j = 0; j < nbStates; ++j)
881 subsCounts[j].resize(nbStates);
882 for (
size_t k = 0; k < nbStates; ++k)
884 subsCounts[j][k].resize(nbTypes);
887 for (
size_t c = 0; c < nbClasses; ++c)
889 const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
890 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
893 for (
size_t x = 0; x < nbStates; ++x)
895 double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
896 const Vdouble& pxy_c_x = pxy_c[x];
897 for (
size_t y = 0; y < nbStates; ++y)
899 double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
901 * likelihoodsFather_node_i_c[y];
902 pairProbabilities(x, y) += likelihood_cxy;
903 for (
size_t t = 0; t < nbTypes; ++t)
905 subsCounts[x][y][t] += likelihood_cxy * nxy_c[t][x][y];
914 for (
size_t t = 0; t < nbTypes; ++t)
916 substitutionsForCurrentNode[i][t] += subsCounts[xy[0]][xy[1]][t] / pairProbabilities(xy[0], xy[1]);
921 for (
size_t i = 0; i < nbSites; i++)
923 for (
size_t t = 0; t < nbTypes; t++)
925 (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t];
935 return substitutions;
941 shared_ptr<const DRTreeLikelihoodInterface> drtl,
942 std::shared_ptr<SubstitutionCountInterface> substitutionCount,
946 if (!drtl->isInitialized())
947 throw Exception(
"SubstitutionMappingTools::computeSubstitutionVectorsNoAveragingMarginal(). Likelihood object is not initialized.");
952 auto& sequences = drtl->data();
953 auto& rDist = drtl->rateDistribution();
954 auto alpha = sequences.getAlphabet();
956 size_t nbSites = sequences.getNumberOfSites();
957 size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
958 size_t nbStates = alpha->getSize();
959 size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
960 vector<const Node*> nodes = tree.
getNodes();
961 const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
963 size_t nbNodes = nodes.size();
966 auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
970 Vdouble rcRates = rDist.getCategories();
985 for (
size_t l = 0; l < nbNodes; l++)
987 const Node* currentNode = nodes[l];
993 vector<size_t> nodeStates = ancestors[currentNode->
getId()];
994 vector<size_t> fatherStates = ancestors[father->
getId()];
999 VVdouble substitutionsForCurrentNode(nbDistinctSites);
1000 for (
size_t i = 0; i < nbDistinctSites; ++i)
1002 substitutionsForCurrentNode[i].resize(nbTypes);
1010 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->
getId()));
1011 while (mit->hasNext())
1013 auto bmd = mit->next();
1014 substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
1017 for (
size_t t = 0; t < nbTypes; ++t)
1019 nxyt[t].resize(nbStates);
1020 auto nxy = substitutionCount->getAllNumbersOfSubstitutions(d, t + 1);
1021 for (
size_t x = 0; x < nbStates; ++x)
1023 nxyt[t][x].resize(nbStates);
1024 for (
size_t y = 0; y < nbStates; ++y)
1026 nxyt[t][x][y] = (*nxy)(x, y);
1031 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
1032 while (sit->hasNext())
1034 size_t i = sit->next();
1035 size_t fatherState = fatherStates[i];
1036 size_t nodeState = nodeStates[i];
1037 if (fatherState >= nbStates || nodeState >= nbStates)
1038 for (
size_t t = 0; t < nbTypes; ++t)
1040 substitutionsForCurrentNode[i][t] = 0;
1043 for (
size_t t = 0; t < nbTypes; ++t)
1045 substitutionsForCurrentNode[i][t] = nxyt[t][fatherState][nodeState];
1051 for (
size_t i = 0; i < nbSites; i++)
1053 for (
size_t t = 0; t < nbTypes; t++)
1055 (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t];
1065 return substitutions;
1071 std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
1072 std::shared_ptr<SubstitutionCountInterface> substitutionCount,
1076 if (!drtl->isInitialized())
1077 throw Exception(
"SubstitutionMappingTools::computeSubstitutionVectorsMarginal(). Likelihood object is not initialized.");
1082 auto& sequences = drtl->data();
1083 auto& rDist = drtl->rateDistribution();
1085 size_t nbSites = sequences.getNumberOfSites();
1086 size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
1087 size_t nbStates = sequences.alphabet().getSize();
1088 size_t nbClasses = rDist.getNumberOfCategories();
1089 size_t nbTypes = substitutionCount->getNumberOfSubstitutionTypes();
1090 vector<const Node*> nodes = tree.
getNodes();
1091 const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
1093 size_t nbNodes = nodes.size();
1096 auto substitutions = make_unique<LegacyProbabilisticSubstitutionMapping>(tree, substitutionCount, nbSites);
1100 Vdouble rcProbs = rDist.getProbabilities();
1101 Vdouble rcRates = rDist.getCategories();
1107 for (
size_t l = 0; l < nbNodes; l++)
1109 const Node* currentNode = nodes[l];
1118 VVdouble substitutionsForCurrentNode(nbDistinctSites);
1119 for (
size_t i = 0; i < nbDistinctSites; ++i)
1121 substitutionsForCurrentNode[i].resize(nbTypes);
1130 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->
getId()));
1131 while (mit->hasNext())
1133 auto bmd = mit->next();
1134 substitutionCount->setSubstitutionModel(bmd->getSubstitutionModel());
1137 for (
size_t c = 0; c < nbClasses; ++c)
1140 double rc = rcRates[c];
1141 nxy_c.resize(nbTypes);
1142 for (
size_t t = 0; t < nbTypes; ++t)
1145 auto nijt = substitutionCount->getAllNumbersOfSubstitutions(d * rc, t + 1);
1146 nxy_c_t.resize(nbStates);
1147 for (
size_t x = 0; x < nbStates; ++x)
1149 Vdouble& nxy_c_t_x = nxy_c_t[x];
1150 nxy_c_t_x.resize(nbStates);
1151 for (
size_t y = 0; y < nbStates; ++y)
1153 nxy_c_t_x[y] = (*nijt)(x, y);
1160 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
1161 while (sit->hasNext())
1163 size_t i = sit->next();
1164 VVdouble& probsNode_i = probsNode[i];
1165 VVdouble& probsFather_i = probsFather[i];
1166 for (
size_t c = 0; c < nbClasses; ++c)
1168 Vdouble& probsNode_i_c = probsNode_i[c];
1169 Vdouble& probsFather_i_c = probsFather_i[c];
1171 for (
size_t x = 0; x < nbStates; ++x)
1173 for (
size_t y = 0; y < nbStates; ++y)
1175 double prob_cxy = probsFather_i_c[x] * probsNode_i_c[y];
1177 for (
size_t t = 0; t < nbTypes; ++t)
1179 substitutionsForCurrentNode[i][t] += prob_cxy * nxy_c[t][x][y];
1194 for (
size_t i = 0; i < nbSites; ++i)
1196 for (
size_t t = 0; t < nbTypes; ++t)
1198 (*substitutions)(l, i, t) = substitutionsForCurrentNode[rootPatternLinks[i]][t];
1208 return substitutions;
1220 throw IOException(
"LegacySubstitutionMappingTools::writeToFile. Can't write to stream.");
1234 out <<
"\t" << substitutions(j, i, type);
1250 vector<string> ids = data->getColumn(0);
1251 data->deleteColumn(0);
1252 data->deleteColumn(0);
1254 size_t nbSites = data->getNumberOfColumns();
1256 size_t nbBranches = data->getNumberOfRows();
1257 for (
size_t i = 0; i < nbBranches; i++)
1261 for (
size_t j = 0; j < nbSites; j++)
1267 for (
size_t i = 0; i < nbSites; i++)
1269 string siteTxt = data->getColumnName(i);
1271 if (siteTxt.substr(0, 4) ==
"Site")
1272 site = TextTools::to<int>(siteTxt.substr(4));
1274 site = TextTools::to<int>(siteTxt);
1293 for (
size_t l = 0; l < nbBranches; ++l)
1296 for (
size_t t = 0; t < nbTypes; ++t)
1298 v[l] += smap(l, siteIndex, t);
1313 for (
size_t t = 0; t < nbTypes; ++t)
1316 for (
size_t l = 0; l < nbBranches; ++l)
1318 v[t] += smap(l, siteIndex, t);
1330 double sumSquare = 0;
1336 sum += smap(l, siteIndex, t);
1338 sumSquare += sum * sum;
1340 return sqrt(sumSquare);
1352 for (
size_t i = 0; i < nbSites; ++i)
1354 for (
size_t t = 0; t < nbTypes; ++t)
1356 v[t] += smap(branchIndex, i, t);
1371 for (
size_t i = 0; i < nbBranches; ++i)
1373 for (
size_t t = 0; t < nbTypes; ++t)
1375 v[t] += smap(i, siteIndex, t);
1384 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1385 const vector<int>& ids,
1386 std::shared_ptr<SubstitutionModelInterface> model,
1387 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1391 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
1394 vector< vector<double>> counts(ids.size());
1395 size_t nbSites = mapping->getNumberOfSites();
1396 size_t nbTypes = mapping->getNumberOfSubstitutionTypes();
1398 for (
size_t k = 0; k < ids.size(); ++k)
1400 vector<double> countsf(nbTypes, 0);
1401 vector<double> tmp(nbTypes, 0);
1402 size_t nbIgnored = 0;
1404 for (
size_t i = 0; !error && i < nbSites; ++i)
1407 for (
size_t t = 0; t < nbTypes; ++t)
1409 tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
1410 error = std::isnan(tmp[t]);
1436 for (
size_t t = 0; t < nbTypes; ++t)
1450 counts[k].resize(countsf.size());
1451 for (
size_t j = 0; j < countsf.size(); ++j)
1453 counts[k][j] = countsf[j];
1463 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1464 const vector<int>& ids,
1466 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1474 vector< vector<double>> counts(ids.size());
1475 size_t nbSites = mapping->getNumberOfSites();
1476 size_t nbTypes = mapping->getNumberOfSubstitutionTypes();
1478 for (
size_t k = 0; k < ids.size(); ++k)
1480 vector<double> countsf(nbTypes, 0);
1481 vector<double> tmp(nbTypes, 0);
1482 size_t nbIgnored = 0;
1484 for (
size_t i = 0; !error && i < nbSites; ++i)
1487 for (
size_t t = 0; t < nbTypes; ++t)
1489 tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
1490 error = std::isnan(tmp[t]);
1516 for (
size_t t = 0; t < nbTypes; ++t)
1530 counts[k].resize(countsf.size());
1531 for (
size_t j = 0; j < countsf.size(); ++j)
1533 counts[k][j] = countsf[j];
1543 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1544 const vector<int>& ids,
1545 std::shared_ptr<const SubstitutionModelInterface> nullModel,
1550 size_t nbStates = nullModel->alphabet().getSize();
1551 size_t nbSites = drtl->getNumberOfSites();
1552 vector<int> supportedStates = nullModel->getAlphabetStates();
1555 vector<shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
1557 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
1559 usai[nbt] = make_shared<UserAlphabetIndex1>(nullModel->getAlphabet());
1560 for (
size_t i = 0; i < nbStates; i++)
1562 usai[nbt]->setIndex(supportedStates[i], 0);
1566 for (
size_t i = 0; i < nbStates; i++)
1568 for (
size_t j = 0; j < nbStates; j++)
1572 size_t nbt = reg.
getType(i, j);
1574 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + nullModel->Qij(i, j));
1580 vector<vector<double>> rewards(ids.size());
1582 for (
size_t k = 0; k < ids.size(); ++k)
1584 rewards[k].resize(nbTypes);
1587 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
1589 auto reward = make_shared<DecompositionReward>(nullModel, usai[nbt]);
1592 for (
size_t k = 0; k < ids.size(); ++k)
1595 for (
size_t i = 0; i < nbSites; ++i)
1597 double tmp = (*mapping)(k, i);
1598 if (std::isnan(tmp))
1607 rewards[k][nbt] = s;
1616 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1617 const vector<int>& ids,
1618 std::shared_ptr<const SubstitutionModelSet> nullModelSet,
1623 size_t nbStates = nullModelSet->alphabet().getSize();
1624 size_t nbSites = drtl->getNumberOfSites();
1625 size_t nbModels = nullModelSet->getNumberOfModels();
1629 vector<vector<double>> rewards(ids.size());
1631 for (
size_t k = 0; k < ids.size(); ++k)
1633 rewards[k].resize(nbTypes);
1636 vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
1637 for (
size_t i = 0; i < nbTypes; ++i)
1639 usai[i] = make_shared<UserAlphabetIndex1>(nullModelSet->getAlphabet());
1642 for (
size_t nbm = 0; nbm < nbModels; nbm++)
1646 if (mids.size() > 0)
1648 auto modn = nullModelSet->getSubstitutionModel(nbm);
1649 vector<int> supportedStates = modn->getAlphabetStates();
1651 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
1653 for (
size_t i = 0; i < nbStates; i++)
1655 usai[nbt]->setIndex(supportedStates[i], 0);
1659 for (
size_t i = 0; i < nbStates; i++)
1661 for (
size_t j = 0; j < nbStates; j++)
1665 size_t nbt = reg.
getType(i, j);
1667 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + modn->Qij(i, j));
1672 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
1674 auto reward = make_shared<DecompositionReward>(nullModelSet->getSubstitutionModel(nbm), usai[nbt]);
1677 for (
size_t k = 0; k < mids.size(); k++)
1680 for (
size_t i = 0; i < nbSites; ++i)
1682 double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
1683 if (std::isnan(tmp))
1706 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1707 const vector<int>& ids,
1708 std::shared_ptr<SubstitutionModelInterface> model,
1709 std::shared_ptr<SubstitutionModelInterface> nullModel,
1710 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1716 vector<vector<double>> factors;
1718 result = getCountsPerBranch(drtl, ids, model, reg, -1, verbose);
1719 factors = getNormalizationsPerBranch(drtl, ids, nullModel, *reg, verbose);
1724 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModel->getAlphabet());
1726 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
1729 size_t nbTypes = result[0].size();
1731 for (
size_t k = 0; k < ids.size(); ++k)
1733 for (
size_t t = 0; t < nbTypes; ++t)
1735 if (factors[k][t] != 0)
1736 result[k][t] /= (factors[k][t] * sizeWord);
1746 for (
size_t k = 0; k < ids.size(); ++k)
1748 double l = tree.
getNode(ids[k])->getDistanceToFather();
1749 for (
size_t t = 0; t < nbTypes; ++t)
1760 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1761 const vector<int>& ids,
1762 std::shared_ptr<SubstitutionModelSet> modelSet,
1763 std::shared_ptr<SubstitutionModelSet> nullModelSet,
1764 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1770 vector<vector<double>> factors;
1772 result = getCountsPerBranch(drtl, ids, *modelSet, reg, -1, verbose);
1773 factors = getNormalizationsPerBranch(drtl, ids, nullModelSet, *reg, verbose);
1775 size_t nbTypes = result[0].size();
1779 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModelSet->getAlphabet());
1781 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
1784 for (
size_t k = 0; k < ids.size(); ++k)
1786 for (
size_t t = 0; t < nbTypes; ++t)
1788 if (factors[k][t] != 0)
1789 result[k][t] /= (factors[k][t] * sizeWord);
1799 for (
size_t k = 0; k < ids.size(); ++k)
1801 double l = tree.
getNode(ids[k])->getDistanceToFather();
1802 for (
size_t t = 0; t < nbTypes; ++t)
1813 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1814 const vector<int>& ids,
1815 std::shared_ptr<SubstitutionModelInterface> model,
1816 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1821 result = getCountsPerBranch(drtl, ids, model, reg, threshold, verbose);
1823 auto creg = dynamic_pointer_cast<const CategorySubstitutionRegister>(reg);
1825 if (creg && !creg->isStationary())
1827 size_t nbTypes = result[0].size();
1829 for (
size_t k = 0; k < ids.size(); ++k)
1833 vector<double> freqsTypes(creg->getNumberOfCategories());
1834 for (
size_t i = 0; i < freqs.size(); ++i)
1836 size_t c = creg->getCategory(i);
1837 freqsTypes[c - 1] += freqs[i];
1842 for (
size_t t = 0; t < nbTypes; ++t)
1844 result[k][t] /= freqsTypes[creg->getCategoryFrom(t + 1) - 1];
1849 result[k] = (result[k] / s2) * s;
1857 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1858 const vector<int>& ids,
1859 std::shared_ptr<SubstitutionModelInterface> model,
1860 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1863 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
1866 size_t nbSites = smap->getNumberOfSites();
1867 size_t nbBr = ids.size();
1871 vector<size_t> sdi(nbBr);
1872 for (
size_t i = 0; i < nbBr; ++i)
1874 sdi[i] = smap->getNodeIndex(ids[i]);
1877 for (
size_t k = 0; k < nbSites; ++k)
1882 for (
size_t i = 0; i < nbBr; ++i)
1884 (*resS)[i] = countsf[sdi[i]];
1893 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1894 const vector<int>& ids,
1895 std::shared_ptr<SubstitutionModelInterface> model,
1896 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1899 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
1902 size_t nbSites = smap->getNumberOfSites();
1903 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
1907 vector<unique_ptr<LegacyProbabilisticRewardMapping>> rmap;
1909 for (
size_t k = 0; k < nbSites; ++k)
1915 for (
size_t i = 0; i < nbTypes; ++i)
1917 (*resS)[i] = countsf[i];
1926 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
1927 const vector<int>& ids,
1928 std::shared_ptr<SubstitutionModelInterface> model,
1929 std::shared_ptr<SubstitutionModelInterface> nullModel,
1930 std::shared_ptr<const SubstitutionRegisterInterface> reg,
1935 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
1938 size_t nbSites = smap->getNumberOfSites();
1939 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
1940 size_t nbStates = nullModel->alphabet().getSize();
1941 vector<int> supportedStates = nullModel->getAlphabetStates();
1946 vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
1948 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
1950 usai[nbt] = make_shared<UserAlphabetIndex1>(nullModel->getAlphabet());
1951 for (
size_t i = 0; i < nbStates; i++)
1953 usai[nbt]->setIndex(supportedStates[i], 0);
1957 for (
size_t i = 0; i < nbStates; i++)
1959 for (
size_t j = 0; j < nbStates; j++)
1963 size_t nbt = reg->getType(i, j);
1965 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + nullModel->Qij(i, j));
1971 vector<vector<double>> rewards(nbSites);
1973 for (
size_t k = 0; k < nbSites; ++k)
1975 rewards[k].resize(nbTypes);
1978 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
1980 auto reward = make_shared<DecompositionReward>(nullModel, usai[nbt]);
1983 for (
size_t i = 0; i < nbSites; ++i)
1986 for (
size_t k = 0; k < ids.size(); ++k)
1988 double tmp = (*mapping)(mapping->getNodeIndex(ids[k]), i);
1989 if (std::isnan(tmp))
1996 rewards[i][nbt] = s;
2007 for (
size_t k = 0; k < ids.size(); ++k)
2009 brlen += tree.
getNode(ids[k])->getDistanceToFather();
2017 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModel->getAlphabet());
2019 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2023 for (
size_t k = 0; k < nbSites; ++k)
2028 for (
size_t i = 0; i < nbTypes; ++i)
2030 resS[i] = countsf[i] / rewards[k][i] * brlen / sizeWord;
2039 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2040 const vector<int>& ids,
2041 std::shared_ptr<SubstitutionModelSet> modelSet,
2042 std::shared_ptr<SubstitutionModelSet> nullModelSet,
2043 std::shared_ptr<const SubstitutionRegisterInterface> reg,
2048 auto count = make_shared<UniformizationSubstitutionCount>(modelSet->getSubstitutionModel(0), reg);
2051 size_t nbSites = smap->getNumberOfSites();
2052 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2053 size_t nbStates = nullModelSet->alphabet().getSize();
2054 size_t nbModels = nullModelSet->getNumberOfModels();
2059 vector< vector<double>> rewards(nbSites);
2061 for (
size_t k = 0; k < nbSites; ++k)
2063 rewards[k].resize(nbTypes);
2066 vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
2068 for (
size_t nbm = 0; nbm < nbModels; nbm++)
2072 if (mids.size() > 0)
2074 const std::shared_ptr<SubstitutionModelInterface> modn = nullModelSet->getSubstitutionModel(nbm);
2075 vector<int> supportedStates = modn->getAlphabetStates();
2077 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
2079 usai[nbt] = make_shared<UserAlphabetIndex1>(nullModelSet->getAlphabet());
2080 for (
size_t i = 0; i < nbStates; i++)
2082 usai[nbt]->setIndex(supportedStates[i], 0);
2086 for (
size_t i = 0; i < nbStates; i++)
2088 for (
size_t j = 0; j < nbStates; j++)
2092 size_t nbt = reg->getType(i, j);
2094 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + modn->Qij(i, j));
2099 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
2101 auto reward = make_shared<DecompositionReward>(nullModelSet->getSubstitutionModel(nbm), usai[nbt]);
2104 for (
size_t i = 0; i < nbSites; ++i)
2107 for (
size_t k = 0; k < mids.size(); k++)
2109 double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
2111 if (std::isnan(tmp))
2119 rewards[i][nbt] += s;
2133 for (
size_t k = 0; k < ids.size(); ++k)
2135 brlen += tree.
getNode(ids[k])->getDistanceToFather();
2143 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModelSet->getAlphabet());
2144 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2148 for (
size_t k = 0; k < nbSites; ++k)
2154 for (
size_t i = 0; i < nbTypes; ++i)
2156 resS[i] = countsf[i] / rewards[k][i] * brlen / sizeWord;
2165 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2166 const vector<int>& ids,
2167 std::shared_ptr<SubstitutionModelInterface> model,
2168 std::shared_ptr<const SubstitutionRegisterInterface> reg,
2171 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
2174 size_t nbSites = smap->getNumberOfSites();
2175 size_t nbBr = ids.size();
2176 size_t nbTypes = reg->getNumberOfSubstitutionTypes();
2180 for (
size_t i = 0; i < nbTypes; ++i)
2182 for (
size_t j = 0; j < nbSites; ++j)
2186 for (
size_t k = 0; k < nbBr; ++k)
2188 resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i);
2197 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2198 const vector<int>& ids,
2199 std::shared_ptr<SubstitutionModelInterface> model,
2200 std::shared_ptr<SubstitutionModelInterface> nullModel,
2201 std::shared_ptr<const SubstitutionRegisterInterface> reg,
2206 auto count = make_shared<UniformizationSubstitutionCount>(model, reg);
2209 size_t nbSites = smap->getNumberOfSites();
2210 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2211 size_t nbStates = nullModel->alphabet().getSize();
2212 size_t nbBr = ids.size();
2217 map<int, vector< vector<double>>> rewards;
2224 vector<int> supportedStates = nullModel->getAlphabetStates();
2227 vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
2229 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
2231 usai[nbt] = make_shared<UserAlphabetIndex1>(nullModel->getAlphabet());
2232 for (
size_t i = 0; i < nbStates; i++)
2234 usai[nbt]->setIndex(supportedStates[i], 0);
2238 for (
size_t i = 0; i < nbStates; i++)
2240 for (
size_t j = 0; j < nbStates; j++)
2244 size_t nbt = reg->getType(i, j);
2246 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + nullModel->Qij(i, j));
2251 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
2253 auto reward = make_shared<DecompositionReward>(nullModel, usai[nbt]);
2256 for (
size_t i = 0; i < nbSites; ++i)
2258 for (
size_t k = 0; k < ids.size(); ++k)
2260 double tmp = (*mapping)(mapping->getNodeIndex(ids[k]), i);
2262 if (std::isnan(tmp))
2265 rewards[ids[k]][i][nbt] = tmp;
2271 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModel->getAlphabet());
2272 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2278 for (
size_t i = 0; i < nbTypes; ++i)
2280 for (
size_t j = 0; j < nbSites; ++j)
2284 for (
size_t k = 0; k < nbBr; ++k)
2286 resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i) / rewards[ids[k]][j][i] * (perTime ? 1 : tree.
getNode(ids[k])->getDistanceToFather()) / sizeWord;
2295 std::shared_ptr<DRTreeLikelihoodInterface> drtl,
2296 const vector<int>& ids,
2297 std::shared_ptr<SubstitutionModelSet> modelSet,
2298 std::shared_ptr<SubstitutionModelSet> nullModelSet,
2299 std::shared_ptr<const SubstitutionRegisterInterface> reg,
2304 auto count = make_shared<UniformizationSubstitutionCount>(modelSet->getSubstitutionModel(0), reg);
2307 size_t nbSites = smap->getNumberOfSites();
2308 size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2309 size_t nbStates = nullModelSet->alphabet().getSize();
2310 size_t nbModels = nullModelSet->getNumberOfModels();
2311 size_t nbBr = ids.size();
2316 map<int, vector<vector<double>>> rewards;
2323 vector<std::shared_ptr<UserAlphabetIndex1>> usai(nbTypes);
2324 for (
size_t i = 0; i < nbTypes; ++i)
2326 usai[i] = make_shared<UserAlphabetIndex1>(nullModelSet->getAlphabet());
2329 for (
size_t nbm = 0; nbm < nbModels; nbm++)
2333 if (mids.size() > 0)
2335 auto modn = nullModelSet->getSubstitutionModel(nbm);
2336 vector<int> supportedStates = modn->getAlphabetStates();
2338 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
2340 for (
size_t i = 0; i < nbStates; i++)
2342 usai[nbt]->setIndex(supportedStates[i], 0);
2346 for (
size_t i = 0; i < nbStates; i++)
2348 for (
size_t j = 0; j < nbStates; j++)
2352 size_t nbt = reg->getType(i, j);
2354 usai[nbt - 1]->setIndex(supportedStates[i], usai[nbt - 1]->getIndex(supportedStates[i]) + modn->Qij(i, j));
2359 for (
size_t nbt = 0; nbt < nbTypes; nbt++)
2361 auto reward = make_shared<DecompositionReward>(nullModelSet->getSubstitutionModel(nbm), usai[nbt]);
2364 for (
size_t i = 0; i < nbSites; ++i)
2366 for (
size_t k = 0; k < mids.size(); k++)
2368 double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
2370 if (std::isnan(tmp))
2373 rewards[mids[k]][i][nbt] = tmp;
2381 auto wAlp = dynamic_pointer_cast<const CoreWordAlphabet>(nullModelSet->getAlphabet());
2382 float sizeWord = float((wAlp && !perWord) ? wAlp->getLength() : 1);
2388 for (
size_t i = 0; i < nbTypes; ++i)
2390 for (
size_t j = 0; j < nbSites; ++j)
2394 for (
size_t k = 0; k < nbBr; ++k)
2396 resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i) / rewards[ids[k]][j][i] * (perTime ? 1 : tree.
getNode(ids[k])->getDistanceToFather()) / sizeWord;
2406 const string& filename,
2407 const vector<int>& ids,
2410 size_t nbSites = counts.size();
2413 size_t nbBr = counts[0].size();
2416 file.open(filename.c_str());
2419 for (
size_t i = 0; i < nbBr; ++i)
2421 file <<
"\t" << ids[i];
2425 for (
size_t k = 0; k < nbSites; ++k)
2427 const Vdouble& countS = counts[k];
2429 for (
size_t i = 0; i < nbBr; ++i)
2431 file <<
"\t" << countS[i];
2442 const string& filename,
2446 size_t nbSites = counts.size();
2449 size_t nbTypes = counts[0].size();
2452 file.open(filename.c_str());
2455 for (
size_t i = 0; i < nbTypes; ++i)
2461 for (
size_t k = 0; k < nbSites; ++k)
2464 const Vdouble& resS = counts[k];
2465 for (
size_t i = 0; i < nbTypes; ++i)
2467 file <<
"\t" << resS[i];
2478 const string& filenamePrefix,
2479 const vector<int>& ids,
2483 size_t nbSites = counts.size();
2486 size_t nbBr = counts[0].size();
2489 size_t nbTypes = counts[0][0].size();
2493 for (
size_t i = 0; i < nbTypes; ++i)
2499 string path = filenamePrefix + name + string(
".count");
2502 file.open(path.c_str());
2505 for (
size_t k = 0; k < nbBr; ++k)
2507 file <<
"\t" << ids[k];
2511 for (
size_t j = 0; j < nbSites; ++j)
2516 for (
size_t k = 0; k < nbBr; ++k)
2518 file <<
"\t" << resS[k][i];
int getCoordinate() const override
static std::unique_ptr< DataTable > read(std::istream &in, const std::string &sep="\t", bool header=true, int rowNames=-1)
const char * what() const noexcept override
size_t getNumberOfSites() const override
void setSitePosition(size_t index, int position) override
Set the position of a given site.
virtual size_t getNodeIndex(int nodeId) const override
size_t getNumberOfBranches() const override
virtual const Node * getNode(size_t nodeIndex) const
virtual size_t getNumberOfBranches() const =0
virtual size_t getNumberOfSites() const =0
Likelihood ancestral states reconstruction: marginal method.
std::map< int, std::vector< size_t > > getAllAncestralStates() const override
Get all ancestral states for all nodes.
Legacy data storage class for probabilistic substitution mappings.
virtual void setNumberOfSites(size_t numberOfSites) override
Legacy interface for storing mapping data.
virtual size_t getNumberOfSubstitutionTypes() const =0
The phylogenetic node class.
virtual int getId() const
Get the node's id.
virtual const Node * getSon(size_t pos) const
virtual const Node * getFather() const
Get the father of this node is there is one.
virtual bool hasFather() const
Tell if this node has a father node.
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
virtual size_t getNumberOfSons() const
Substitution models manager for non-homogeneous / non-reversible models of evolution.
std::shared_ptr< const SubstitutionModelInterface > getSubstitutionModelForNode(int nodeId) const
std::shared_ptr< const SubstitutionModelInterface > getSubstitutionModel(size_t i) const
Return a markovian substitution model (or null)
The SubstitutionRegister interface.
virtual std::string getTypeName(size_t type) const =0
Get the name of a given substitution type.
virtual size_t getNumberOfSubstitutionTypes() const =0
virtual size_t getType(size_t fromState, size_t toState) const =0
Get the substitution type far a given pair of model states.
virtual const Site & site(size_t sitePosition) const override=0
The phylogenetic tree class.
virtual std::vector< const N * > getNodes() const
virtual N * getNode(int id, bool checkId=false)
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVVdouble > VVVVdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble