bpp-phyl3 3.0.0
SubstitutionMappingTools.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
10
11#include "../Likelihood/DataFlow/ForwardLikelihoodTree.h"
12#include "DecompositionReward.h"
16#include "RewardMappingTools.h"
18
19using namespace bpp;
20using namespace numeric;
21using namespace std;
22
23// From the STL:
24#include <iomanip>
25
26/******************************************************************************/
27
28unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeCounts(
30 const vector<uint>& edgeIds,
31 std::shared_ptr<const SubstitutionRegisterInterface> reg,
32 std::shared_ptr<const AlphabetIndex2> weights,
33 std::shared_ptr<const AlphabetIndex2> distances,
34 short unresolvedOption,
35 double threshold,
36 bool verbose)
37{
38 // Preamble:
39 if (!rltc.isInitialized())
40 throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
41
42 auto& sp = rltc.substitutionProcess();
43
44 if (edgeIds.size() == 0)
45 return make_unique<ProbabilisticSubstitutionMapping>(
46 sp.parametrizablePhyloTree(),
47 reg->getNumberOfSubstitutionTypes(),
50
51 DecompositionSubstitutionCount substitutionCount(reg, weights, distances);
52 return computeCounts(rltc, edgeIds, substitutionCount, unresolvedOption, threshold, verbose);
53}
54
55/******************************************************************************/
56
57unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeCounts(
59 const vector<uint>& edgeIds,
60 SubstitutionCountInterface& substitutionCount,
61 short unresolvedOption,
62 double threshold,
63 bool verbose)
64{
65 // Preamble:
66 if (!rltc.isInitialized())
67 throw Exception("SubstitutionMappingTools::computeCounts(). Likelihood object is not initialized.");
68
69 auto& sp = rltc.substitutionProcess();
70
71 if (edgeIds.size() == 0)
72 return make_unique<ProbabilisticSubstitutionMapping>(
73 sp.parametrizablePhyloTree(),
74 substitutionCount.getNumberOfSubstitutionTypes(),
77
78 auto processTree = rltc.getTreeNode(0);
79
80 /* First, set substitution counts */
81
82 // Map from models to counts
83 std::map<const SubstitutionModelInterface*, std::shared_ptr<SubstitutionCountInterface>> mModCount;
84
85 for (auto speciesId : edgeIds)
86 {
87 const auto& dagIndexes = rltc.getEdgesIds(speciesId, 0);
88
89 for (auto id:dagIndexes)
90 {
91 const auto& edge = processTree->getEdge(id);
92 if (edge->getBrLen()) // if edge with model on it
93 {
94 auto model = edge->getModel();
95
96 auto nMod = edge->getNMod();
97
98 auto tm = dynamic_pointer_cast<const TransitionModelInterface>(model->targetValue());
99
100 shared_ptr<const SubstitutionModelInterface> sm = nullptr;
101
102 if (nMod == 0)
103 {
104 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
105
106 if (!sm)
107 throw Exception("SubstitutionMappingTools::computeCounts : SubstitutionVectors possible only for SubstitutionModels, not in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
108 }
109 else
110 {
111 size_t nmod = nMod->targetValue();
112
113 auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
114 if (!ttm)
115 throw Exception("SubstitutionMappingTools::computeCounts : Expecting Mixed model in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
116
117 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
118
119 if (!sm)
120 throw Exception("SubstitutionMappingTools::computeCounts : Expecting Substitution model for submodel " + TextTools::toString(nmod) + " of mixed model " + tm->getName() + " in branch " + TextTools::toString(speciesId));
121 }
122
123 if (mModCount.find(sm.get()) == mModCount.end())
124 {
125 mModCount[sm.get()] = std::shared_ptr<SubstitutionCountInterface>(substitutionCount.clone());
126 mModCount[sm.get()]->setSubstitutionModel(sm);
127 }
128 }
129 }
130 }
131
132 auto ppt = sp.getParametrizablePhyloTree();
133
134 // A few variables we'll need:
135
136 size_t nbDistinctSites = rltc.getNumberOfDistinctSites();
137 size_t nbClasses = sp.getNumberOfClasses();
138
139 size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
140 size_t nbNodes = edgeIds.size();
141
142 const auto& rootPatternLinks = rltc.getRootArrayPositions();
143
144 // We create a Mapping objects
145
146 unique_ptr<ProbabilisticSubstitutionMapping> substitutions(new ProbabilisticSubstitutionMapping(*ppt, nbTypes, rootPatternLinks, nbDistinctSites));
147
148 // Compute the number of substitutions for each class and each branch in the tree
149
150 // Get the DAG of probabilities of the edges
152
153 if (verbose)
154 ApplicationTools::displayTask("Compute counts", true);
155
156 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = substitutions->allEdgesIterator();
157
158 size_t nn = 0;
159 for ( ; !brIt->end(); brIt->next())
160 {
161 if (verbose)
162 ApplicationTools::displayGauge(nn++, nbNodes - 1);
163
164 shared_ptr<PhyloBranchMapping> br = **brIt;
165
166 // For each branch
167 uint speciesId = substitutions->getEdgeIndex(br);
168
169 if (edgeIds.size() > 0 && !VectorTools::contains(edgeIds, (int)speciesId))
170 continue;
171
172 // #pragma GCC diagnostic push
173 // #pragma GCC diagnostic ignored "-Wdelete-non-abstract-non-virtual-dtor" //Disable warning coming from Eigen because of non-virtual destructor.
174 vector<RowLik> substitutionsForCurrentNode(nbTypes);
175 // #pragma GCC diagnostic pop
176 for (auto& sub : substitutionsForCurrentNode)
177 {
178 sub = RowLik::Zero((int)nbDistinctSites);
179 }
180
181 for (size_t ncl = 0; ncl < nbClasses; ncl++)
182 {
183 processTree = rltc.getTreeNode(ncl);
184 double pr = sp.getProbabilityForModel(ncl);
185
186 vector<RowLik> substitutionsForCurrentClass(nbTypes);
187 for (auto& sub:substitutionsForCurrentClass)
188 {
189 sub = RowLik::Zero((int)nbDistinctSites);
190 }
191
192 const auto& dagIndexes = rltc.getEdgesIds(speciesId, ncl);
193
194 Eigen::MatrixXd npxy;
195
196 // Sum on all dag edges for this speciesId
197 for (auto id : dagIndexes)
198 {
199 auto edge = processTree->getEdge(id);
200
201 auto tm = dynamic_pointer_cast<const TransitionModelInterface>(edge->model().targetValue());
202
203 auto nMod = edge->getNMod();
204
205 shared_ptr<const SubstitutionModelInterface> sm = nullptr;
206
207 if (nMod == 0)
208 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
209 else
210 {
211 size_t nmod = nMod->targetValue();
212
213 auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
214 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
215 }
216
217 auto subCount = mModCount[sm.get()];
218
219 const auto& likelihoodsTopEdge = rltc.getBackwardLikelihoodsAtEdgeForClass(id, ncl)->targetValue();
220
221 auto sonid = rltc.getForwardLikelihoodTree(ncl)->getSon(id);
222 auto fatid = rltc.getForwardLikelihoodTree(ncl)->getFatherOfEdge(id);
223
224 const auto& likelihoodsBotEdge = rltc.getForwardLikelihoodsAtNodeForClass(sonid, ncl)->targetValue();
225
226 const Eigen::MatrixXd& pxy = edge->getTransitionMatrix()->targetValue();
227
228 const auto& likelihoodsFather = rltc.getLikelihoodsAtNodeForClass(fatid, ncl)->targetValue();
229
230 for (size_t t = 0; t < nbTypes; ++t)
231 {
232 // compute all nxy * pxy first:
233
234 subCount->storeAllNumbersOfSubstitutions(edge->getBrLen()->getValue(), t + 1, npxy);
235
236 npxy.array() *= pxy.array();
237
238 // Now loop over sites:
239
240 auto counts = npxy * likelihoodsBotEdge;
241
242 auto bb = (cwise(likelihoodsTopEdge) * cwise(counts)).colwise().sum();
243
244 Eigen::VectorXd ff(likelihoodsBotEdge.cols());
245 switch (unresolvedOption)
246 {
249
250 // Nullify counts where sum likelihoods > 1 : ie unknown
251 for (auto i = 0; i < ff.size(); i++)
252 {
253 const DataLik s = likelihoodsBotEdge.col(i).sum();
254 if (s >= 2.)
255 ff[i] = (unresolvedOption == SubstitutionMappingTools::UNRESOLVED_ZERO) ? 0. : 1. / convert(s);
256 else
257 ff[i] = 1;
258 }
259
260 bb *= ff.array();
261 default:
262 ;
263 }
264
265 // Normalizes by likelihood on this node
266 auto cc = bb / cwise(likelihoodsFather);
267
268 // adds, with branch ponderation ( * edge / edge * father) probs
269 cwise(substitutionsForCurrentClass[t]) += cc * probaDAG.getProbaAtNode(fatid);
270 }
271 }
272
273 // sum for all rate classes, with class ponderation
274 for (size_t t = 0; t < nbTypes; ++t)
275 {
276 substitutionsForCurrentNode[t] += substitutionsForCurrentClass[t] * pr;
277 }
278 }
279
280 // Now we just have to copy the substitutions into the result vector:
281
282 for (size_t i = 0; i < nbDistinctSites; ++i)
283 {
284 for (size_t t = 0; t < nbTypes; ++t)
285 {
286 double x = convert(substitutionsForCurrentNode[t](Eigen::Index(i)));
287 if (std::isnan(x) || std::isinf(x))
288 {
289 if (verbose)
290 ApplicationTools::displayWarning("On branch " + TextTools::toString(speciesId) + ", site index " + TextTools::toString(i) + ", and type " + TextTools::toString(t) + ", counts could not be computed.");
291 (*br)(i, t) = 0;
292 }
293 else
294 {
295 if (threshold >= 0 && x > threshold)
296 {
297 if (verbose)
298 ApplicationTools::displayWarning("On branch " + TextTools::toString(speciesId) + ", site index" + TextTools::toString(i) + ", and type " + TextTools::toString(t) + " count has been ignored because it is presumably saturated.");
299 (*br)(i, t) = 0;
300 }
301 else
302 (*br)(i, t) = x;
303 }
304 }
305 }
306 } // end of loop on branches
307
308 if (verbose)
309 {
313 }
314
315 return substitutions;
316}
317
318/**************************************************************************************************/
319
320unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeNormalizations(
322 const vector<uint>& edgeIds,
323 std::shared_ptr<const BranchedModelSet> nullModels,
324 std::shared_ptr<const SubstitutionRegisterInterface> reg,
325 std::shared_ptr<const AlphabetIndex2> distances,
326 short unresolvedOption,
327 bool verbose)
328{
329 // Preamble:
330 if (!rltc.isInitialized())
331 throw Exception("SubstitutionMappingTools::computeNormalizations(). Likelihood object is not initialized.");
332
333 auto& sp = rltc.substitutionProcess();
334
335 if (edgeIds.size() == 0)
336 return make_unique<ProbabilisticSubstitutionMapping>(
337 sp.parametrizablePhyloTree(),
338 reg->getNumberOfSubstitutionTypes(),
341
342 auto processTree = rltc.getTreeNode(0);
343
344
345 /* First, set substitution rewards */
346
347 // Map from models to type-vector of rewards
348 std::map<const SubstitutionModelInterface*, std::vector<std::shared_ptr<DecompositionReward>>> mModRewards;
349
350 const auto& stateMap = sp.stateMap();
351
352 size_t nbTypes = reg->getNumberOfSubstitutionTypes();
353
354 size_t nbStates = stateMap.getNumberOfModelStates();
355 vector<int> supportedStates = stateMap.getAlphabetStates();
356
357 vector<shared_ptr<UserAlphabetIndex1>> vusai(nbTypes);
358 for (auto& usai : vusai)
359 {
360 usai.reset(new UserAlphabetIndex1(stateMap.getAlphabet()));
361 }
362
363 for (auto speciesId : edgeIds)
364 {
365 const auto& dagIndexes = rltc.getEdgesIds(speciesId, 0);
366
367 // look for matching null model
368 auto nullmodel = nullModels->getModelForBranch(speciesId);
369
370 for (auto id : dagIndexes)
371 {
372 const auto& edge = processTree->getEdge(id);
373 if (edge->getBrLen()) // if edge with model on it
374 {
375 auto model = edge->getModel();
376
377 auto nMod = edge->getNMod();
378
379 auto tm = dynamic_pointer_cast<const TransitionModelInterface>(model->targetValue());
380
381 shared_ptr<const SubstitutionModelInterface> sm = nullptr;
382 if (nMod == 0)
383 {
384 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
385
386 if (!sm)
387 throw Exception("SubstitutionMappingTools::computeCounts : SubstitutionVectors possible only for SubstitutionModels, not in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
388 }
389 else
390 {
391 size_t nmod = nMod->targetValue();
392
393 auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
394 if (!ttm)
395 throw Exception("SubstitutionMappingTools::computeCounts : Expecting Mixed model in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
396
397 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
398
399 if (!sm)
400 throw Exception("SubstitutionMappingTools::computeCounts : Expecting Substitution model for submodel " + TextTools::toString(nmod) + " of mixed model " + tm->getName() + " in branch " + TextTools::toString(speciesId));
401 }
402
403 // Sets vector of reward counts (depend on nullmodel)
404 if (mModRewards.find(sm.get()) == mModRewards.end())
405 {
406 // Look for matching substitution nullmodel
407 shared_ptr<const SubstitutionModelInterface> nullsm = nullptr;
408
409 if (nMod == 0)
410 {
411 nullsm = dynamic_pointer_cast<const SubstitutionModelInterface>(nullmodel);
412
413 if (!nullsm)
414 throw Exception("SubstitutionMappingTools::computeNormalizations : SubstitutionVectors possible only for SubstitutionModels, not in branch " + TextTools::toString(speciesId) + "for null model " + nullmodel->getName());
415 }
416 else
417 {
418 size_t nmod = nMod->targetValue();
419
420 auto nullttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(nullmodel);
421 if (!nullttm)
422 throw Exception("SubstitutionMappingTools::computeNormalizations : Expecting Mixed model in branch " + TextTools::toString(speciesId) + " for null model " + nullttm->getName());
423
424 nullsm = dynamic_pointer_cast<const SubstitutionModelInterface>(nullttm->getNModel(nmod));
425
426 if (!nullsm)
427 throw Exception("SubstitutionMappingTools::computeNormalizations : Expecting Substitution model for submodel " + TextTools::toString(nmod) + " of null mixed model " + nullmodel->getName() + " in branch " + TextTools::toString(speciesId));
428 }
429
430 for (auto& usai : vusai)
431 {
432 for (size_t i = 0; i < nbStates; ++i)
433 {
434 usai->setIndex(supportedStates[i], 0);
435 }
436 }
437
438 for (size_t i = 0; i < nbStates; ++i)
439 {
440 for (size_t j = 0; j < nbStates; ++j)
441 {
442 if (i != j)
443 {
444 size_t nbt = reg->getType(i, j);
445 if (nbt != 0)
446 vusai[nbt - 1]->setIndex(supportedStates[i], vusai[nbt - 1]->getIndex(supportedStates[i]) + nullsm->Qij(i, j) * (distances ? distances->getIndex(supportedStates[i], supportedStates[j]) : 1));
447 }
448 }
449 }
450
451 mModRewards[sm.get()] = vector<std::shared_ptr<DecompositionReward>>(nbTypes);
452 auto& mMdodsm = mModRewards[sm.get()];
453 for (size_t t = 0; t < nbTypes; ++t)
454 {
455 mMdodsm[t] = make_shared<DecompositionReward>(nullsm, vusai[t]);
456 }
457 }
458 }
459 }
460 }
461
464
465 // A few variables we'll need:
466
467 size_t nbDistinctSites = rltc.getNumberOfDistinctSites();
468 size_t nbNodes = edgeIds.size();
469 size_t nbClasses = sp.getNumberOfClasses();
470
471 const auto& rootPatternLinks = rltc.getRootArrayPositions();
472
473 // We create a Mapping objects
474
475 unique_ptr<ProbabilisticSubstitutionMapping> normalizations(new ProbabilisticSubstitutionMapping(*sp.getParametrizablePhyloTree(), nbTypes, rootPatternLinks, nbDistinctSites));
476
477 // Compute the reward for each class and each branch in the tree:
478
479 // Get the DAG of probabilities of the edges
481
482 if (verbose)
483 ApplicationTools::displayTask("Compute rewards", true);
484
485 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = normalizations->allEdgesIterator();
486
487 Eigen::MatrixXd rpxy;
488
489 size_t nn = 0;
490 for ( ; !brIt->end(); brIt->next())
491 {
492 if (verbose)
493 ApplicationTools::displayGauge(nn++, nbNodes - 1);
494
495 shared_ptr<PhyloBranchMapping> br = **brIt;
496
497 // For each branch
498 uint speciesId = normalizations->getEdgeIndex(br);
499
500 if (edgeIds.size() > 0 && !VectorTools::contains(edgeIds, (int)speciesId))
501 continue;
502
503 vector<RowLik> rewardsForCurrentNode(nbTypes);
504 for (auto& sub:rewardsForCurrentNode)
505 {
506 sub = RowLik::Zero((int)nbDistinctSites);
507 }
508
509 for (size_t ncl = 0; ncl < nbClasses; ncl++)
510 {
511 processTree = rltc.getTreeNode(ncl);
512 double pr = sp.getProbabilityForModel(ncl);
513
514 vector<RowLik> rewardsForCurrentClass(nbTypes);
515 for (auto& sub:rewardsForCurrentClass)
516 {
517 sub = RowLik::Zero((int)nbDistinctSites);
518 }
519
520 const auto& dagIndexes = rltc.getEdgesIds(speciesId, ncl);
521
522 // Sum on all dag edges for this speciesId
523 for (auto id:dagIndexes)
524 {
525 auto edge = processTree->getEdge(id);
526
527 auto tm = dynamic_pointer_cast<const TransitionModelInterface>(edge->getModel()->targetValue());
528
529 auto nMod = edge->getNMod();
530
531 shared_ptr<const SubstitutionModelInterface> sm = nullptr;
532
533 if (nMod == 0)
534 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
535 else
536 {
537 size_t nmod = nMod->targetValue();
538
539 auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
540 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
541 }
542
543 // Rewards for this model
544 auto subReward = mModRewards[sm.get()];
545
546 const auto& likelihoodsTopEdge = rltc.getBackwardLikelihoodsAtEdgeForClass(id, ncl)->targetValue();
547
548 auto sonid = rltc.getForwardLikelihoodTree(ncl)->getSon(id);
549 auto fatid = rltc.getForwardLikelihoodTree(ncl)->getFatherOfEdge(id);
550
551 const auto& likelihoodsBotEdge = rltc.getForwardLikelihoodsAtNodeForClass(sonid, ncl)->targetValue();
552
553 const Eigen::MatrixXd& pxy = edge->getTransitionMatrix()->accessValueConst();
554
555 const auto& likelihoodsFather = rltc.getLikelihoodsAtNodeForClass(fatid, ncl)->targetValue();
556
557 for (size_t t = 0; t < nbTypes; ++t)
558 {
559 // compute all rxy * pxy first:
560 subReward[t]->storeAllRewards(edge->getBrLen()->getValue(), rpxy);
561
562 rpxy.array() *= pxy.array();
563
564 // Now loop over sites:
565
566 auto rew = rpxy * likelihoodsBotEdge;
567
568 auto bb = (cwise(likelihoodsTopEdge) * cwise(rew)).colwise().sum();
569
570 // Nullify counts where sum likelihoods > 1 : ie unknown
571 Eigen::VectorXd ff(likelihoodsBotEdge.cols());
572
573 switch (unresolvedOption)
574 {
577
578 // Nullify counts where sum likelihoods > 1 : ie unknown
579 for (auto i = 0; i < ff.size(); i++)
580 {
581 const DataLik s = likelihoodsBotEdge.col(i).sum();
582 if (s >= 2.)
583 ff[i] = (unresolvedOption == SubstitutionMappingTools::UNRESOLVED_ZERO) ? 0. : 1. / convert(s);
584 else
585 ff[i] = 1;
586 }
587
588 bb *= ff.array();
589 default:
590 ;
591 }
592
593 // Normalizes by likelihood on this node
594 auto cc = bb / cwise(likelihoodsFather);
595
596 // adds, with branch ponderation ( * edge / edge * father) probs
597 cwise(rewardsForCurrentClass[t]) += cc * probaDAG.getProbaAtNode(fatid);
598 }
599 }
600
601 for (size_t t = 0; t < nbTypes; ++t)
602 {
603 rewardsForCurrentNode[t] += rewardsForCurrentClass[t] * pr;
604 }
605 }
606
607 // Now we just have to copy the substitutions into the result vector:
608 for (size_t i = 0; i < nbDistinctSites; ++i)
609 {
610 for (size_t t = 0; t < nbTypes; ++t)
611 {
612 (*br)(i, t) = convert(rewardsForCurrentNode[t](Eigen::Index(i)));
613 }
614 }
615 } // end of loop on branches
616
617 if (verbose)
618 {
622 }
623
624 return normalizations;
625}
626
627
628/************************************************************/
629
630unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeNormalizedCounts(
632 const vector<uint>& edgeIds,
633 std::shared_ptr<const BranchedModelSet> nullModels,
634 std::shared_ptr<const SubstitutionRegisterInterface> reg,
635 std::shared_ptr<const AlphabetIndex2> weights,
636 std::shared_ptr<const AlphabetIndex2> distances,
637 bool perTimeUnit,
638 uint siteSize,
639 short unresolvedOption,
640 double threshold,
641 bool verbose)
642{
643 unique_ptr<ProbabilisticSubstitutionMapping> counts(computeCounts(rltc, edgeIds, reg, weights, distances, unresolvedOption, threshold, verbose));
644 unique_ptr<ProbabilisticSubstitutionMapping> factors(computeNormalizations(rltc, edgeIds, nullModels, reg, distances, unresolvedOption, verbose));
645 return computeNormalizedCounts(*counts, *factors, edgeIds, perTimeUnit, siteSize);
646}
647
648/************************************************************/
649
650unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeNormalizedCounts(
653 const vector<uint>& edgeIds,
654 bool perTimeUnit,
655 uint siteSize)
656{
657 unique_ptr<ProbabilisticSubstitutionMapping> normCounts(counts.clone());
658
659 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
660 size_t nbDistinctSites = counts.getNumberOfDistinctSites();
661
662 // Iterate on branches
663
664 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = normCounts->allEdgesIterator();
665
666 for ( ; !brIt->end(); brIt->next())
667 {
668 shared_ptr<PhyloBranchMapping> brNormCount = **brIt;
669
670 VVdouble& brnCou = brNormCount->getCounts();
671
672 // For each branch
673 uint edid = normCounts->getEdgeIndex(brNormCount);
674
675 if (edgeIds.size() > 0 && !VectorTools::contains(edgeIds, (int)edid))
676 {
677 for (auto& brk : brnCou)
678 {
679 VectorTools::fill(brk, 0.);
680 }
681 continue;
682 }
683
684 shared_ptr<PhyloBranchMapping> brFactor = factors.getEdge(edid);
685 shared_ptr<PhyloBranchMapping> brCount = counts.getEdge(edid);
686
687 const VVdouble& cou = brCount->getCounts();
688 const VVdouble& fac = brFactor->getCounts();
689
690
691 // if not per time, multiply by the lengths of the branches of
692 // the input tree
693
694 double slg = (!perTimeUnit ? brCount->getLength() : 1) / siteSize;
695
696 for (size_t k = 0; k < nbDistinctSites; ++k)
697 {
698 Vdouble& ncou_k = brnCou[k];
699
700 const Vdouble& cou_k = cou[k];
701 const Vdouble& fac_k = fac[k];
702
703 for (size_t t = 0; t < nbTypes; ++t)
704 {
705 ncou_k[t] = (fac_k[t] != 0 ? cou_k[t] / fac_k[t] * slg : 0);
706 }
707 }
708 }
709
710 return normCounts;
711}
712
713/*******************************************************************************/
714/* Get trees of counts */
715/*******************************************************************************/
716
719 size_t type)
720{
721 auto pt = make_unique<PhyloTree>(counts);
722 size_t nbSites = counts.getNumberOfSites();
723
724 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
725
726 for ( ; !brIt->end(); brIt->next())
727 {
728 shared_ptr<PhyloBranchMapping> brm = (**brIt);
729 double x = 0;
730
731 for (size_t i = 0; i < nbSites; ++i)
732 {
733 x += brm->getSiteTypeCount(counts.getSiteIndex(i), type);
734 }
735
736 pt->getEdge(counts.getEdgeIndex(brm))->setLength(x);
737 }
738
739 return pt;
740}
741
742/*******************************************************************************/
743
747 size_t type)
748{
749 auto pt = make_unique<PhyloTree>(counts);
750 size_t nbSites = counts.getNumberOfSites();
751
752 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
753
754 for ( ; !brIt->end(); brIt->next())
755 {
756 shared_ptr<PhyloBranchMapping> brm = (**brIt);
757 shared_ptr<PhyloBranchMapping> brf = factors.getEdge(counts.getEdgeIndex(brm));
758
759 double x = 0, f = 0;
760
761 for (size_t i = 0; i < nbSites; ++i)
762 {
763 x += brm->getSiteTypeCount(counts.getSiteIndex(i), type);
764 f += brf->getSiteTypeCount(counts.getSiteIndex(i), type);
765 }
766
767 pt->getEdge(counts.getEdgeIndex(brm))->setLength(x / f);
768 }
769
770 return pt;
771}
772
773
774/********************************************************************************/
775/* Get vectors of counts */
776/********************************************************************************/
777
780 const vector<uint>& ids)
781{
782 const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
783 size_t nbBr = idc.size();
784
785 size_t nbSites = counts.getNumberOfSites();
786
787 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
788
789 VVVdouble result;
790 VectorTools::resize3(result, nbSites, nbBr, nbTypes);
791
792 for (size_t j = 0; j < nbSites; ++j)
793 {
794 VVdouble& resS = result[j];
795 size_t siteIndex = counts.getSiteIndex(j);
796
797 for (size_t k = 0; k < nbBr; ++k)
798 {
799 Vdouble& resSB = resS[k];
800
801 for (size_t i = 0; i < nbTypes; ++i)
802 {
803 resSB[i] = counts(idc[k], siteIndex, i);
804 }
805 }
806 }
807
808 return result;
809}
810
811
812/**************************************************************************************************/
813
816 size_t site)
817{
818 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
819 size_t siteIndex = counts.getSiteIndex(site);
820
821 Vdouble v(counts.getNumberOfBranches(), 0);
822 for ( ; !brIt->end(); brIt->next())
823 {
824 v[counts.getEdgeIndex(**brIt)] = VectorTools::sum((***brIt).getSiteCount(siteIndex));
825 }
826
827 return v;
828}
829
833 size_t site)
834{
835 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
836 size_t siteIndex = counts.getSiteIndex(site);
837
838 Vdouble v(counts.getNumberOfBranches(), 0);
839 for ( ; !brIt->end(); brIt->next())
840 {
841 shared_ptr<PhyloBranchMapping> brm = (**brIt);
842
843 uint edid = counts.getEdgeIndex(brm);
844 shared_ptr<PhyloBranchMapping> brf = factors.getEdge(edid);
845
846
847 v[edid] = VectorTools::sum(brm->getSiteCount(siteIndex)) / VectorTools::sum(brf->getSiteCount(siteIndex));
848 }
849
850 return v;
851}
852
853/**************************************************************************************************/
854
857 const vector<uint>& ids)
858{
859 const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
860 size_t nbBr = idc.size();
861
862 size_t nbSites = counts.getNumberOfSites();
863
864 VVdouble result;
865 VectorTools::resize2(result, nbSites, nbBr);
866
867 for (size_t k = 0; k < nbSites; ++k)
868 {
869 vector<double> countsf(SubstitutionMappingTools::getCountsForSitePerBranch(counts, k));
870 Vdouble* resS = &result[k];
871
872 for (size_t i = 0; i < nbBr; ++i)
873 {
874 (*resS)[i] = countsf[idc[i]];
875 }
876 }
877 return result;
878}
879
883 const vector<uint>& ids)
884{
885 const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
886 size_t nbBr = idc.size();
887
888 size_t nbSites = counts.getNumberOfSites();
889
890 VVdouble result;
891 VectorTools::resize2(result, nbSites, nbBr);
892
893 for (size_t k = 0; k < nbSites; ++k)
894 {
895 vector<double> countsf(SubstitutionMappingTools::getCountsForSitePerBranch(counts, factors, k));
896 Vdouble* resS = &result[k];
897
898 for (size_t i = 0; i < nbBr; ++i)
899 {
900 (*resS)[i] = countsf[idc[i]];
901 }
902 }
903 return result;
904}
905
906/**************************************************************************************************/
907
910 uint branchId)
911{
912 size_t nbSites = counts.getNumberOfSites();
913 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
914 Vdouble v(nbTypes, 0);
915 shared_ptr<PhyloBranchMapping> br = counts.getEdge(branchId);
916
917 for (size_t i = 0; i < nbSites; ++i)
918 {
919 for (size_t t = 0; t < nbTypes; ++t)
920 {
921 v[t] += br->getSiteTypeCount(counts.getSiteIndex(i), t);
922 }
923 }
924
925 return v;
926}
927
928/**************************************************************************************************/
929
932 const vector<uint>& ids)
933{
934 VVdouble result;
935 const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
936
937 for (auto id : idc)
938 {
939 result.push_back(SubstitutionMappingTools::getCountsForBranchPerType(counts, id));
940 }
941
942 return result;
943}
944
945/**************************************************************************************************/
946
949 const vector<uint>& ids)
950{
951 const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
952 size_t nbBr = idc.size();
953
954 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
955
956 VVdouble result;
957 VectorTools::resize2(result, nbTypes, nbBr);
958
959 for (size_t i = 0; i < idc.size(); i++)
960 {
962 for (size_t nbt = 0; nbt < nbTypes; nbt++)
963 {
964 result[nbt][i] = cou[nbt];
965 }
966 }
967
968 return result;
969}
970
971
972/************************************************************/
973
974
977 const vector<uint>& ids,
978 std::shared_ptr<const SubstitutionRegisterInterface> reg,
979 std::shared_ptr<const AlphabetIndex2> weights,
980 std::shared_ptr<const AlphabetIndex2> distances,
981 short unresolvedOption,
982 double threshold,
983 bool verbose)
984{
985 auto psm = computeCounts(rltc, ids, reg, weights, distances, unresolvedOption, threshold, verbose);
986
987 VVdouble result = getCountsPerTypePerBranch(*psm, ids);
988
989 auto creg = dynamic_pointer_cast<const CategorySubstitutionRegister>(reg);
990
991 if (creg && !creg->isStationary())
992 {
993 size_t nbTypes = result[0].size();
994
995 for (size_t k = 0; k < ids.size(); ++k)
996 {
997 vector<double> freqs(0); // = rltc.getPosteriorStateFrequencies(ids[k]);
998 // Compute frequencies for types:
999
1000 vector<double> freqsTypes(creg->getNumberOfCategories());
1001 for (size_t i = 0; i < freqs.size(); ++i)
1002 {
1003 size_t c = creg->getCategory(i);
1004 freqsTypes[c - 1] += freqs[i];
1005 }
1006
1007 // We divide the counts by the frequencies and rescale:
1008
1009 double s = VectorTools::sum(result[k]);
1010 for (size_t t = 0; t < nbTypes; ++t)
1011 {
1012 result[k][t] /= freqsTypes[creg->getCategoryFrom(t + 1) - 1];
1013 }
1014
1015 double s2 = VectorTools::sum(result[k]);
1016 // Scale:
1017
1018 result[k] = (result[k] / s2) * s;
1019 }
1020 }
1021 return result;
1022}
1023
1024
1025/**************************************************************************************************/
1026
1028{
1029 const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
1030
1031 size_t siteIndex = counts.getSiteIndex(site);
1032
1033 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1034 Vdouble v(nbTypes, 0);
1035
1036 for (auto id : idc)
1037 {
1038 shared_ptr<PhyloBranchMapping> br = counts.getEdge(id);
1039 for (size_t t = 0; t < nbTypes; ++t)
1040 {
1041 v[t] += (*br)(siteIndex, t);
1042 }
1043 }
1044
1045 return v;
1046}
1047
1048/**************************************************************************************************/
1049
1052 const ProbabilisticSubstitutionMapping& factors,
1053 size_t site,
1054 bool perTimeUnit,
1055 uint siteSize)
1056{
1057 size_t siteIndex = counts.getSiteIndex(site);
1058 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1059
1060 Vdouble v(nbTypes, 0);
1061 Vdouble n(nbTypes, 0);
1062
1063 double lg = (!perTimeUnit ? 0 : 1);
1064 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
1065 for ( ; !brIt->end(); brIt->next())
1066 {
1067 for (size_t t = 0; t < nbTypes; ++t)
1068 {
1069 v[t] += (***brIt)(siteIndex, t);
1070 }
1071 if (!perTimeUnit)
1072 lg += (**brIt)->getLength();
1073 }
1074
1075 double slg = lg / siteSize;
1076
1077 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> nrIt = factors.allEdgesIterator();
1078 for ( ; !nrIt->end(); nrIt->next())
1079 {
1080 for (size_t t = 0; t < nbTypes; ++t)
1081 {
1082 n[t] += (***nrIt)(siteIndex, t);
1083 }
1084 }
1085
1086 for (size_t t = 0; t < nbTypes; ++t)
1087 {
1088 v[t] = v[t] / n[t] * slg;
1089 }
1090
1091 return v;
1092}
1093
1094/**************************************************************************************************/
1095
1098 const ProbabilisticSubstitutionMapping& factors,
1099 size_t site,
1100 const vector<uint>& ids,
1101 bool perTimeUnit,
1102 uint siteSize)
1103{
1104 const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
1105
1106 size_t siteIndex = counts.getSiteIndex(site);
1107 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1108
1109 Vdouble v(nbTypes, 0);
1110 Vdouble n(nbTypes, 0);
1111
1112 double lg = (!perTimeUnit ? 0 : 1);
1113 for (auto id : idc)
1114 {
1115 shared_ptr<PhyloBranchMapping> br = counts.getEdge(id);
1116 for (size_t t = 0; t < nbTypes; ++t)
1117 {
1118 v[t] += (*br)(siteIndex, t);
1119 }
1120 if (!perTimeUnit)
1121 lg += br->getLength();
1122 }
1123
1124 double slg = lg / siteSize;
1125
1126 for (auto id : idc)
1127 {
1128 shared_ptr<PhyloBranchMapping> br = factors.getEdge(id);
1129 for (size_t t = 0; t < nbTypes; ++t)
1130 {
1131 n[t] += (*br)(siteIndex, t);
1132 }
1133 }
1134
1135 for (size_t t = 0; t < nbTypes; ++t)
1136 {
1137 v[t] = v[t] / n[t] * slg;
1138 }
1139
1140 return v;
1141}
1142
1143/**************************************************************************************************/
1144
1147 const ProbabilisticSubstitutionMapping& factors,
1148 bool perTimeUnit,
1149 uint siteSize)
1150{
1151 size_t nbSites = counts.getNumberOfSites();
1152 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1153 VVdouble result;
1154 VectorTools::resize2(result, nbSites, nbTypes);
1155
1156 for (size_t k = 0; k < nbSites; ++k)
1157 {
1158 result[k] = SubstitutionMappingTools::getCountsForSitePerType(counts, factors, k, perTimeUnit, siteSize);
1159 }
1160
1161 return result;
1162}
1163
1164/**************************************************************************************************/
1165
1167{
1168 const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
1169
1170 size_t nbSites = counts.getNumberOfSites();
1171 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1172 VVdouble result;
1173 VectorTools::resize2(result, nbSites, nbTypes);
1174
1175 for (size_t k = 0; k < nbSites; ++k)
1176 {
1177 result[k] = SubstitutionMappingTools::getCountsForSitePerType(counts, k, idc);
1178 }
1179
1180 return result;
1181}
1182
1183/**************************************************************************************************/
1184
1187 const ProbabilisticSubstitutionMapping& factors,
1188 const vector<uint>& ids,
1189 bool perTimeUnit,
1190 uint siteSize)
1191{
1192 const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
1193
1194 size_t nbSites = counts.getNumberOfSites();
1195 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1196 VVdouble result;
1197 VectorTools::resize2(result, nbSites, nbTypes);
1198
1199 for (size_t k = 0; k < nbSites; ++k)
1200 {
1201 result[k] = SubstitutionMappingTools::getCountsForSitePerType(counts, factors, k, idc, perTimeUnit, siteSize);
1202 }
1203
1204 return result;
1205}
1206
1207/**************************************************************************************************/
1208
1210{
1211 size_t siteIndex = counts.getSiteIndex(site);
1212 double sumSquare = 0;
1213
1214 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
1215
1216 size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1217 for ( ; !brIt->end(); brIt->next())
1218 {
1219 double sum = 0;
1220 for (size_t t = 0; t < nbTypes; ++t)
1221 {
1222 sum += (***brIt)(siteIndex, t);
1223 }
1224
1225 sumSquare += sum * sum;
1226 }
1227 return sqrt(sumSquare);
1228}
1229
1230
1231/******************************************************/
1232/* OUTPUT */
1233/******************************************************/
1234
1236 const string& filename,
1237 const vector<uint>& ids,
1238 const AlignmentDataInterface& sites,
1239 const VVdouble& counts)
1240{
1241 size_t nbSites = counts.size();
1242 if (nbSites == 0)
1243 return;
1244 size_t nbBr = counts[0].size();
1245
1246 ofstream file;
1247 file.precision(10);
1248 file.open(filename.c_str());
1249
1250 file << "[Site]";
1251 for (size_t i = 0; i < nbBr; ++i)
1252 {
1253 file << "\t" << ids[i];
1254 }
1255 file << endl;
1256
1257 for (size_t k = 0; k < nbSites; ++k)
1258 {
1259 const Vdouble& countS = counts[k];
1260 file << "[" << sites.site(k).getCoordinate() << "]";
1261 for (size_t i = 0; i < nbBr; ++i)
1262 {
1263 file << "\t" << countS[i];
1264 }
1265 file << endl;
1266 }
1267 file.close();
1268}
1269
1270
1271/**************************************************************************************************/
1272
1274 const string& filename,
1276 const AlignmentDataInterface& sites,
1277 const VVdouble& counts)
1278{
1279 size_t nbSites = counts.size();
1280 if (nbSites == 0)
1281 return;
1282 size_t nbTypes = counts[0].size();
1283
1284 ofstream file;
1285 file.precision(10);
1286 file.open(filename.c_str());
1287
1288 file << "[Site]";
1289 for (size_t i = 0; i < nbTypes; ++i)
1290 {
1291 file << "\t" << reg.getTypeName(i + 1);
1292 }
1293 file << endl;
1294
1295 for (size_t k = 0; k < nbSites; ++k)
1296 {
1297 file << "[" << sites.site(k).getCoordinate() << "]";
1298 const Vdouble& resS = counts[k];
1299 for (size_t i = 0; i < nbTypes; ++i)
1300 {
1301 file << "\t" << resS[i];
1302 }
1303 file << endl;
1304 }
1305 file.close();
1306}
1307
1308/**************************************************************************************************/
1309
1311 const string& filename,
1313 const AlignmentDataInterface& sites,
1314 const VVdouble& counts)
1315{
1316 size_t nbSites = counts.size();
1317 if (nbSites == 0)
1318 return;
1319
1320 size_t nbTypes = counts[0].size();
1321
1322 ofstream file;
1323 file.precision(10);
1324 file.open(filename.c_str());
1325
1326 for (size_t i = 0; i < nbTypes; ++i)
1327 {
1328 double sumtype = 0;
1329 for (size_t k = 0; k < nbSites; ++k)
1330 {
1331 sumtype += counts[k][i];
1332 }
1333
1334 file << reg.getTypeName(i + 1) << "\t" << sumtype << endl;
1335 }
1336 file.close();
1337}
1338
1339
1340/**************************************************************************************************/
1341
1343 const string& filenamePrefix,
1344 const vector<uint>& ids,
1346 const AlignmentDataInterface& sites,
1347 const VVVdouble& counts)
1348{
1349 size_t nbSites = counts.size();
1350 if (nbSites == 0)
1351 return;
1352 size_t nbBr = counts[0].size();
1353 if (nbBr == 0)
1354 return;
1355 size_t nbTypes = counts[0][0].size();
1356
1357 ofstream file;
1358 file.precision(10);
1359
1360 for (size_t i = 0; i < nbTypes; ++i)
1361 {
1362 string name = reg.getTypeName(i + 1);
1363 if (name == "")
1364 name = TextTools::toString(i + 1);
1365
1366 string path = filenamePrefix + name + string(".count");
1367
1368 ApplicationTools::displayResult(string("Output counts of type ") + TextTools::toString(i + 1) + string(" to file"), path);
1369 file.open(path.c_str());
1370
1371 file << "[Site]";
1372 for (size_t k = 0; k < nbBr; ++k)
1373 {
1374 file << "\t" << ids[k];
1375 }
1376 file << endl;
1377
1378 for (size_t j = 0; j < nbSites; ++j)
1379 {
1380 const VVdouble& resS = counts[j];
1381
1382 file << "[" << sites.site(j).getCoordinate() << "]";
1383
1384 for (size_t k = 0; k < nbBr; ++k)
1385 {
1386 file << "\t" << resS[k][i];
1387 }
1388 file << endl;
1389 }
1390 file.close();
1391 }
1392}
1393
1394
1395/**************************************************************************************************/
1396
1398 const ProbabilisticSubstitutionMapping& substitutions,
1399 const AlignmentDataInterface& sites,
1400 size_t type,
1401 ostream& out)
1402{
1403 if (!out)
1404 throw IOException("SubstitutionMappingTools::writeToStream. Can't write to stream.");
1405 out << "Branches";
1406 out << "\tMean";
1407 for (size_t i = 0; i < substitutions.getNumberOfSites(); ++i)
1408 {
1409 out << "\tSite" << sites.site(i).getCoordinate();
1410 }
1411 out << endl;
1412
1413 unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = substitutions.allEdgesIterator();
1414
1415 for ( ; !brIt->end(); brIt->next())
1416 {
1417 const shared_ptr<PhyloBranchMapping> br = **brIt;
1418
1419 out << substitutions.getEdgeIndex(br) << "\t" << br->getLength();
1420
1421 for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
1422 {
1423 out << "\t" << (*br)(substitutions.getSiteIndex(i), type);
1424 }
1425
1426 out << endl;
1427 }
1428}
1429
1430/**************************************************************************************************/
1431
1433{
1434 try
1435 {
1436 auto data = DataTable::read(in, "\t", true, -1);
1437 vector<string> ids = data->getColumn(0);
1438 data->deleteColumn(0); // Remove ids
1439 data->deleteColumn(0); // Remove means
1440 // Now parse the table:
1441 size_t nbSites = data->getNumberOfColumns();
1442 substitutions.setNumberOfSites(nbSites);
1443
1444 auto brIt = substitutions.allEdgesIterator();
1445
1446 for ( ; !brIt->end(); brIt->next())
1447 {
1448 const shared_ptr<PhyloBranchMapping> br = **brIt;
1449 uint brid = substitutions.getEdgeIndex(br);
1450 for (size_t j = 0; j < nbSites; j++)
1451 {
1452 (*br)(j, type) = TextTools::toDouble((*data)(brid, j));
1453 }
1454 }
1455
1456 // Parse the header:
1457 for (size_t i = 0; i < nbSites; ++i)
1458 {
1459 string siteTxt = data->getColumnName(i);
1460 int site = 0;
1461 if (siteTxt.substr(0, 4) == "Site")
1462 site = TextTools::to<int>(siteTxt.substr(4));
1463 else
1464 site = TextTools::to<int>(siteTxt);
1465 substitutions.setSitePosition(i, site);
1466 }
1467 }
1468 catch (Exception& e)
1469 {
1470 throw IOException(string("Bad input file. ") + e.what());
1471 }
1472}
void setSitePosition(size_t index, int position)
Set the position of a given site.
Definition: Mapping.h:103
size_t getNumberOfSites() const
Definition: Mapping.h:111
static void displayTask(const std::string &text, bool eof=false)
static std::shared_ptr< OutputStream > message
static void displayTaskDone()
static void displayWarning(const std::string &text)
static void displayResult(const std::string &text, const T &result)
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
virtual std::vector< EdgeIndex > getAllEdgesIndexes() const=0
virtual std::unique_ptr< EdgeIterator > allEdgesIterator()=0
virtual EdgeIndex getEdgeIndex(const std::shared_ptr< E > edgeObject) const=0
virtual std::shared_ptr< E > getEdge(EdgeIndex edgeIndex) const=0
virtual int getCoordinate() const=0
static std::unique_ptr< DataTable > read(std::istream &in, const std::string &sep="\t", bool header=true, int rowNames=-1)
Analytical substitution count using the eigen decomposition method.
const char * what() const noexcept override
static Self Zero(Eigen::Index rows, Eigen::Index cols)
SiteLikelihoodsRef getLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
Get shrunked conditional likelihood matrix at Node (ie just above the node), for a given rate class.
std::shared_ptr< ProcessTree > getTreeNode(size_t nCat)
Get process tree for a rate category.
std::shared_ptr< ForwardLikelihoodTree > getForwardLikelihoodTree(size_t nCat)
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
ConditionalLikelihoodRef getBackwardLikelihoodsAtEdgeForClass(uint edgeId, size_t nCat)
ConditionalLikelihoodRef getForwardLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
const DAGindexes & getEdgesIds(uint speciesId, size_t nCat) const
Get indexes of the non-empty edges in the Likelihood DAG that have a given species index for a given ...
virtual bool isInitialized() const
Data storage class for probabilistic substitution mappings.
void setNumberOfSites(size_t numberOfSites) override
ProbabilisticSubstitutionMapping * clone() const override
double getProbaAtNode(PhyloTree::NodeIndex nodeId)
The SubstitutionsCount interface.
virtual SubstitutionCountInterface * clone() const =0
virtual size_t getNumberOfSubstitutionTypes() const
Short cut function, equivalent to getSubstitutionRegister().getNumberOfSubstitutionTypes().
static Vdouble getCountsForSitePerType(const ProbabilisticSubstitutionMapping &counts, size_t site, const std::vector< uint > &ids=Vuint(0))
Methods that sum on branches need raw mapping trees, or raw mapping trees and normalization trees (ie...
static void readFromStream(std::istream &in, ProbabilisticSubstitutionMapping &substitutions, size_t type)
Read the substitutions std::vectors from a stream.
static const short UNRESOLVED_ZERO
Constants describing how unresolved characters are counted:
static std::unique_ptr< PhyloTree > getTreeForType(const ProbabilisticSubstitutionMapping &counts, size_t type)
Methods to get trees of counts. These methods can use raw or normalized counting trees (ie computed w...
static VVdouble getCountsPerSitePerBranch(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all type of substitutions for each site for each branch.
static void outputPerSitePerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVdouble &counts)
Output Per Site Per Type in SGED format.
static Vdouble getCountsForSitePerBranch(const ProbabilisticSubstitutionMapping &counts, size_t site)
Sum all type of substitutions for each branch of a given position.
static VVdouble computeCountsPerTypePerBranch(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &ids, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the sum over all branches of the counts per type per branch.
static VVVdouble getCountsPerSitePerBranchPerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Methods to get std::vectors of counts.
static void writeToStream(const ProbabilisticSubstitutionMapping &substitutions, const AlignmentDataInterface &sites, size_t type, std::ostream &out)
Write the substitutions std::vectors to a stream.
static Vdouble getCountsForBranchPerType(const ProbabilisticSubstitutionMapping &counts, uint branchId)
Sum all sites substitutions for each type of a given branch.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizedCounts(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &edgeIds, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, bool perTimeUnit=false, uint siteSize=1, short unresolvedOption=SubstitutionMappingTools::UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the substitution counts tree, normalized by the models of "null" process on each branch,...
static VVdouble getCountsPerTypePerBranch(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
static void outputPerSitePerBranch(const std::string &filename, const std::vector< uint > &ids, const AlignmentDataInterface &sites, const VVdouble &counts)
Outputs of counts.
static void outputPerSitePerBranchPerType(const std::string &filenamePrefix, const std::vector< uint > &ids, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVVdouble &counts)
Output Per Site Per Branch Per Type, one SGED file per type.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizations(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &edgeIds, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, bool verbose=true)
Compute the normalizations tree due to the models of "null" process on each branch,...
static VVdouble getCountsPerBranchPerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all sites substitutions for each branch for each type.
static VVdouble getCountsPerSitePerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all type of substitutions for each site for each type.
static double getNormForSite(const ProbabilisticSubstitutionMapping &counts, size_t site)
Compute the norm of a substitution std::vector for a given position.
static void outputPerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVdouble &counts)
Output Per Type.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeCounts(LikelihoodCalculationSingleProcess &rltc, SubstitutionCountInterface &substitutionCount, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Methods to compute mapping Trees.
The SubstitutionRegister interface.
virtual std::string getTypeName(size_t type) const =0
Get the name of a given substitution type.
virtual const CoreSiteInterface & site(size_t siteIndex) const=0
static T sum(const std::vector< T > &v1)
static void resize3(VVVdouble &vvv, size_t n1, size_t n2, size_t n3)
static bool contains(const std::vector< T > &vec, T el)
static void resize2(VVdouble &vv, size_t n1, size_t n2)
static void fill(std::vector< T > &v, T value)
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string toString(T t)
bool isinf(const double &d)
T & cwise(T &t)
Definition: DataFlowCWise.h:29
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
double convert(const bpp::ExtendedFloat &ef)
std::vector< Vdouble > VVdouble
std::vector< unsigned int > Vuint