bpp-phyl3  3.0.0
SubstitutionMappingTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
9 #include <Bpp/Text/TextTools.h>
10 
11 #include "../Likelihood/DataFlow/ForwardLikelihoodTree.h"
12 #include "DecompositionReward.h"
16 #include "RewardMappingTools.h"
18 
19 using namespace bpp;
20 using namespace numeric;
21 using namespace std;
22 
23 // From the STL:
24 #include <iomanip>
25 
26 /******************************************************************************/
27 
28 unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeCounts(
30  const vector<uint>& edgeIds,
31  std::shared_ptr<const SubstitutionRegisterInterface> reg,
32  std::shared_ptr<const AlphabetIndex2> weights,
33  std::shared_ptr<const AlphabetIndex2> distances,
34  short unresolvedOption,
35  double threshold,
36  bool verbose)
37 {
38  // Preamble:
39  if (!rltc.isInitialized())
40  throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
41 
42  auto& sp = rltc.substitutionProcess();
43 
44  if (edgeIds.size() == 0)
45  return make_unique<ProbabilisticSubstitutionMapping>(
46  sp.parametrizablePhyloTree(),
47  reg->getNumberOfSubstitutionTypes(),
48  rltc.getRootArrayPositions(),
50 
51  DecompositionSubstitutionCount substitutionCount(reg, weights, distances);
52  return computeCounts(rltc, edgeIds, substitutionCount, unresolvedOption, threshold, verbose);
53 }
54 
55 /******************************************************************************/
56 
57 unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeCounts(
59  const vector<uint>& edgeIds,
60  SubstitutionCountInterface& substitutionCount,
61  short unresolvedOption,
62  double threshold,
63  bool verbose)
64 {
65  // Preamble:
66  if (!rltc.isInitialized())
67  throw Exception("SubstitutionMappingTools::computeCounts(). Likelihood object is not initialized.");
68 
69  auto& sp = rltc.substitutionProcess();
70 
71  if (edgeIds.size() == 0)
72  return make_unique<ProbabilisticSubstitutionMapping>(
73  sp.parametrizablePhyloTree(),
74  substitutionCount.getNumberOfSubstitutionTypes(),
75  rltc.getRootArrayPositions(),
77 
78  auto processTree = rltc.getTreeNode(0);
79 
80  /* First, set substitution counts */
81 
82  // Map from models to counts
83  std::map<const SubstitutionModelInterface*, std::shared_ptr<SubstitutionCountInterface>> mModCount;
84 
85  for (auto speciesId : edgeIds)
86  {
87  const auto& dagIndexes = rltc.getEdgesIds(speciesId, 0);
88 
89  for (auto id:dagIndexes)
90  {
91  const auto& edge = processTree->getEdge(id);
92  if (edge->getBrLen()) // if edge with model on it
93  {
94  auto model = edge->getModel();
95 
96  auto nMod = edge->getNMod();
97 
98  auto tm = dynamic_pointer_cast<const TransitionModelInterface>(model->targetValue());
99 
100  shared_ptr<const SubstitutionModelInterface> sm = nullptr;
101 
102  if (nMod == 0)
103  {
104  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
105 
106  if (!sm)
107  throw Exception("SubstitutionMappingTools::computeCounts : SubstitutionVectors possible only for SubstitutionModels, not in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
108  }
109  else
110  {
111  size_t nmod = nMod->targetValue();
112 
113  auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
114  if (!ttm)
115  throw Exception("SubstitutionMappingTools::computeCounts : Expecting Mixed model in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
116 
117  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
118 
119  if (!sm)
120  throw Exception("SubstitutionMappingTools::computeCounts : Expecting Substitution model for submodel " + TextTools::toString(nmod) + " of mixed model " + tm->getName() + " in branch " + TextTools::toString(speciesId));
121  }
122 
123  if (mModCount.find(sm.get()) == mModCount.end())
124  {
125  mModCount[sm.get()] = std::shared_ptr<SubstitutionCountInterface>(substitutionCount.clone());
126  mModCount[sm.get()]->setSubstitutionModel(sm);
127  }
128  }
129  }
130  }
131 
132  auto ppt = sp.getParametrizablePhyloTree();
133 
134  // A few variables we'll need:
135 
136  size_t nbDistinctSites = rltc.getNumberOfDistinctSites();
137  size_t nbClasses = sp.getNumberOfClasses();
138 
139  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
140  size_t nbNodes = edgeIds.size();
141 
142  const auto& rootPatternLinks = rltc.getRootArrayPositions();
143 
144  // We create a Mapping objects
145 
146  unique_ptr<ProbabilisticSubstitutionMapping> substitutions(new ProbabilisticSubstitutionMapping(*ppt, nbTypes, rootPatternLinks, nbDistinctSites));
147 
148  // Compute the number of substitutions for each class and each branch in the tree
149 
150  // Get the DAG of probabilities of the edges
151  ProbabilityDAG probaDAG(rltc.getForwardLikelihoodTree(0));
152 
153  if (verbose)
154  ApplicationTools::displayTask("Compute counts", true);
155 
156  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = substitutions->allEdgesIterator();
157 
158  size_t nn = 0;
159  for ( ; !brIt->end(); brIt->next())
160  {
161  if (verbose)
162  ApplicationTools::displayGauge(nn++, nbNodes - 1);
163 
164  shared_ptr<PhyloBranchMapping> br = **brIt;
165 
166  // For each branch
167  uint speciesId = substitutions->getEdgeIndex(br);
168 
169  if (edgeIds.size() > 0 && !VectorTools::contains(edgeIds, (int)speciesId))
170  continue;
171 
172 // #pragma GCC diagnostic push
173 // #pragma GCC diagnostic ignored "-Wdelete-non-abstract-non-virtual-dtor" //Disable warning coming from Eigen because of non-virtual destructor.
174  vector<RowLik> substitutionsForCurrentNode(nbTypes);
175 // #pragma GCC diagnostic pop
176  for (auto& sub : substitutionsForCurrentNode)
177  {
178  sub = RowLik::Zero((int)nbDistinctSites);
179  }
180 
181  for (size_t ncl = 0; ncl < nbClasses; ncl++)
182  {
183  processTree = rltc.getTreeNode(ncl);
184  double pr = sp.getProbabilityForModel(ncl);
185 
186  vector<RowLik> substitutionsForCurrentClass(nbTypes);
187  for (auto& sub:substitutionsForCurrentClass)
188  {
189  sub = RowLik::Zero((int)nbDistinctSites);
190  }
191 
192  const auto& dagIndexes = rltc.getEdgesIds(speciesId, ncl);
193 
194  Eigen::MatrixXd npxy;
195 
196  // Sum on all dag edges for this speciesId
197  for (auto id : dagIndexes)
198  {
199  auto edge = processTree->getEdge(id);
200 
201  auto tm = dynamic_pointer_cast<const TransitionModelInterface>(edge->model().targetValue());
202 
203  auto nMod = edge->getNMod();
204 
205  shared_ptr<const SubstitutionModelInterface> sm = nullptr;
206 
207  if (nMod == 0)
208  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
209  else
210  {
211  size_t nmod = nMod->targetValue();
212 
213  auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
214  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
215  }
216 
217  auto subCount = mModCount[sm.get()];
218 
219  const auto& likelihoodsTopEdge = rltc.getBackwardLikelihoodsAtEdgeForClass(id, ncl)->targetValue();
220 
221  auto sonid = rltc.getForwardLikelihoodTree(ncl)->getSon(id);
222  auto fatid = rltc.getForwardLikelihoodTree(ncl)->getFatherOfEdge(id);
223 
224  const auto& likelihoodsBotEdge = rltc.getForwardLikelihoodsAtNodeForClass(sonid, ncl)->targetValue();
225 
226  const Eigen::MatrixXd& pxy = edge->getTransitionMatrix()->targetValue();
227 
228  const auto& likelihoodsFather = rltc.getLikelihoodsAtNodeForClass(fatid, ncl)->targetValue();
229 
230  for (size_t t = 0; t < nbTypes; ++t)
231  {
232  // compute all nxy * pxy first:
233 
234  subCount->storeAllNumbersOfSubstitutions(edge->getBrLen()->getValue(), t + 1, npxy);
235 
236  npxy.array() *= pxy.array();
237 
238  // Now loop over sites:
239 
240  auto counts = npxy * likelihoodsBotEdge;
241 
242  auto bb = (cwise(likelihoodsTopEdge) * cwise(counts)).colwise().sum();
243 
244  Eigen::VectorXd ff(likelihoodsBotEdge.cols());
245  switch (unresolvedOption)
246  {
249 
250  // Nullify counts where sum likelihoods > 1 : ie unknown
251  for (auto i = 0; i < ff.size(); i++)
252  {
253  const DataLik s = likelihoodsBotEdge.col(i).sum();
254  if (s >= 2.)
255  ff[i] = (unresolvedOption == SubstitutionMappingTools::UNRESOLVED_ZERO) ? 0. : 1. / convert(s);
256  else
257  ff[i] = 1;
258  }
259 
260  bb *= ff.array();
261  default:
262  ;
263  }
264 
265  // Normalizes by likelihood on this node
266  auto cc = bb / cwise(likelihoodsFather);
267 
268  // adds, with branch ponderation ( * edge / edge * father) probs
269  cwise(substitutionsForCurrentClass[t]) += cc * probaDAG.getProbaAtNode(fatid);
270  }
271  }
272 
273  // sum for all rate classes, with class ponderation
274  for (size_t t = 0; t < nbTypes; ++t)
275  {
276  substitutionsForCurrentNode[t] += substitutionsForCurrentClass[t] * pr;
277  }
278  }
279 
280  // Now we just have to copy the substitutions into the result vector:
281 
282  for (size_t i = 0; i < nbDistinctSites; ++i)
283  {
284  for (size_t t = 0; t < nbTypes; ++t)
285  {
286  double x = convert(substitutionsForCurrentNode[t](Eigen::Index(i)));
287  if (std::isnan(x) || std::isinf(x))
288  {
289  if (verbose)
290  ApplicationTools::displayWarning("On branch " + TextTools::toString(speciesId) + ", site index " + TextTools::toString(i) + ", and type " + TextTools::toString(t) + ", counts could not be computed.");
291  (*br)(i, t) = 0;
292  }
293  else
294  {
295  if (threshold >= 0 && x > threshold)
296  {
297  if (verbose)
298  ApplicationTools::displayWarning("On branch " + TextTools::toString(speciesId) + ", site index" + TextTools::toString(i) + ", and type " + TextTools::toString(t) + " count has been ignored because it is presumably saturated.");
299  (*br)(i, t) = 0;
300  }
301  else
302  (*br)(i, t) = x;
303  }
304  }
305  }
306  } // end of loop on branches
307 
308  if (verbose)
309  {
313  }
314 
315  return substitutions;
316 }
317 
318 /**************************************************************************************************/
319 
320 unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeNormalizations(
322  const vector<uint>& edgeIds,
323  std::shared_ptr<const BranchedModelSet> nullModels,
324  std::shared_ptr<const SubstitutionRegisterInterface> reg,
325  std::shared_ptr<const AlphabetIndex2> distances,
326  short unresolvedOption,
327  bool verbose)
328 {
329  // Preamble:
330  if (!rltc.isInitialized())
331  throw Exception("SubstitutionMappingTools::computeNormalizations(). Likelihood object is not initialized.");
332 
333  auto& sp = rltc.substitutionProcess();
334 
335  if (edgeIds.size() == 0)
336  return make_unique<ProbabilisticSubstitutionMapping>(
337  sp.parametrizablePhyloTree(),
338  reg->getNumberOfSubstitutionTypes(),
339  rltc.getRootArrayPositions(),
340  rltc.getNumberOfDistinctSites());
341 
342  auto processTree = rltc.getTreeNode(0);
343 
344 
345  /* First, set substitution rewards */
346 
347  // Map from models to type-vector of rewards
348  std::map<const SubstitutionModelInterface*, std::vector<std::shared_ptr<DecompositionReward>>> mModRewards;
349 
350  const auto& stateMap = sp.stateMap();
351 
352  size_t nbTypes = reg->getNumberOfSubstitutionTypes();
353 
354  size_t nbStates = stateMap.getNumberOfModelStates();
355  vector<int> supportedStates = stateMap.getAlphabetStates();
356 
357  vector<shared_ptr<UserAlphabetIndex1>> vusai(nbTypes);
358  for (auto& usai : vusai)
359  {
360  usai.reset(new UserAlphabetIndex1(stateMap.getAlphabet()));
361  }
362 
363  for (auto speciesId : edgeIds)
364  {
365  const auto& dagIndexes = rltc.getEdgesIds(speciesId, 0);
366 
367  // look for matching null model
368  auto nullmodel = nullModels->getModelForBranch(speciesId);
369 
370  for (auto id : dagIndexes)
371  {
372  const auto& edge = processTree->getEdge(id);
373  if (edge->getBrLen()) // if edge with model on it
374  {
375  auto model = edge->getModel();
376 
377  auto nMod = edge->getNMod();
378 
379  auto tm = dynamic_pointer_cast<const TransitionModelInterface>(model->targetValue());
380 
381  shared_ptr<const SubstitutionModelInterface> sm = nullptr;
382  if (nMod == 0)
383  {
384  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
385 
386  if (!sm)
387  throw Exception("SubstitutionMappingTools::computeCounts : SubstitutionVectors possible only for SubstitutionModels, not in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
388  }
389  else
390  {
391  size_t nmod = nMod->targetValue();
392 
393  auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
394  if (!ttm)
395  throw Exception("SubstitutionMappingTools::computeCounts : Expecting Mixed model in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
396 
397  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
398 
399  if (!sm)
400  throw Exception("SubstitutionMappingTools::computeCounts : Expecting Substitution model for submodel " + TextTools::toString(nmod) + " of mixed model " + tm->getName() + " in branch " + TextTools::toString(speciesId));
401  }
402 
403  // Sets vector of reward counts (depend on nullmodel)
404  if (mModRewards.find(sm.get()) == mModRewards.end())
405  {
406  // Look for matching substitution nullmodel
407  shared_ptr<const SubstitutionModelInterface> nullsm = nullptr;
408 
409  if (nMod == 0)
410  {
411  nullsm = dynamic_pointer_cast<const SubstitutionModelInterface>(nullmodel);
412 
413  if (!nullsm)
414  throw Exception("SubstitutionMappingTools::computeNormalizations : SubstitutionVectors possible only for SubstitutionModels, not in branch " + TextTools::toString(speciesId) + "for null model " + nullmodel->getName());
415  }
416  else
417  {
418  size_t nmod = nMod->targetValue();
419 
420  auto nullttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(nullmodel);
421  if (!nullttm)
422  throw Exception("SubstitutionMappingTools::computeNormalizations : Expecting Mixed model in branch " + TextTools::toString(speciesId) + " for null model " + nullttm->getName());
423 
424  nullsm = dynamic_pointer_cast<const SubstitutionModelInterface>(nullttm->getNModel(nmod));
425 
426  if (!nullsm)
427  throw Exception("SubstitutionMappingTools::computeNormalizations : Expecting Substitution model for submodel " + TextTools::toString(nmod) + " of null mixed model " + nullmodel->getName() + " in branch " + TextTools::toString(speciesId));
428  }
429 
430  for (auto& usai : vusai)
431  {
432  for (size_t i = 0; i < nbStates; ++i)
433  {
434  usai->setIndex(supportedStates[i], 0);
435  }
436  }
437 
438  for (size_t i = 0; i < nbStates; ++i)
439  {
440  for (size_t j = 0; j < nbStates; ++j)
441  {
442  if (i != j)
443  {
444  size_t nbt = reg->getType(i, j);
445  if (nbt != 0)
446  vusai[nbt - 1]->setIndex(supportedStates[i], vusai[nbt - 1]->getIndex(supportedStates[i]) + nullsm->Qij(i, j) * (distances ? distances->getIndex(supportedStates[i], supportedStates[j]) : 1));
447  }
448  }
449  }
450 
451  mModRewards[sm.get()] = vector<std::shared_ptr<DecompositionReward>>(nbTypes);
452  auto& mMdodsm = mModRewards[sm.get()];
453  for (size_t t = 0; t < nbTypes; ++t)
454  {
455  mMdodsm[t] = make_shared<DecompositionReward>(nullsm, vusai[t]);
456  }
457  }
458  }
459  }
460  }
461 
464 
465  // A few variables we'll need:
466 
467  size_t nbDistinctSites = rltc.getNumberOfDistinctSites();
468  size_t nbNodes = edgeIds.size();
469  size_t nbClasses = sp.getNumberOfClasses();
470 
471  const auto& rootPatternLinks = rltc.getRootArrayPositions();
472 
473  // We create a Mapping objects
474 
475  unique_ptr<ProbabilisticSubstitutionMapping> normalizations(new ProbabilisticSubstitutionMapping(*sp.getParametrizablePhyloTree(), nbTypes, rootPatternLinks, nbDistinctSites));
476 
477  // Compute the reward for each class and each branch in the tree:
478 
479  // Get the DAG of probabilities of the edges
480  ProbabilityDAG probaDAG(rltc.getForwardLikelihoodTree(0));
481 
482  if (verbose)
483  ApplicationTools::displayTask("Compute rewards", true);
484 
485  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = normalizations->allEdgesIterator();
486 
487  Eigen::MatrixXd rpxy;
488 
489  size_t nn = 0;
490  for ( ; !brIt->end(); brIt->next())
491  {
492  if (verbose)
493  ApplicationTools::displayGauge(nn++, nbNodes - 1);
494 
495  shared_ptr<PhyloBranchMapping> br = **brIt;
496 
497  // For each branch
498  uint speciesId = normalizations->getEdgeIndex(br);
499 
500  if (edgeIds.size() > 0 && !VectorTools::contains(edgeIds, (int)speciesId))
501  continue;
502 
503  vector<RowLik> rewardsForCurrentNode(nbTypes);
504  for (auto& sub:rewardsForCurrentNode)
505  {
506  sub = RowLik::Zero((int)nbDistinctSites);
507  }
508 
509  for (size_t ncl = 0; ncl < nbClasses; ncl++)
510  {
511  processTree = rltc.getTreeNode(ncl);
512  double pr = sp.getProbabilityForModel(ncl);
513 
514  vector<RowLik> rewardsForCurrentClass(nbTypes);
515  for (auto& sub:rewardsForCurrentClass)
516  {
517  sub = RowLik::Zero((int)nbDistinctSites);
518  }
519 
520  const auto& dagIndexes = rltc.getEdgesIds(speciesId, ncl);
521 
522  // Sum on all dag edges for this speciesId
523  for (auto id:dagIndexes)
524  {
525  auto edge = processTree->getEdge(id);
526 
527  auto tm = dynamic_pointer_cast<const TransitionModelInterface>(edge->getModel()->targetValue());
528 
529  auto nMod = edge->getNMod();
530 
531  shared_ptr<const SubstitutionModelInterface> sm = nullptr;
532 
533  if (nMod == 0)
534  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
535  else
536  {
537  size_t nmod = nMod->targetValue();
538 
539  auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
540  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
541  }
542 
543  // Rewards for this model
544  auto subReward = mModRewards[sm.get()];
545 
546  const auto& likelihoodsTopEdge = rltc.getBackwardLikelihoodsAtEdgeForClass(id, ncl)->targetValue();
547 
548  auto sonid = rltc.getForwardLikelihoodTree(ncl)->getSon(id);
549  auto fatid = rltc.getForwardLikelihoodTree(ncl)->getFatherOfEdge(id);
550 
551  const auto& likelihoodsBotEdge = rltc.getForwardLikelihoodsAtNodeForClass(sonid, ncl)->targetValue();
552 
553  const Eigen::MatrixXd& pxy = edge->getTransitionMatrix()->accessValueConst();
554 
555  const auto& likelihoodsFather = rltc.getLikelihoodsAtNodeForClass(fatid, ncl)->targetValue();
556 
557  for (size_t t = 0; t < nbTypes; ++t)
558  {
559  // compute all rxy * pxy first:
560  subReward[t]->storeAllRewards(edge->getBrLen()->getValue(), rpxy);
561 
562  rpxy.array() *= pxy.array();
563 
564  // Now loop over sites:
565 
566  auto rew = rpxy * likelihoodsBotEdge;
567 
568  auto bb = (cwise(likelihoodsTopEdge) * cwise(rew)).colwise().sum();
569 
570  // Nullify counts where sum likelihoods > 1 : ie unknown
571  Eigen::VectorXd ff(likelihoodsBotEdge.cols());
572 
573  switch (unresolvedOption)
574  {
577 
578  // Nullify counts where sum likelihoods > 1 : ie unknown
579  for (auto i = 0; i < ff.size(); i++)
580  {
581  const DataLik s = likelihoodsBotEdge.col(i).sum();
582  if (s >= 2.)
583  ff[i] = (unresolvedOption == SubstitutionMappingTools::UNRESOLVED_ZERO) ? 0. : 1. / convert(s);
584  else
585  ff[i] = 1;
586  }
587 
588  bb *= ff.array();
589  default:
590  ;
591  }
592 
593  // Normalizes by likelihood on this node
594  auto cc = bb / cwise(likelihoodsFather);
595 
596  // adds, with branch ponderation ( * edge / edge * father) probs
597  cwise(rewardsForCurrentClass[t]) += cc * probaDAG.getProbaAtNode(fatid);
598  }
599  }
600 
601  for (size_t t = 0; t < nbTypes; ++t)
602  {
603  rewardsForCurrentNode[t] += rewardsForCurrentClass[t] * pr;
604  }
605  }
606 
607  // Now we just have to copy the substitutions into the result vector:
608  for (size_t i = 0; i < nbDistinctSites; ++i)
609  {
610  for (size_t t = 0; t < nbTypes; ++t)
611  {
612  (*br)(i, t) = convert(rewardsForCurrentNode[t](Eigen::Index(i)));
613  }
614  }
615  } // end of loop on branches
616 
617  if (verbose)
618  {
622  }
623 
624  return normalizations;
625 }
626 
627 
628 /************************************************************/
629 
630 unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeNormalizedCounts(
632  const vector<uint>& edgeIds,
633  std::shared_ptr<const BranchedModelSet> nullModels,
634  std::shared_ptr<const SubstitutionRegisterInterface> reg,
635  std::shared_ptr<const AlphabetIndex2> weights,
636  std::shared_ptr<const AlphabetIndex2> distances,
637  bool perTimeUnit,
638  uint siteSize,
639  short unresolvedOption,
640  double threshold,
641  bool verbose)
642 {
643  unique_ptr<ProbabilisticSubstitutionMapping> counts(computeCounts(rltc, edgeIds, reg, weights, distances, unresolvedOption, threshold, verbose));
644  unique_ptr<ProbabilisticSubstitutionMapping> factors(computeNormalizations(rltc, edgeIds, nullModels, reg, distances, unresolvedOption, verbose));
645  return computeNormalizedCounts(*counts, *factors, edgeIds, perTimeUnit, siteSize);
646 }
647 
648 /************************************************************/
649 
650 unique_ptr<ProbabilisticSubstitutionMapping> SubstitutionMappingTools::computeNormalizedCounts(
651  const ProbabilisticSubstitutionMapping& counts,
652  const ProbabilisticSubstitutionMapping& factors,
653  const vector<uint>& edgeIds,
654  bool perTimeUnit,
655  uint siteSize)
656 {
657  unique_ptr<ProbabilisticSubstitutionMapping> normCounts(counts.clone());
658 
659  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
660  size_t nbDistinctSites = counts.getNumberOfDistinctSites();
661 
662  // Iterate on branches
663 
664  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = normCounts->allEdgesIterator();
665 
666  for ( ; !brIt->end(); brIt->next())
667  {
668  shared_ptr<PhyloBranchMapping> brNormCount = **brIt;
669 
670  VVdouble& brnCou = brNormCount->getCounts();
671 
672  // For each branch
673  uint edid = normCounts->getEdgeIndex(brNormCount);
674 
675  if (edgeIds.size() > 0 && !VectorTools::contains(edgeIds, (int)edid))
676  {
677  for (auto& brk : brnCou)
678  {
679  VectorTools::fill(brk, 0.);
680  }
681  continue;
682  }
683 
684  shared_ptr<PhyloBranchMapping> brFactor = factors.getEdge(edid);
685  shared_ptr<PhyloBranchMapping> brCount = counts.getEdge(edid);
686 
687  const VVdouble& cou = brCount->getCounts();
688  const VVdouble& fac = brFactor->getCounts();
689 
690 
691  // if not per time, multiply by the lengths of the branches of
692  // the input tree
693 
694  double slg = (!perTimeUnit ? brCount->getLength() : 1) / siteSize;
695 
696  for (size_t k = 0; k < nbDistinctSites; ++k)
697  {
698  Vdouble& ncou_k = brnCou[k];
699 
700  const Vdouble& cou_k = cou[k];
701  const Vdouble& fac_k = fac[k];
702 
703  for (size_t t = 0; t < nbTypes; ++t)
704  {
705  ncou_k[t] = (fac_k[t] != 0 ? cou_k[t] / fac_k[t] * slg : 0);
706  }
707  }
708  }
709 
710  return normCounts;
711 }
712 
713 /*******************************************************************************/
714 /* Get trees of counts */
715 /*******************************************************************************/
716 
718  const ProbabilisticSubstitutionMapping& counts,
719  size_t type)
720 {
721  auto pt = make_unique<PhyloTree>(counts);
722  size_t nbSites = counts.getNumberOfSites();
723 
724  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
725 
726  for ( ; !brIt->end(); brIt->next())
727  {
728  shared_ptr<PhyloBranchMapping> brm = (**brIt);
729  double x = 0;
730 
731  for (size_t i = 0; i < nbSites; ++i)
732  {
733  x += brm->getSiteTypeCount(counts.getSiteIndex(i), type);
734  }
735 
736  pt->getEdge(counts.getEdgeIndex(brm))->setLength(x);
737  }
738 
739  return pt;
740 }
741 
742 /*******************************************************************************/
743 
745  const ProbabilisticSubstitutionMapping& counts,
746  const ProbabilisticSubstitutionMapping& factors,
747  size_t type)
748 {
749  auto pt = make_unique<PhyloTree>(counts);
750  size_t nbSites = counts.getNumberOfSites();
751 
752  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
753 
754  for ( ; !brIt->end(); brIt->next())
755  {
756  shared_ptr<PhyloBranchMapping> brm = (**brIt);
757  shared_ptr<PhyloBranchMapping> brf = factors.getEdge(counts.getEdgeIndex(brm));
758 
759  double x = 0, f = 0;
760 
761  for (size_t i = 0; i < nbSites; ++i)
762  {
763  x += brm->getSiteTypeCount(counts.getSiteIndex(i), type);
764  f += brf->getSiteTypeCount(counts.getSiteIndex(i), type);
765  }
766 
767  pt->getEdge(counts.getEdgeIndex(brm))->setLength(x / f);
768  }
769 
770  return pt;
771 }
772 
773 
774 /********************************************************************************/
775 /* Get vectors of counts */
776 /********************************************************************************/
777 
779  const ProbabilisticSubstitutionMapping& counts,
780  const vector<uint>& ids)
781 {
782  const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
783  size_t nbBr = idc.size();
784 
785  size_t nbSites = counts.getNumberOfSites();
786 
787  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
788 
789  VVVdouble result;
790  VectorTools::resize3(result, nbSites, nbBr, nbTypes);
791 
792  for (size_t j = 0; j < nbSites; ++j)
793  {
794  VVdouble& resS = result[j];
795  size_t siteIndex = counts.getSiteIndex(j);
796 
797  for (size_t k = 0; k < nbBr; ++k)
798  {
799  Vdouble& resSB = resS[k];
800 
801  for (size_t i = 0; i < nbTypes; ++i)
802  {
803  resSB[i] = counts(idc[k], siteIndex, i);
804  }
805  }
806  }
807 
808  return result;
809 }
810 
811 
812 /**************************************************************************************************/
813 
815  const ProbabilisticSubstitutionMapping& counts,
816  size_t site)
817 {
818  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
819  size_t siteIndex = counts.getSiteIndex(site);
820 
821  Vdouble v(counts.getNumberOfBranches(), 0);
822  for ( ; !brIt->end(); brIt->next())
823  {
824  v[counts.getEdgeIndex(**brIt)] = VectorTools::sum((***brIt).getSiteCount(siteIndex));
825  }
826 
827  return v;
828 }
829 
831  const ProbabilisticSubstitutionMapping& counts,
832  const ProbabilisticSubstitutionMapping& factors,
833  size_t site)
834 {
835  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
836  size_t siteIndex = counts.getSiteIndex(site);
837 
838  Vdouble v(counts.getNumberOfBranches(), 0);
839  for ( ; !brIt->end(); brIt->next())
840  {
841  shared_ptr<PhyloBranchMapping> brm = (**brIt);
842 
843  uint edid = counts.getEdgeIndex(brm);
844  shared_ptr<PhyloBranchMapping> brf = factors.getEdge(edid);
845 
846 
847  v[edid] = VectorTools::sum(brm->getSiteCount(siteIndex)) / VectorTools::sum(brf->getSiteCount(siteIndex));
848  }
849 
850  return v;
851 }
852 
853 /**************************************************************************************************/
854 
856  const ProbabilisticSubstitutionMapping& counts,
857  const vector<uint>& ids)
858 {
859  const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
860  size_t nbBr = idc.size();
861 
862  size_t nbSites = counts.getNumberOfSites();
863 
864  VVdouble result;
865  VectorTools::resize2(result, nbSites, nbBr);
866 
867  for (size_t k = 0; k < nbSites; ++k)
868  {
869  vector<double> countsf(SubstitutionMappingTools::getCountsForSitePerBranch(counts, k));
870  Vdouble* resS = &result[k];
871 
872  for (size_t i = 0; i < nbBr; ++i)
873  {
874  (*resS)[i] = countsf[idc[i]];
875  }
876  }
877  return result;
878 }
879 
881  const ProbabilisticSubstitutionMapping& counts,
882  const ProbabilisticSubstitutionMapping& factors,
883  const vector<uint>& ids)
884 {
885  const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
886  size_t nbBr = idc.size();
887 
888  size_t nbSites = counts.getNumberOfSites();
889 
890  VVdouble result;
891  VectorTools::resize2(result, nbSites, nbBr);
892 
893  for (size_t k = 0; k < nbSites; ++k)
894  {
895  vector<double> countsf(SubstitutionMappingTools::getCountsForSitePerBranch(counts, factors, k));
896  Vdouble* resS = &result[k];
897 
898  for (size_t i = 0; i < nbBr; ++i)
899  {
900  (*resS)[i] = countsf[idc[i]];
901  }
902  }
903  return result;
904 }
905 
906 /**************************************************************************************************/
907 
909  const ProbabilisticSubstitutionMapping& counts,
910  uint branchId)
911 {
912  size_t nbSites = counts.getNumberOfSites();
913  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
914  Vdouble v(nbTypes, 0);
915  shared_ptr<PhyloBranchMapping> br = counts.getEdge(branchId);
916 
917  for (size_t i = 0; i < nbSites; ++i)
918  {
919  for (size_t t = 0; t < nbTypes; ++t)
920  {
921  v[t] += br->getSiteTypeCount(counts.getSiteIndex(i), t);
922  }
923  }
924 
925  return v;
926 }
927 
928 /**************************************************************************************************/
929 
931  const ProbabilisticSubstitutionMapping& counts,
932  const vector<uint>& ids)
933 {
934  VVdouble result;
935  const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
936 
937  for (auto id : idc)
938  {
939  result.push_back(SubstitutionMappingTools::getCountsForBranchPerType(counts, id));
940  }
941 
942  return result;
943 }
944 
945 /**************************************************************************************************/
946 
948  const ProbabilisticSubstitutionMapping& counts,
949  const vector<uint>& ids)
950 {
951  const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
952  size_t nbBr = idc.size();
953 
954  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
955 
956  VVdouble result;
957  VectorTools::resize2(result, nbTypes, nbBr);
958 
959  for (size_t i = 0; i < idc.size(); i++)
960  {
962  for (size_t nbt = 0; nbt < nbTypes; nbt++)
963  {
964  result[nbt][i] = cou[nbt];
965  }
966  }
967 
968  return result;
969 }
970 
971 
972 /************************************************************/
973 
974 
977  const vector<uint>& ids,
978  std::shared_ptr<const SubstitutionRegisterInterface> reg,
979  std::shared_ptr<const AlphabetIndex2> weights,
980  std::shared_ptr<const AlphabetIndex2> distances,
981  short unresolvedOption,
982  double threshold,
983  bool verbose)
984 {
985  auto psm = computeCounts(rltc, ids, reg, weights, distances, unresolvedOption, threshold, verbose);
986 
987  VVdouble result = getCountsPerTypePerBranch(*psm, ids);
988 
989  auto creg = dynamic_pointer_cast<const CategorySubstitutionRegister>(reg);
990 
991  if (creg && !creg->isStationary())
992  {
993  size_t nbTypes = result[0].size();
994 
995  for (size_t k = 0; k < ids.size(); ++k)
996  {
997  vector<double> freqs(0); // = rltc.getPosteriorStateFrequencies(ids[k]);
998  // Compute frequencies for types:
999 
1000  vector<double> freqsTypes(creg->getNumberOfCategories());
1001  for (size_t i = 0; i < freqs.size(); ++i)
1002  {
1003  size_t c = creg->getCategory(i);
1004  freqsTypes[c - 1] += freqs[i];
1005  }
1006 
1007  // We divide the counts by the frequencies and rescale:
1008 
1009  double s = VectorTools::sum(result[k]);
1010  for (size_t t = 0; t < nbTypes; ++t)
1011  {
1012  result[k][t] /= freqsTypes[creg->getCategoryFrom(t + 1) - 1];
1013  }
1014 
1015  double s2 = VectorTools::sum(result[k]);
1016  // Scale:
1017 
1018  result[k] = (result[k] / s2) * s;
1019  }
1020  }
1021  return result;
1022 }
1023 
1024 
1025 /**************************************************************************************************/
1026 
1028 {
1029  const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
1030 
1031  size_t siteIndex = counts.getSiteIndex(site);
1032 
1033  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1034  Vdouble v(nbTypes, 0);
1035 
1036  for (auto id : idc)
1037  {
1038  shared_ptr<PhyloBranchMapping> br = counts.getEdge(id);
1039  for (size_t t = 0; t < nbTypes; ++t)
1040  {
1041  v[t] += (*br)(siteIndex, t);
1042  }
1043  }
1044 
1045  return v;
1046 }
1047 
1048 /**************************************************************************************************/
1049 
1051  const ProbabilisticSubstitutionMapping& counts,
1052  const ProbabilisticSubstitutionMapping& factors,
1053  size_t site,
1054  bool perTimeUnit,
1055  uint siteSize)
1056 {
1057  size_t siteIndex = counts.getSiteIndex(site);
1058  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1059 
1060  Vdouble v(nbTypes, 0);
1061  Vdouble n(nbTypes, 0);
1062 
1063  double lg = (!perTimeUnit ? 0 : 1);
1064  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
1065  for ( ; !brIt->end(); brIt->next())
1066  {
1067  for (size_t t = 0; t < nbTypes; ++t)
1068  {
1069  v[t] += (***brIt)(siteIndex, t);
1070  }
1071  if (!perTimeUnit)
1072  lg += (**brIt)->getLength();
1073  }
1074 
1075  double slg = lg / siteSize;
1076 
1077  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> nrIt = factors.allEdgesIterator();
1078  for ( ; !nrIt->end(); nrIt->next())
1079  {
1080  for (size_t t = 0; t < nbTypes; ++t)
1081  {
1082  n[t] += (***nrIt)(siteIndex, t);
1083  }
1084  }
1085 
1086  for (size_t t = 0; t < nbTypes; ++t)
1087  {
1088  v[t] = v[t] / n[t] * slg;
1089  }
1090 
1091  return v;
1092 }
1093 
1094 /**************************************************************************************************/
1095 
1097  const ProbabilisticSubstitutionMapping& counts,
1098  const ProbabilisticSubstitutionMapping& factors,
1099  size_t site,
1100  const vector<uint>& ids,
1101  bool perTimeUnit,
1102  uint siteSize)
1103 {
1104  const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
1105 
1106  size_t siteIndex = counts.getSiteIndex(site);
1107  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1108 
1109  Vdouble v(nbTypes, 0);
1110  Vdouble n(nbTypes, 0);
1111 
1112  double lg = (!perTimeUnit ? 0 : 1);
1113  for (auto id : idc)
1114  {
1115  shared_ptr<PhyloBranchMapping> br = counts.getEdge(id);
1116  for (size_t t = 0; t < nbTypes; ++t)
1117  {
1118  v[t] += (*br)(siteIndex, t);
1119  }
1120  if (!perTimeUnit)
1121  lg += br->getLength();
1122  }
1123 
1124  double slg = lg / siteSize;
1125 
1126  for (auto id : idc)
1127  {
1128  shared_ptr<PhyloBranchMapping> br = factors.getEdge(id);
1129  for (size_t t = 0; t < nbTypes; ++t)
1130  {
1131  n[t] += (*br)(siteIndex, t);
1132  }
1133  }
1134 
1135  for (size_t t = 0; t < nbTypes; ++t)
1136  {
1137  v[t] = v[t] / n[t] * slg;
1138  }
1139 
1140  return v;
1141 }
1142 
1143 /**************************************************************************************************/
1144 
1146  const ProbabilisticSubstitutionMapping& counts,
1147  const ProbabilisticSubstitutionMapping& factors,
1148  bool perTimeUnit,
1149  uint siteSize)
1150 {
1151  size_t nbSites = counts.getNumberOfSites();
1152  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1153  VVdouble result;
1154  VectorTools::resize2(result, nbSites, nbTypes);
1155 
1156  for (size_t k = 0; k < nbSites; ++k)
1157  {
1158  result[k] = SubstitutionMappingTools::getCountsForSitePerType(counts, factors, k, perTimeUnit, siteSize);
1159  }
1160 
1161  return result;
1162 }
1163 
1164 /**************************************************************************************************/
1165 
1167 {
1168  const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
1169 
1170  size_t nbSites = counts.getNumberOfSites();
1171  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1172  VVdouble result;
1173  VectorTools::resize2(result, nbSites, nbTypes);
1174 
1175  for (size_t k = 0; k < nbSites; ++k)
1176  {
1177  result[k] = SubstitutionMappingTools::getCountsForSitePerType(counts, k, idc);
1178  }
1179 
1180  return result;
1181 }
1182 
1183 /**************************************************************************************************/
1184 
1186  const ProbabilisticSubstitutionMapping& counts,
1187  const ProbabilisticSubstitutionMapping& factors,
1188  const vector<uint>& ids,
1189  bool perTimeUnit,
1190  uint siteSize)
1191 {
1192  const Vuint idc(ids.size() == 0 ? counts.getAllEdgesIndexes() : ids);
1193 
1194  size_t nbSites = counts.getNumberOfSites();
1195  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1196  VVdouble result;
1197  VectorTools::resize2(result, nbSites, nbTypes);
1198 
1199  for (size_t k = 0; k < nbSites; ++k)
1200  {
1201  result[k] = SubstitutionMappingTools::getCountsForSitePerType(counts, factors, k, idc, perTimeUnit, siteSize);
1202  }
1203 
1204  return result;
1205 }
1206 
1207 /**************************************************************************************************/
1208 
1210 {
1211  size_t siteIndex = counts.getSiteIndex(site);
1212  double sumSquare = 0;
1213 
1214  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = counts.allEdgesIterator();
1215 
1216  size_t nbTypes = counts.getNumberOfSubstitutionTypes();
1217  for ( ; !brIt->end(); brIt->next())
1218  {
1219  double sum = 0;
1220  for (size_t t = 0; t < nbTypes; ++t)
1221  {
1222  sum += (***brIt)(siteIndex, t);
1223  }
1224 
1225  sumSquare += sum * sum;
1226  }
1227  return sqrt(sumSquare);
1228 }
1229 
1230 
1231 /******************************************************/
1232 /* OUTPUT */
1233 /******************************************************/
1234 
1236  const string& filename,
1237  const vector<uint>& ids,
1238  const AlignmentDataInterface& sites,
1239  const VVdouble& counts)
1240 {
1241  size_t nbSites = counts.size();
1242  if (nbSites == 0)
1243  return;
1244  size_t nbBr = counts[0].size();
1245 
1246  ofstream file;
1247  file.precision(10);
1248  file.open(filename.c_str());
1249 
1250  file << "[Site]";
1251  for (size_t i = 0; i < nbBr; ++i)
1252  {
1253  file << "\t" << ids[i];
1254  }
1255  file << endl;
1256 
1257  for (size_t k = 0; k < nbSites; ++k)
1258  {
1259  const Vdouble& countS = counts[k];
1260  file << "[" << sites.site(k).getCoordinate() << "]";
1261  for (size_t i = 0; i < nbBr; ++i)
1262  {
1263  file << "\t" << countS[i];
1264  }
1265  file << endl;
1266  }
1267  file.close();
1268 }
1269 
1270 
1271 /**************************************************************************************************/
1272 
1274  const string& filename,
1275  const SubstitutionRegisterInterface& reg,
1276  const AlignmentDataInterface& sites,
1277  const VVdouble& counts)
1278 {
1279  size_t nbSites = counts.size();
1280  if (nbSites == 0)
1281  return;
1282  size_t nbTypes = counts[0].size();
1283 
1284  ofstream file;
1285  file.precision(10);
1286  file.open(filename.c_str());
1287 
1288  file << "[Site]";
1289  for (size_t i = 0; i < nbTypes; ++i)
1290  {
1291  file << "\t" << reg.getTypeName(i + 1);
1292  }
1293  file << endl;
1294 
1295  for (size_t k = 0; k < nbSites; ++k)
1296  {
1297  file << "[" << sites.site(k).getCoordinate() << "]";
1298  const Vdouble& resS = counts[k];
1299  for (size_t i = 0; i < nbTypes; ++i)
1300  {
1301  file << "\t" << resS[i];
1302  }
1303  file << endl;
1304  }
1305  file.close();
1306 }
1307 
1308 /**************************************************************************************************/
1309 
1311  const string& filename,
1312  const SubstitutionRegisterInterface& reg,
1313  const AlignmentDataInterface& sites,
1314  const VVdouble& counts)
1315 {
1316  size_t nbSites = counts.size();
1317  if (nbSites == 0)
1318  return;
1319 
1320  size_t nbTypes = counts[0].size();
1321 
1322  ofstream file;
1323  file.precision(10);
1324  file.open(filename.c_str());
1325 
1326  for (size_t i = 0; i < nbTypes; ++i)
1327  {
1328  double sumtype = 0;
1329  for (size_t k = 0; k < nbSites; ++k)
1330  {
1331  sumtype += counts[k][i];
1332  }
1333 
1334  file << reg.getTypeName(i + 1) << "\t" << sumtype << endl;
1335  }
1336  file.close();
1337 }
1338 
1339 
1340 /**************************************************************************************************/
1341 
1343  const string& filenamePrefix,
1344  const vector<uint>& ids,
1345  const SubstitutionRegisterInterface& reg,
1346  const AlignmentDataInterface& sites,
1347  const VVVdouble& counts)
1348 {
1349  size_t nbSites = counts.size();
1350  if (nbSites == 0)
1351  return;
1352  size_t nbBr = counts[0].size();
1353  if (nbBr == 0)
1354  return;
1355  size_t nbTypes = counts[0][0].size();
1356 
1357  ofstream file;
1358  file.precision(10);
1359 
1360  for (size_t i = 0; i < nbTypes; ++i)
1361  {
1362  string name = reg.getTypeName(i + 1);
1363  if (name == "")
1364  name = TextTools::toString(i + 1);
1365 
1366  string path = filenamePrefix + name + string(".count");
1367 
1368  ApplicationTools::displayResult(string("Output counts of type ") + TextTools::toString(i + 1) + string(" to file"), path);
1369  file.open(path.c_str());
1370 
1371  file << "[Site]";
1372  for (size_t k = 0; k < nbBr; ++k)
1373  {
1374  file << "\t" << ids[k];
1375  }
1376  file << endl;
1377 
1378  for (size_t j = 0; j < nbSites; ++j)
1379  {
1380  const VVdouble& resS = counts[j];
1381 
1382  file << "[" << sites.site(j).getCoordinate() << "]";
1383 
1384  for (size_t k = 0; k < nbBr; ++k)
1385  {
1386  file << "\t" << resS[k][i];
1387  }
1388  file << endl;
1389  }
1390  file.close();
1391  }
1392 }
1393 
1394 
1395 /**************************************************************************************************/
1396 
1398  const ProbabilisticSubstitutionMapping& substitutions,
1399  const AlignmentDataInterface& sites,
1400  size_t type,
1401  ostream& out)
1402 {
1403  if (!out)
1404  throw IOException("SubstitutionMappingTools::writeToStream. Can't write to stream.");
1405  out << "Branches";
1406  out << "\tMean";
1407  for (size_t i = 0; i < substitutions.getNumberOfSites(); ++i)
1408  {
1409  out << "\tSite" << sites.site(i).getCoordinate();
1410  }
1411  out << endl;
1412 
1413  unique_ptr<ProbabilisticSubstitutionMapping::mapTree::EdgeIterator> brIt = substitutions.allEdgesIterator();
1414 
1415  for ( ; !brIt->end(); brIt->next())
1416  {
1417  const shared_ptr<PhyloBranchMapping> br = **brIt;
1418 
1419  out << substitutions.getEdgeIndex(br) << "\t" << br->getLength();
1420 
1421  for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
1422  {
1423  out << "\t" << (*br)(substitutions.getSiteIndex(i), type);
1424  }
1425 
1426  out << endl;
1427  }
1428 }
1429 
1430 /**************************************************************************************************/
1431 
1433 {
1434  try
1435  {
1436  auto data = DataTable::read(in, "\t", true, -1);
1437  vector<string> ids = data->getColumn(0);
1438  data->deleteColumn(0); // Remove ids
1439  data->deleteColumn(0); // Remove means
1440  // Now parse the table:
1441  size_t nbSites = data->getNumberOfColumns();
1442  substitutions.setNumberOfSites(nbSites);
1443 
1444  auto brIt = substitutions.allEdgesIterator();
1445 
1446  for ( ; !brIt->end(); brIt->next())
1447  {
1448  const shared_ptr<PhyloBranchMapping> br = **brIt;
1449  uint brid = substitutions.getEdgeIndex(br);
1450  for (size_t j = 0; j < nbSites; j++)
1451  {
1452  (*br)(j, type) = TextTools::toDouble((*data)(brid, j));
1453  }
1454  }
1455 
1456  // Parse the header:
1457  for (size_t i = 0; i < nbSites; ++i)
1458  {
1459  string siteTxt = data->getColumnName(i);
1460  int site = 0;
1461  if (siteTxt.substr(0, 4) == "Site")
1462  site = TextTools::to<int>(siteTxt.substr(4));
1463  else
1464  site = TextTools::to<int>(siteTxt);
1465  substitutions.setSitePosition(i, site);
1466  }
1467  }
1468  catch (Exception& e)
1469  {
1470  throw IOException(string("Bad input file. ") + e.what());
1471  }
1472 }
void setSitePosition(size_t index, int position)
Set the position of a given site.
Definition: Mapping.h:103
size_t getNumberOfSites() const
Definition: Mapping.h:111
static void displayTask(const std::string &text, bool eof=false)
static std::shared_ptr< OutputStream > message
static void displayTaskDone()
static void displayWarning(const std::string &text)
static void displayResult(const std::string &text, const T &result)
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
virtual std::vector< EdgeIndex > getAllEdgesIndexes() const=0
virtual std::unique_ptr< EdgeIterator > allEdgesIterator()=0
virtual EdgeIndex getEdgeIndex(const std::shared_ptr< E > edgeObject) const=0
virtual std::shared_ptr< E > getEdge(EdgeIndex edgeIndex) const=0
virtual int getCoordinate() const=0
static std::unique_ptr< DataTable > read(std::istream &in, const std::string &sep="\t", bool header=true, int rowNames=-1)
Analytical substitution count using the eigen decomposition method.
const char * what() const noexcept override
static Self Zero(Eigen::Index rows, Eigen::Index cols)
SiteLikelihoodsRef getLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
Get shrunked conditional likelihood matrix at Node (ie just above the node), for a given rate class.
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
std::shared_ptr< ForwardLikelihoodTree > getForwardLikelihoodTree(size_t nCat)
ConditionalLikelihoodRef getBackwardLikelihoodsAtEdgeForClass(uint edgeId, size_t nCat)
ConditionalLikelihoodRef getForwardLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
const DAGindexes & getEdgesIds(uint speciesId, size_t nCat) const
Get indexes of the non-empty edges in the Likelihood DAG that have a given species index for a given ...
std::shared_ptr< ProcessTree > getTreeNode(size_t nCat)
Get process tree for a rate category.
virtual bool isInitialized() const
Data storage class for probabilistic substitution mappings.
ProbabilisticSubstitutionMapping * clone() const override
void setNumberOfSites(size_t numberOfSites) override
double getProbaAtNode(PhyloTree::NodeIndex nodeId)
The SubstitutionsCount interface.
virtual SubstitutionCountInterface * clone() const =0
virtual size_t getNumberOfSubstitutionTypes() const
Short cut function, equivalent to getSubstitutionRegister().getNumberOfSubstitutionTypes().
static Vdouble getCountsForSitePerType(const ProbabilisticSubstitutionMapping &counts, size_t site, const std::vector< uint > &ids=Vuint(0))
Methods that sum on branches need raw mapping trees, or raw mapping trees and normalization trees (ie...
static void readFromStream(std::istream &in, ProbabilisticSubstitutionMapping &substitutions, size_t type)
Read the substitutions std::vectors from a stream.
static const short UNRESOLVED_ZERO
Constants describing how unresolved characters are counted:
static std::unique_ptr< PhyloTree > getTreeForType(const ProbabilisticSubstitutionMapping &counts, size_t type)
Methods to get trees of counts. These methods can use raw or normalized counting trees (ie computed w...
static VVdouble getCountsPerSitePerBranch(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all type of substitutions for each site for each branch.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeCounts(LikelihoodCalculationSingleProcess &rltc, SubstitutionCountInterface &substitutionCount, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Methods to compute mapping Trees.
static void outputPerSitePerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVdouble &counts)
Output Per Site Per Type in SGED format.
static Vdouble getCountsForSitePerBranch(const ProbabilisticSubstitutionMapping &counts, size_t site)
Sum all type of substitutions for each branch of a given position.
static VVdouble computeCountsPerTypePerBranch(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &ids, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the sum over all branches of the counts per type per branch.
static VVVdouble getCountsPerSitePerBranchPerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Methods to get std::vectors of counts.
static void writeToStream(const ProbabilisticSubstitutionMapping &substitutions, const AlignmentDataInterface &sites, size_t type, std::ostream &out)
Write the substitutions std::vectors to a stream.
static Vdouble getCountsForBranchPerType(const ProbabilisticSubstitutionMapping &counts, uint branchId)
Sum all sites substitutions for each type of a given branch.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizedCounts(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &edgeIds, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, bool perTimeUnit=false, uint siteSize=1, short unresolvedOption=SubstitutionMappingTools::UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the substitution counts tree, normalized by the models of "null" process on each branch,...
static VVdouble getCountsPerTypePerBranch(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
static void outputPerSitePerBranch(const std::string &filename, const std::vector< uint > &ids, const AlignmentDataInterface &sites, const VVdouble &counts)
Outputs of counts.
static void outputPerSitePerBranchPerType(const std::string &filenamePrefix, const std::vector< uint > &ids, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVVdouble &counts)
Output Per Site Per Branch Per Type, one SGED file per type.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizations(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &edgeIds, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, bool verbose=true)
Compute the normalizations tree due to the models of "null" process on each branch,...
static VVdouble getCountsPerBranchPerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all sites substitutions for each branch for each type.
static VVdouble getCountsPerSitePerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all type of substitutions for each site for each type.
static double getNormForSite(const ProbabilisticSubstitutionMapping &counts, size_t site)
Compute the norm of a substitution std::vector for a given position.
static void outputPerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVdouble &counts)
Output Per Type.
The SubstitutionRegister interface.
virtual std::string getTypeName(size_t type) const =0
Get the name of a given substitution type.
virtual const CoreSiteInterface & site(size_t siteIndex) const=0
static T sum(const std::vector< T > &v1)
static void resize3(VVVdouble &vvv, size_t n1, size_t n2, size_t n3)
static bool contains(const std::vector< T > &vec, T el)
static void resize2(VVdouble &vv, size_t n1, size_t n2)
static void fill(std::vector< T > &v, T value)
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string toString(T t)
bool isinf(const double &d)
T & cwise(T &t)
Definition: DataFlowCWise.h:29
R convert(const F &from, const Dimension< R > &)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble
std::vector< unsigned int > Vuint