bpp-phyl3  3.0.0
GivenDataSubstitutionProcessSiteSimulator.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
7 #include <algorithm>
8 
9 #include "../Likelihood/DataFlow/ForwardLikelihoodTree.h"
11 
12 // From SeqLib:
14 
15 using namespace bpp;
16 using namespace std;
17 using namespace numeric;
18 
19 /******************************************************************************/
20 
22 {
23  calcul_->makeLikelihoods();
24 
25  // Initialize sons & fathers of tree_ Nodes
26  // set sequence names
27 
28  outputInternalSites(outputInternalSites_);
29 
30  // Set up cumsum rates
31 
32  const auto& dRate = process_->getRateDistribution();
33 
34  std::vector<DataLik> qR;
35 
36  for (size_t i = 0; i < nbClasses_; i++)
37  {
38  qR.push_back(calcul_->getSiteLikelihoodsForAClass(i)(pos_));
39  }
40 
41  auto sQ = VectorTools::sum(qR);
42 
43  qRates_.clear();
44  for (size_t i = 0; i < nbClasses_; i++)
45  {
46  qRates_.push_back(convert(qR[i] / sQ));
47  }
48 
49  // Initialize root frequencies
50 
51  const auto dRoot = process_->getRootFrequencies();
52  qRoots_.resize(nbClasses_);
53  RowLik temp((int)nbStates_);
54 
55  for (size_t c = 0; c < nbClasses_; c++)
56  {
57  temp = calcul_->getForwardLikelihoodsAtNodeForClass(tree_.getNodeIndex(tree_.getRoot()), c)->targetValue().col(pos_);
58 
59  temp /= temp.sum();
60 
61  Vdouble temp2;
62  copyEigenToBpp(temp, temp2);
63 
64  qRoots_[c] = VectorTools::cumSum(temp2);
65  }
66 
67  // Initialize cumulative pxy for edges that have models
68 
69  auto edges = tree_.getAllEdges();
70 
71  std::vector<DataLik> postTrans(nbStates_);
72 
73  for (auto& edge : edges)
74  {
75  const auto model = edge->getModel();
76  auto outid = tree_.getEdgeIndex(edge);
77 
78  if (edge->useProb())
79  continue;
80 
81  const auto transmodel = dynamic_pointer_cast<const TransitionModelInterface>(model);
82  if (!transmodel)
83  throw Exception("SubstitutionProcessSiteSimulator::init : model " + model->getName() + " on branch " + TextTools::toString(tree_.getEdgeIndex(edge)) + " is not a TransitionModel.");
84 
85  VVVdouble* cumpxy_node_ = &edge->cumpxy_;
86  cumpxy_node_->resize(nbClasses_);
87 
88  for (size_t c = 0; c < nbClasses_; c++)
89  {
90  double brlen = dRate->getCategory(c) * phyloTree_->getEdge(edge->getSpeciesIndex())->getLength();
91 
92  VVdouble* cumpxy_node_c_ = &(*cumpxy_node_)[c];
93 
94  cumpxy_node_c_->resize(nbStates_);
95 
96  // process transition probabilities already consider rates &
97  // branch length
98 
99  const Matrix<double>* P;
100 
101  const auto& vSub(edge->subModelNumbers());
102 
103  if (vSub.size() == 0)
104  P = &transmodel->getPij_t(brlen);
105  else
106  {
107  if (vSub.size() > 1)
108  throw Exception("SubstitutionProcessSiteSimulator::init : only 1 submodel can be used.");
109 
110  const auto mmodel = dynamic_pointer_cast<const MixedTransitionModelInterface>(transmodel);
111 
112  const auto model2 = mmodel->getNModel(vSub[0]);
113 
114  P = &model2->getPij_t(brlen);
115  }
116 
117  /* Get likelihoods on this node for all states at this position*/
118 
119  const auto& siteForwLik = calcul_->getForwardLikelihoodsAtNodeForClass(calcul_->getForwardLikelihoodTree(c)->getSon(outid), c)->targetValue().col(pos_);
120 
121  for (size_t x = 0; x < size_t(nbStates_); x++)
122  {
123  for (size_t y = 0; y < size_t(nbStates_); y++)
124  {
125  postTrans[y] = std::max((*P)(x, y), NumConstants::PICO()) * siteForwLik(Eigen::Index(y)); // to avoid null trans prob on short branches, and then null sum of the postTrans
126  }
127  postTrans /= VectorTools::sum(postTrans);
128 
129  Vdouble pT2(postTrans.size());
130  for (size_t i = 0; i < postTrans.size(); i++)
131  {
132  pT2[i] = convert(postTrans[i]);
133  }
134 
135  (*cumpxy_node_c_)[x] = VectorTools::cumSum(pT2);
136  }
137  }
138  }
139 
140  // Initialize cumulative prob for mixture nodes
141  auto nodes = tree_.getAllNodes();
142 
143  for (auto node:nodes)
144  {
145  if (node->isMixture()) // set probas to chose
146  {
147  auto outEdges = tree_.getOutgoingEdges(node);
148  std::vector<std::vector<DataLik>> vprob;
149  vprob.resize(nbClasses_);
150 
151  for (auto edge : outEdges)
152  {
153  auto outid = tree_.getEdgeIndex(edge);
154 
155  auto model = dynamic_pointer_cast<const MixedTransitionModelInterface>(edge->getModel());
156  if (!model)
157  throw Exception("SubstitutionProcessSiteSimulator::init : model in edge " + TextTools::toString(tree_.getEdgeIndex(edge)) + " is not a mixture.");
158 
159  // a priori probabilities of the edges
160  const auto& vNb(edge->subModelNumbers());
161 
162  double x = 0.;
163  for (auto nb:vNb)
164  {
165  x += model->getNProbability(nb);
166  }
167 
168  // forward lik
169  for (size_t c = 0; c < nbClasses_; c++)
170  {
171  auto forwLik = calcul_->getForwardLikelihoodTree(c)->getEdge(outid)->targetValue().col(pos_).sum();
172  vprob[c].push_back(x * forwLik);
173  }
174 
175  node->sons_.push_back(tree_.getSon(edge));
176  }
177 
178  for (size_t c = 0; c < nbClasses_; c++)
179  {
180  vprob[c] /= VectorTools::sum(vprob[c]);
181 
182  Vdouble pT2(vprob.size());
183 
184  // convert to double
185  for (size_t i = 0; i < vprob.size(); i++)
186  {
187  pT2[i] = convert(vprob[c][i]);
188  }
189 
190  node->cumProb_[c] = VectorTools::cumSum(pT2);
191  }
192  }
193  }
194 }
195 
196 /******************************************************************************/
std::enable_if< std::is_same< M, EFMatrix< R, C > >::value, ExtendedFloatCol< R, C, EigenType > >::type col(Eigen::Index pos)
const ExtendedFloat & sum() const
virtual std::vector< Scalar > col(size_t j) const=0
static double PICO()
static T sum(const std::vector< T > &v1)
static std::vector< T > cumSum(const std::vector< T > &v1)
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
template void copyEigenToBpp(const ExtendedFloatMatrixXd &eigenMatrix, std::vector< std::vector< double >> &bppMatrix)
std::vector< VVdouble > VVVdouble
double convert(const bpp::ExtendedFloat &ef)
std::vector< Vdouble > VVdouble