bpp-phyl3 3.0.0
GivenDataSubstitutionProcessSiteSimulator.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
7#include <algorithm>
8
9#include "../Likelihood/DataFlow/ForwardLikelihoodTree.h"
11
12// From SeqLib:
14
15using namespace bpp;
16using namespace std;
17using namespace numeric;
18
19/******************************************************************************/
20
21void GivenDataSubstitutionProcessSiteSimulator::init()
22{
23 calcul_->makeLikelihoods();
24
25 // Initialize sons & fathers of tree_ Nodes
26 // set sequence names
27
29
30 // Set up cumsum rates
31
32 const auto& dRate = process_->getRateDistribution();
33
34 std::vector<DataLik> qR;
35
36 for (size_t i = 0; i < nbClasses_; i++)
37 {
38 qR.push_back(calcul_->getSiteLikelihoodsForAClass(i)(pos_));
39 }
40
41 auto sQ = VectorTools::sum(qR);
42
43 qRates_.clear();
44 for (size_t i = 0; i < nbClasses_; i++)
45 {
46 qRates_.push_back(convert(qR[i] / sQ));
47 }
48
49 // Initialize root frequencies
50
51 const auto dRoot = process_->getRootFrequencies();
52 qRoots_.resize(nbClasses_);
53 RowLik temp((int)nbStates_);
54
55 for (size_t c = 0; c < nbClasses_; c++)
56 {
57 temp = calcul_->getForwardLikelihoodsAtNodeForClass(tree_.getNodeIndex(tree_.getRoot()), c)->targetValue().col(pos_);
58
59 temp /= temp.sum();
60
61 Vdouble temp2;
62 copyEigenToBpp(temp, temp2);
63
64 qRoots_[c] = VectorTools::cumSum(temp2);
65 }
66
67 // Initialize cumulative pxy for edges that have models
68
69 auto edges = tree_.getAllEdges();
70
71 std::vector<DataLik> postTrans(nbStates_);
72
73 for (auto& edge : edges)
74 {
75 const auto model = edge->getModel();
76 auto outid = tree_.getEdgeIndex(edge);
77
78
79 if (edge->useProb())
80 continue;
81
82 bool useLik = (std::find(vPriorBranch_.begin(), vPriorBranch_.end(), edge->getSpeciesIndex()) == vPriorBranch_.end());
83
84 const auto transmodel = dynamic_pointer_cast<const TransitionModelInterface>(model);
85 if (!transmodel)
86 throw Exception("SubstitutionProcessSiteSimulator::init : model " + model->getName() + " on branch " + TextTools::toString(tree_.getEdgeIndex(edge)) + " is not a TransitionModel.");
87
88 VVVdouble* cumpxy_node_ = &edge->cumpxy_;
89 cumpxy_node_->resize(nbClasses_);
90
91 for (size_t c = 0; c < nbClasses_; c++)
92 {
93 double brlen = dRate->getCategory(c) * phyloTree_->getEdge(edge->getSpeciesIndex())->getLength();
94
95 VVdouble* cumpxy_node_c_ = &(*cumpxy_node_)[c];
96
97 cumpxy_node_c_->resize(nbStates_);
98
99 // process transition probabilities already consider rates &
100 // branch length
101
102 const Matrix<double>* P;
103
104 const auto& vSub(edge->subModelNumbers());
105
106 if (vSub.size() == 0)
107 P = &transmodel->getPij_t(brlen);
108 else
109 {
110 if (vSub.size() > 1)
111 throw Exception("SubstitutionProcessSiteSimulator::init : only 1 submodel can be used.");
112
113 const auto mmodel = dynamic_pointer_cast<const MixedTransitionModelInterface>(transmodel);
114
115 const auto model2 = mmodel->getNModel(vSub[0]);
116
117 P = &model2->getPij_t(brlen);
118 }
119
120 /* Use posterior likelihoods if branchid not in prior list */
121 /* Get likelihoods on this node for all states at this position*/
122
123 if (useLik)
124 {
125 const auto& siteForwLik = calcul_->getForwardLikelihoodsAtNodeForClass(calcul_->getForwardLikelihoodTree(c)->getSon(outid), c)->targetValue().col(pos_);
126
127 for (size_t x = 0; x < size_t(nbStates_); x++)
128 {
129 for (size_t y = 0; y < size_t(nbStates_); y++)
130 {
131 postTrans[y] = std::max((*P)(x, y), NumConstants::PICO()) * siteForwLik(Eigen::Index(y)); // to avoid null trans prob on short branches, and then null sum of the postTrans
132 }
133 postTrans /= VectorTools::sum(postTrans);
134
135 Vdouble pT2(postTrans.size());
136 for (size_t i = 0; i < postTrans.size(); i++)
137 {
138 pT2[i] = convert(postTrans[i]);
139 }
140
141 (*cumpxy_node_c_)[x] = VectorTools::cumSum(pT2);
142 }
143 }
144 else
145 {
146 for (size_t x = 0; x < nbStates_; x++)
147 {
148 Vdouble* cumpxy_node_c_x_ = &(*cumpxy_node_c_)[x];
149 cumpxy_node_c_x_->resize(nbStates_);
150 (*cumpxy_node_c_x_)[0] = (*P)(x, 0);
151 for (size_t y = 1; y < nbStates_; y++)
152 {
153 (*cumpxy_node_c_x_)[y] = (*cumpxy_node_c_x_)[y - 1] + (*P)(x, y);
154 }
155 }
156 }
157 }
158 }
159
160 // Initialize cumulative prob for mixture nodes
161 auto nodes = tree_.getAllNodes();
162
163 for (auto node:nodes)
164 {
165 if (node->isMixture()) // set probas to chose
166 {
167 auto outEdges = tree_.getOutgoingEdges(node);
168 std::vector<std::vector<DataLik>> vprob;
169 vprob.resize(nbClasses_);
170
171 for (auto edge : outEdges)
172 {
173 auto outid = tree_.getEdgeIndex(edge);
174 bool useLik = (std::find(vPriorBranch_.begin(), vPriorBranch_.end(), edge->getSpeciesIndex()) == vPriorBranch_.end());
175
176 auto model = dynamic_pointer_cast<const MixedTransitionModelInterface>(edge->getModel());
177 if (!model)
178 throw Exception("SubstitutionProcessSiteSimulator::init : model in edge " + TextTools::toString(tree_.getEdgeIndex(edge)) + " is not a mixture.");
179
180 // a priori probabilities of the edges
181 const auto& vNb(edge->subModelNumbers());
182
183 double x = 0.;
184 for (auto nb:vNb)
185 {
186 x += model->getNProbability(nb);
187 }
188
189 // forward lik
190 if (useLik)
191 {
192 for (size_t c = 0; c < nbClasses_; c++)
193 {
194 auto forwLik = calcul_->getForwardLikelihoodTree(c)->getEdge(outid)->targetValue().col(pos_).sum();
195 vprob[c].push_back(x * forwLik);
196 }
197 }
198 else
199 {
200 for (size_t c = 0; c < nbClasses_; c++)
201 {
202 vprob[c].push_back(x);
203 }
204 }
205
206 node->sons_.push_back(tree_.getSon(edge));
207 }
208
209 for (size_t c = 0; c < nbClasses_; c++)
210 {
211 vprob[c] /= VectorTools::sum(vprob[c]);
212
213 Vdouble pT2(vprob.size());
214
215 // convert to double
216 for (size_t i = 0; i < vprob.size(); i++)
217 {
218 pT2[i] = convert(vprob[c][i]);
219 }
220
221 node->cumProb_[c] = VectorTools::cumSum(pT2);
222 }
223 }
224 }
225}
226
227/******************************************************************************/
virtual std::vector< std::shared_ptr< E > > getAllEdges() const=0
virtual std::shared_ptr< N > getRoot() const=0
virtual EdgeIndex getEdgeIndex(const std::shared_ptr< E > edgeObject) const=0
virtual std::vector< std::shared_ptr< N > > getAllNodes() const=0
virtual NodeIndex getNodeIndex(const std::shared_ptr< N > nodeObject) const=0
virtual std::vector< std::shared_ptr< E > > getOutgoingEdges(const std::shared_ptr< N > node) const=0
std::shared_ptr< N > getSon(const std::shared_ptr< E > edge) const
const ExtendedFloat & sum() const
std::shared_ptr< LikelihoodCalculationSingleProcess > calcul_
Eigen::Index pos_
Position of the copied site, in SHRUNKED data.
static double PICO()
std::shared_ptr< const ParametrizablePhyloTree > phyloTree_
std::shared_ptr< const SubstitutionProcessInterface > process_
SPTree tree_
To store states & transition probabilities of the simulator.
void outputInternalSites(bool yn) override
Sets whether we will output the internal sequences or not.
Vdouble qRates_
cumsum probas of the substitution rates
static T sum(const std::vector< T > &v1)
static std::vector< T > cumSum(const std::vector< T > &v1)
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
template void copyEigenToBpp(const ExtendedFloatMatrixXd &eigenMatrix, std::vector< std::vector< double > > &bppMatrix)
std::vector< VVdouble > VVVdouble
double convert(const bpp::ExtendedFloat &ef)
std::vector< Vdouble > VVdouble