bpp-phyl3 3.0.0
RewardMappingTools.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
6#include "../Likelihood/DRTreeLikelihoodTools.h"
7#include "../Likelihood/MarginalAncestralStateReconstruction.h"
8
13
14using namespace bpp;
15
16// From the STL:
17#include <iomanip>
18
19using namespace std;
20
21/******************************************************************************/
22
23std::unique_ptr<LegacyProbabilisticRewardMapping> LegacyRewardMappingTools::computeRewardVectors(
24 std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
25 const vector<int>& nodeIds,
26 std::shared_ptr<Reward> reward,
27 bool verbose)
28{
29 // Preamble:
30 if (!drtl->isInitialized())
31 throw Exception("RewardMappingTools::computeRewardVectors(). Likelihood object is not initialized.");
32
33 // A few variables we'll need:
34
35 const TreeTemplate<Node> tree(drtl->tree());
36 const auto& sequences = drtl->data();
37 auto& rDist = drtl->rateDistribution();
38
39 size_t nbSites = sequences.getNumberOfSites();
40 size_t nbDistinctSites = drtl->likelihoodData().getNumberOfDistinctSites();
41 size_t nbStates = sequences.alphabet().getSize();
42 size_t nbClasses = rDist.getNumberOfCategories();
43 vector<const Node*> nodes = tree.getNodes();
44 const vector<size_t>& rootPatternLinks = drtl->likelihoodData().getRootArrayPositions();
45 nodes.pop_back(); // Remove root node.
46 size_t nbNodes = nodes.size();
47
48 // We create a new ProbabilisticRewardMapping object:
49 auto rewards = make_unique<LegacyProbabilisticRewardMapping>(tree, reward, nbSites);
50
51 // Store likelihood for each rate for each site:
52 VVVdouble lik;
53 drtl->computeLikelihoodAtNode(tree.getRootId(), lik);
54 Vdouble Lr(nbDistinctSites, 0);
55 Vdouble rcProbs = rDist.getProbabilities();
56 Vdouble rcRates = rDist.getCategories();
57 for (size_t i = 0; i < nbDistinctSites; i++)
58 {
59 VVdouble& lik_i = lik[i];
60 for (size_t c = 0; c < nbClasses; c++)
61 {
62 Vdouble& lik_i_c = lik_i[c];
63 double rc = rDist.getProbability(c);
64 for (size_t s = 0; s < nbStates; s++)
65 {
66 Lr[i] += lik_i_c[s] * rc;
67 }
68 }
69 }
70
71 // Compute the reward for each class and each branch in the tree:
72 if (verbose)
73 ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);
74
75 for (size_t l = 0; l < nbNodes; ++l)
76 {
77 // For each node,
78 const Node* currentNode = nodes[l];
79 if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
80 continue;
81
82 const Node* father = currentNode->getFather();
83
84 double d = currentNode->getDistanceToFather();
85
86 if (verbose)
87 ApplicationTools::displayGauge(l, nbNodes - 1);
88 Vdouble rewardsForCurrentNode(nbDistinctSites);
89
90 // Now we've got to compute likelihoods in a smart manner... ;)
91 VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
92 for (size_t i = 0; i < nbDistinctSites; i++)
93 {
94 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
95 likelihoodsFatherConstantPart_i.resize(nbClasses);
96 for (size_t c = 0; c < nbClasses; c++)
97 {
98 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
99 likelihoodsFatherConstantPart_i_c.resize(nbStates);
100 double rc = rDist.getProbability(c);
101 for (size_t s = 0; s < nbStates; s++)
102 {
103 // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
104 // freq is already accounted in the array
105 likelihoodsFatherConstantPart_i_c[s] = rc;
106 }
107 }
108 }
109
110 // First, what will remain constant:
111 size_t nbSons = father->getNumberOfSons();
112 for (size_t n = 0; n < nbSons; n++)
113 {
114 const Node* currentSon = father->getSon(n);
115 if (currentSon->getId() != currentNode->getId())
116 {
117 const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
118
119 // Now iterate over all site partitions:
120 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentSon->getId()));
121 VVVdouble pxy;
122 bool first;
123 while (mit->hasNext())
124 {
125 auto bmd = mit->next();
126 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
127 first = true;
128 while (sit->hasNext())
129 {
130 size_t i = sit->next();
131 // We retrieve the transition probabilities for this site partition:
132 if (first)
133 {
134 pxy = drtl->getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
135 first = false;
136 }
137 const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
138 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
139 for (size_t c = 0; c < nbClasses; c++)
140 {
141 const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
142 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
143 VVdouble& pxy_c = pxy[c];
144 for (size_t x = 0; x < nbStates; x++)
145 {
146 Vdouble& pxy_c_x = pxy_c[x];
147 double likelihood = 0.;
148 for (size_t y = 0; y < nbStates; y++)
149 {
150 likelihood += pxy_c_x[y] * likelihoodsFather_son_i_c[y];
151 }
152 likelihoodsFatherConstantPart_i_c[x] *= likelihood;
153 }
154 }
155 }
156 }
157 }
158 }
159 if (father->hasFather())
160 {
161 const Node* currentSon = father->getFather();
162 const VVVdouble& likelihoodsFather_son = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentSon->getId());
163 // Now iterate over all site partitions:
164 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(father->getId()));
165 VVVdouble pxy;
166 bool first;
167 while (mit->hasNext())
168 {
169 auto bmd = mit->next();
170 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
171 first = true;
172 while (sit->hasNext())
173 {
174 size_t i = sit->next();
175 // We retrieve the transition probabilities for this site partition:
176 if (first)
177 {
178 pxy = drtl->getTransitionProbabilitiesPerRateClass(father->getId(), i);
179 first = false;
180 }
181 const VVdouble& likelihoodsFather_son_i = likelihoodsFather_son[i];
182 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
183 for (size_t c = 0; c < nbClasses; c++)
184 {
185 const Vdouble& likelihoodsFather_son_i_c = likelihoodsFather_son_i[c];
186 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
187 VVdouble& pxy_c = pxy[c];
188 for (size_t x = 0; x < nbStates; x++)
189 {
190 double likelihood = 0.;
191 for (size_t y = 0; y < nbStates; y++)
192 {
193 Vdouble& pxy_c_x = pxy_c[y];
194 likelihood += pxy_c_x[x] * likelihoodsFather_son_i_c[y];
195 }
196 likelihoodsFatherConstantPart_i_c[x] *= likelihood;
197 }
198 }
199 }
200 }
201 }
202 else
203 {
204 // Account for root frequencies:
205 for (size_t i = 0; i < nbDistinctSites; i++)
206 {
207 vector<double> freqs = drtl->getRootFrequencies(i);
208 VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
209 for (size_t c = 0; c < nbClasses; c++)
210 {
211 Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
212 for (size_t x = 0; x < nbStates; x++)
213 {
214 (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
215 }
216 }
217 }
218 }
219
220 // Then, we deal with the node of interest.
221 // We first average upon 'y' to save computations, and then upon 'x'.
222 // ('y' is the state at 'node' and 'x' the state at 'father'.)
223
224 // Iterate over all site partitions:
225 const VVVdouble& likelihoodsFather_node = drtl->likelihoodData().getLikelihoodArray(father->getId(), currentNode->getId());
226 unique_ptr<TreeLikelihoodInterface::ConstBranchModelIterator> mit(drtl->getNewBranchModelIterator(currentNode->getId()));
227 VVVdouble pxy;
228 bool first;
229 while (mit->hasNext())
230 {
231 auto bmd = mit->next();
232 reward->setSubstitutionModel(bmd->getSubstitutionModel());
233 // compute all nxy first:
234 VVVdouble nxy(nbClasses);
235 for (size_t c = 0; c < nbClasses; ++c)
236 {
237 VVdouble& nxy_c = nxy[c];
238 double rc = rcRates[c];
239 auto nij = reward->getAllRewards(d * rc);
240 nxy_c.resize(nbStates);
241 for (size_t x = 0; x < nbStates; ++x)
242 {
243 Vdouble& nxy_c_x = nxy_c[x];
244 nxy_c_x.resize(nbStates);
245 for (size_t y = 0; y < nbStates; ++y)
246 {
247 nxy_c_x[y] = (*nij)(x, y);
248 }
249 }
250 }
251
252 // Now loop over sites:
253 unique_ptr<TreeLikelihoodInterface::SiteIterator> sit(bmd->getNewSiteIterator());
254 first = true;
255 while (sit->hasNext())
256 {
257 size_t i = sit->next();
258 // We retrieve the transition probabilities and substitution counts for this site partition:
259 if (first)
260 {
261 pxy = drtl->getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
262 first = false;
263 }
264 const VVdouble& likelihoodsFather_node_i = likelihoodsFather_node[i];
265 VVdouble& likelihoodsFatherConstantPart_i = likelihoodsFatherConstantPart[i];
266 for (size_t c = 0; c < nbClasses; ++c)
267 {
268 const Vdouble& likelihoodsFather_node_i_c = likelihoodsFather_node_i[c];
269 Vdouble& likelihoodsFatherConstantPart_i_c = likelihoodsFatherConstantPart_i[c];
270 const VVdouble& pxy_c = pxy[c];
271 VVdouble& nxy_c = nxy[c];
272 for (size_t x = 0; x < nbStates; ++x)
273 {
274 double& likelihoodsFatherConstantPart_i_c_x = likelihoodsFatherConstantPart_i_c[x];
275 const Vdouble& pxy_c_x = pxy_c[x];
276 for (size_t y = 0; y < nbStates; ++y)
277 {
278 double likelihood_cxy = likelihoodsFatherConstantPart_i_c_x
279 * pxy_c_x[y]
280 * likelihoodsFather_node_i_c[y];
281
282 // Now the vector computation:
283 rewardsForCurrentNode[i] += likelihood_cxy * nxy_c[x][y];
284 // <------------> <--------->
285 // Posterior probability | |
286 // for site i and rate class c * | |
287 // likelihood for this site------+ |
288 // |
289 // Reward function for site i and rate class c----+
290 }
291 }
292 }
293 }
294 }
295
296 // Now we just have to copy the substitutions into the result vector:
297 for (size_t i = 0; i < nbSites; ++i)
298 {
299 (*rewards)(l, i) = rewardsForCurrentNode[rootPatternLinks[i]] / Lr[rootPatternLinks[i]];
300 }
301 }
302 if (verbose)
303 {
307 }
308 return rewards;
309}
310
311/**************************************************************************************************/
312
315 const SiteContainerInterface& sites,
316 ostream& out)
317{
318 if (!out)
319 throw IOException("RewardMappingTools::writeToFile. Can't write to stream.");
320 out << "Branches";
321 out << "\tMean";
322 for (size_t i = 0; i < rewards.getNumberOfSites(); i++)
323 {
324 out << "\tSite" << sites.site(i).getCoordinate();
325 }
326 out << endl;
327
328 for (size_t j = 0; j < rewards.getNumberOfBranches(); j++)
329 {
330 out << rewards.getNode(j)->getId() << "\t" << rewards.getNode(j)->getDistanceToFather();
331 for (size_t i = 0; i < rewards.getNumberOfSites(); i++)
332 {
333 out << "\t" << rewards(j, i);
334 }
335 out << endl;
336 }
337}
338
339/**************************************************************************************************/
340
342{
343 try
344 {
345 auto data = DataTable::read(in, "\t", true, -1);
346 vector<string> ids = data->getColumn(0);
347 data->deleteColumn(0); // Remove ids
348 data->deleteColumn(0); // Remove means
349 // Now parse the table:
350 size_t nbSites = data->getNumberOfColumns();
351 rewards.setNumberOfSites(nbSites);
352 size_t nbBranches = data->getNumberOfRows();
353 for (size_t i = 0; i < nbBranches; i++)
354 {
355 int id = TextTools::toInt(ids[i]);
356 size_t br = rewards.getNodeIndex(id);
357 for (size_t j = 0; j < nbSites; j++)
358 {
359 rewards(br, j) = TextTools::toDouble((*data)(i, j));
360 }
361 }
362 // Parse the header:
363 for (size_t i = 0; i < nbSites; i++)
364 {
365 string siteTxt = data->getColumnName(i);
366 int site = 0;
367 if (siteTxt.substr(0, 4) == "Site")
368 site = TextTools::to<int>(siteTxt.substr(4));
369 else
370 site = TextTools::to<int>(siteTxt);
371 rewards.setSitePosition(i, site);
372 }
373 }
374 catch (Exception& e)
375 {
376 throw IOException(string("Bad input file. ") + e.what());
377 }
378}
379
380/**************************************************************************************************/
381
383{
384 size_t nbSites = smap.getNumberOfSites();
385 double v = 0;
386 for (size_t i = 0; i < nbSites; ++i)
387 {
388 v += smap(branchIndex, i);
389 }
390 return v;
391}
392
393/**************************************************************************************************/
394
396{
397 size_t nbBranches = smap.getNumberOfBranches();
398 double v = 0;
399 for (size_t i = 0; i < nbBranches; ++i)
400 {
401 v += smap(i, siteIndex);
402 }
403 return v;
404}
405
406/**************************************************************************************************/
int getCoordinate() const override
static void displayTask(const std::string &text, bool eof=false)
static std::shared_ptr< OutputStream > message
static void displayTaskDone()
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
static std::unique_ptr< DataTable > read(std::istream &in, const std::string &sep="\t", bool header=true, int rowNames=-1)
const char * what() const noexcept override
size_t getNumberOfSites() const override
Definition: Mapping.h:158
void setSitePosition(size_t index, int position) override
Set the position of a given site.
Definition: Mapping.h:152
virtual size_t getNodeIndex(int nodeId) const override
Definition: Mapping.h:184
size_t getNumberOfBranches() const override
Definition: Mapping.h:160
virtual const Node * getNode(size_t nodeIndex) const
Definition: Mapping.h:162
virtual size_t getNumberOfBranches() const =0
virtual size_t getNumberOfSites() const =0
Legacy data storage class for probabilistic rewards mappings.
virtual void setNumberOfSites(size_t numberOfSites) override
Legacy interface for storing reward mapping data.
Definition: RewardMapping.h:28
static double computeSumForSite(const LegacyRewardMappingInterface &smap, size_t siteIndex)
Sum all substitutions for each type of a given site (specified by its index).
static void readFromStream(std::istream &in, LegacyProbabilisticRewardMapping &rewards)
Read the reward vectors from a stream.
static void writeToStream(const LegacyProbabilisticRewardMapping &rewards, const SiteContainerInterface &sites, std::ostream &out)
Write the reward vectors to a stream.
static double computeSumForBranch(const LegacyRewardMappingInterface &smap, size_t branchIndex)
Sum all rewards of a given branch (specified by its index).
The phylogenetic node class.
Definition: Node.h:59
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:306
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:346
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
Definition: Node.h:250
virtual size_t getNumberOfSons() const
Definition: Node.h:355
virtual const Site & site(size_t sitePosition) const override=0
The phylogenetic tree class.
Definition: TreeTemplate.h:59
int getRootId() const
Definition: TreeTemplate.h:127
virtual std::vector< const N * > getNodes() const
Definition: TreeTemplate.h:421
static bool contains(const std::vector< T > &vec, T el)
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble