bpp-phyl3 3.0.0
RewardMappingTools.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
9
10#include "../Likelihood/DataFlow/ForwardLikelihoodTree.h"
11#include "../Likelihood/DataFlow/LikelihoodCalculationSingleProcess.h"
12#include "RewardMappingTools.h"
13
14using namespace bpp;
15using namespace numeric;
16
17// From the STL:
18#include <iomanip>
19
20using namespace std;
21
22
23/******************************************************************************/
24
25unique_ptr<ProbabilisticRewardMapping> RewardMappingTools::computeRewardVectors(
27 const vector<uint>& edgeIds,
28 Reward& reward,
29 short unresolvedOption,
30 bool verbose)
31{
32 // Preamble:
33 if (!rltc.isInitialized())
34 throw Exception("RewardMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
35
37
38 if (edgeIds.size() == 0)
39 return make_unique<ProbabilisticRewardMapping>(
43
44 auto processTree = rltc.getTreeNode(0);
45
46 /* First, set substitution rewards */
47
48 map<const SubstitutionModelInterface*, shared_ptr<Reward>> mModReward;
49
50 for (auto speciesId : edgeIds)
51 {
52 const auto& dagIndexes = rltc.getEdgesIds(speciesId, 0);
53
54 for (auto id : dagIndexes)
55 {
56 const auto& edge = processTree->getEdge(id);
57 if (edge->getBrLen()) // if edge with model on it
58 {
59 auto model = edge->getModel();
60
61 auto nMod = edge->getNMod();
62
63 auto tm = dynamic_pointer_cast<const TransitionModelInterface>(model->targetValue());
64
65 shared_ptr<const SubstitutionModelInterface> sm = nullptr;
66
67 if (nMod == 0)
68 {
69 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
70
71 if (!sm)
72 throw Exception("SubstitutionMappingTools::computeCounts : SubstitutionVectors possible only for SubstitutionModels, not in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
73 }
74 else
75 {
76 size_t nmod = nMod->targetValue();
77
78 auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
79 if (!ttm)
80 throw Exception("SubstitutionMappingTools::computeCounts : Expecting Mixed model in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
81
82 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
83
84 if (!sm)
85 throw Exception("SubstitutionMappingTools::computeCounts : Expecting Substitution model for submodel " + TextTools::toString(nmod) + " of mixed model " + tm->getName() + " in branch " + TextTools::toString(speciesId));
86 }
87
88 if (mModReward.find(sm.get()) == mModReward.end())
89 {
90 mModReward[sm.get()] = shared_ptr<Reward>(reward.clone());
91 mModReward[sm.get()]->setSubstitutionModel(sm);
92 }
93 }
94 }
95 }
96
97 auto ppt = sp.getParametrizablePhyloTree();
98
99 // A few variables we'll need:
100
101 size_t nbDistinctSites = rltc.getNumberOfDistinctSites();
102 size_t nbClasses = sp.getNumberOfClasses();
103 size_t nbNodes = edgeIds.size();
104
105 const auto& rootPatternLinks = rltc.getRootArrayPositions();
106
107 // We create a new ProbabilisticRewardMapping object:
108 unique_ptr<ProbabilisticRewardMapping> rewards(new ProbabilisticRewardMapping(*ppt, rootPatternLinks, nbDistinctSites));
109
110 // Compute the reward for each class and each branch in the tree:
111
112 // Get the DAG of probabilities of the edges
114
115 if (verbose)
116 ApplicationTools::displayTask("Compute rewards", true);
117
118 unique_ptr<ProbabilisticRewardMapping::mapTree::EdgeIterator> brIt = rewards->allEdgesIterator();
119
120 Eigen::MatrixXd rpxy;
121
122 size_t nn = 0;
123 for ( ; !brIt->end(); brIt->next())
124 {
125 if (verbose)
126 ApplicationTools::displayGauge(nn++, nbNodes - 1);
127
128 shared_ptr<PhyloBranchReward> br = **brIt;
129
130 // For each branch
131 uint speciesId = rewards->getEdgeIndex(br);
132
133 if (edgeIds.size() > 0 && !VectorTools::contains(edgeIds, (int)speciesId))
134 continue;
135
136 RowLik rewardsForCurrentNode(RowLik::Zero(int(nbDistinctSites)));
137
138 for (size_t ncl = 0; ncl < nbClasses; ncl++)
139 {
140 processTree = rltc.getTreeNode(ncl);
141
142 double pr = sp.getProbabilityForModel(ncl);
143
144 RowLik rewardsForCurrentClass(RowLik::Zero(Eigen::Index(nbDistinctSites)));
145
146 const auto& dagIndexes = rltc.getEdgesIds(speciesId, ncl);
147
148 // Sum on all dag edges for this speciesId
149 for (auto id : dagIndexes)
150 {
151 auto edge = processTree->getEdge(id);
152
153 auto tm = dynamic_pointer_cast<const TransitionModelInterface>(edge->model().targetValue());
154
155 auto nMod = edge->getNMod();
156
157 shared_ptr<const SubstitutionModelInterface> sm = nullptr;
158
159 if (nMod == 0)
160 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
161 else
162 {
163 size_t nmod = nMod->targetValue();
164
165 auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
166 sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
167 }
168
169 auto subReward = mModReward[sm.get()];
170
171 const auto& likelihoodsTopEdge = rltc.getBackwardLikelihoodsAtEdgeForClass(id, ncl)->targetValue();
172
173 auto sonid = rltc.getForwardLikelihoodTree(ncl)->getSon(id);
174 auto fatid = rltc.getForwardLikelihoodTree(ncl)->getFatherOfEdge(id);
175
176 const auto& likelihoodsBotEdge = rltc.getForwardLikelihoodsAtNodeForClass(sonid, ncl)->targetValue();
177
178 // compute all nxy * pxy first:
179
180 const Eigen::MatrixXd& pxy = edge->getTransitionMatrix()->accessValueConst();
181 const auto& likelihoodsFather = rltc.getLikelihoodsAtNodeForClass(fatid, ncl)->targetValue();
182
183
184 subReward->storeAllRewards(edge->getBrLen()->getValue(), rpxy);
185
186 rpxy.array() *= pxy.array();
187
188 // Now loop over sites:
189
190 auto rew = rpxy * likelihoodsBotEdge;
191
192 auto bb = (cwise(likelihoodsTopEdge) * cwise(rew)).colwise().sum();
193
194
195 Eigen::VectorXd ff(likelihoodsBotEdge.cols());
196 switch (unresolvedOption)
197 {
200
201 // Nullify counts where sum likelihoods > 1 : ie unknown
202 for (auto i = 0; i < ff.size(); i++)
203 {
204 const auto& s = likelihoodsBotEdge.col(i).sum();
205 if (s >= 2.)
206 ff[i] = (unresolvedOption == SubstitutionMappingTools::UNRESOLVED_ZERO) ? 0. : 1. / convert(s);
207 else
208 ff[i] = 1;
209 }
210
211 // Normalizes by likelihood on this node
212
213 bb *= ff.array();
214 default:
215 ;
216 }
217
218 // Normalizes by likelihood on this node
219 auto cc = bb / cwise(likelihoodsFather);
220
221 // adds, with branch ponderation ( * edge / edge * father) probs
222 cwise(rewardsForCurrentClass) += cc * probaDAG.getProbaAtNode(fatid);
223 }
224
225 rewardsForCurrentNode += rewardsForCurrentClass * pr;
226 }
227
228 // Now we just have to copy the substitutions into the result vector:
229 for (size_t i = 0; i < nbDistinctSites; ++i)
230 {
231 (*br)(i) = convert(rewardsForCurrentNode(Eigen::Index(i)));
232 }
233 }
234 if (verbose)
235 {
239 }
240 return rewards;
241}
242
243/**************************************************************************************************/
244
246 const ProbabilisticRewardMapping& rewards,
247 const AlignmentDataInterface& sites,
248 ostream& out)
249{
250 if (!out)
251 throw IOException("RewardMappingTools::writeToFile. Can't write to stream.");
252 out << "Branches";
253 out << "\tMean";
254 for (size_t i = 0; i < rewards.getNumberOfSites(); ++i)
255 {
256 out << "\tSite" << sites.site(i).getCoordinate();
257 }
258 out << endl;
259
260 unique_ptr<ProbabilisticRewardMapping::mapTree::EdgeIterator> brIt = rewards.allEdgesIterator();
261
262 for ( ; !brIt->end(); brIt->next())
263 {
264 const shared_ptr<PhyloBranchReward> br = **brIt;
265
266 out << rewards.getEdgeIndex(br) << "\t" << br->getLength();
267
268 for (size_t i = 0; i < rewards.getNumberOfSites(); ++i)
269 {
270 out << "\t" << br->getSiteReward(rewards.getSiteIndex(i));
271 }
272
273 out << endl;
274 }
275}
276
277/**************************************************************************************************/
278
280{
281 try
282 {
283 auto data = DataTable::read(in, "\t", true, -1);
284 vector<string> ids = data->getColumn(0);
285 data->deleteColumn(0); // Remove ids
286 data->deleteColumn(0); // Remove means
287 // Now parse the table:
288 size_t nbSites = data->getNumberOfColumns();
289 rewards.setNumberOfSites(nbSites);
290
291 unique_ptr<ProbabilisticRewardMapping::mapTree::EdgeIterator> brIt = rewards.allEdgesIterator();
292
293 for ( ; !brIt->end(); brIt->next())
294 {
295 const shared_ptr<PhyloBranchReward> br = **brIt;
296
297 uint brid = rewards.getEdgeIndex(br);
298 for (size_t j = 0; j < nbSites; j++)
299 {
300 (*br)(j) = TextTools::toDouble((*data)(brid, j));
301 }
302 }
303
304 // Parse the header:
305 for (size_t i = 0; i < nbSites; i++)
306 {
307 string siteTxt = data->getColumnName(i);
308 int site = 0;
309 if (siteTxt.substr(0, 4) == "Site")
310 site = TextTools::to<int>(siteTxt.substr(4));
311 else
312 site = TextTools::to<int>(siteTxt);
313 rewards.setSitePosition(i, site);
314 }
315 }
316 catch (Exception& e)
317 {
318 throw IOException(string("Bad input file. ") + e.what());
319 }
320}
321
322/**************************************************************************************************/
323
325{
326 size_t nbSites = smap.getNumberOfSites();
327 double v = 0;
328 shared_ptr<PhyloBranchReward> br = smap.getEdge((uint)branchIndex);
329
330 for (size_t i = 0; i < nbSites; ++i)
331 {
332 v += br->getSiteReward(smap.getSiteIndex(i));
333 }
334 return v;
335}
336
337/**************************************************************************************************/
338
340{
341 double v = 0;
342 unique_ptr<ProbabilisticRewardMapping::mapTree::EdgeIterator> brIt = smap.allEdgesIterator();
343
344 size_t siteIndex = smap.getSiteIndex(site);
345
346 for ( ; !brIt->end(); brIt->next())
347 {
348 v += (**brIt)->getSiteReward(siteIndex);
349 }
350 return v;
351}
352
353/**************************************************************************************************/
void setSitePosition(size_t index, int position)
Set the position of a given site.
Definition: Mapping.h:103
size_t getNumberOfSites() const
Definition: Mapping.h:111
static void displayTask(const std::string &text, bool eof=false)
static std::shared_ptr< OutputStream > message
static void displayTaskDone()
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
virtual std::unique_ptr< EdgeIterator > allEdgesIterator()=0
virtual EdgeIndex getEdgeIndex(const std::shared_ptr< E > edgeObject) const=0
virtual std::shared_ptr< E > getEdge(EdgeIndex edgeIndex) const=0
virtual int getCoordinate() const=0
static std::unique_ptr< DataTable > read(std::istream &in, const std::string &sep="\t", bool header=true, int rowNames=-1)
const char * what() const noexcept override
static Self Zero(Eigen::Index rows, Eigen::Index cols)
SiteLikelihoodsRef getLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
Get shrunked conditional likelihood matrix at Node (ie just above the node), for a given rate class.
std::shared_ptr< ProcessTree > getTreeNode(size_t nCat)
Get process tree for a rate category.
std::shared_ptr< ForwardLikelihoodTree > getForwardLikelihoodTree(size_t nCat)
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
ConditionalLikelihoodRef getBackwardLikelihoodsAtEdgeForClass(uint edgeId, size_t nCat)
ConditionalLikelihoodRef getForwardLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
const DAGindexes & getEdgesIds(uint speciesId, size_t nCat) const
Get indexes of the non-empty edges in the Likelihood DAG that have a given species index for a given ...
virtual bool isInitialized() const
Data storage class for probabilistic rewards mappings.
virtual void setNumberOfSites(size_t numberOfSites) override
const size_t getSiteIndex(size_t site) const
double getProbaAtNode(PhyloTree::NodeIndex nodeId)
static double computeSumForBranch(const ProbabilisticRewardMapping &smap, size_t branchIndex)
Sum all rewards of a given branch (specified by its index).
static void writeToStream(const ProbabilisticRewardMapping &rewards, const AlignmentDataInterface &sites, std::ostream &out)
Write the reward vectors to a stream.
static double computeSumForSite(const ProbabilisticRewardMapping &smap, size_t siteIndex)
Sum all rewards on a given site (specified by its index).
static void readFromStream(std::istream &in, ProbabilisticRewardMapping &rewards)
Read the reward vectors from a stream.
The Reward interface.
Definition: Reward.h:36
virtual Reward * clone() const =0
static const short UNRESOLVED_ZERO
Constants describing how unresolved characters are counted:
This interface describes the substitution process along the tree and sites of the alignment.
virtual std::shared_ptr< const ParametrizablePhyloTree > getParametrizablePhyloTree() const =0
virtual size_t getNumberOfClasses() const =0
virtual double getProbabilityForModel(size_t classIndex) const =0
virtual const ParametrizablePhyloTree & parametrizablePhyloTree() const =0
virtual const CoreSiteInterface & site(size_t siteIndex) const=0
static bool contains(const std::vector< T > &vec, T el)
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string toString(T t)
T & cwise(T &t)
Definition: DataFlowCWise.h:29
Defines the basic types of data flow nodes.
double convert(const bpp::ExtendedFloat &ef)