bpp-phyl3  3.0.0
RewardMappingTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
8 #include <Bpp/Text/TextTools.h>
9 
10 #include "../Likelihood/DataFlow/ForwardLikelihoodTree.h"
11 #include "../Likelihood/DataFlow/LikelihoodCalculationSingleProcess.h"
12 #include "RewardMappingTools.h"
13 
14 using namespace bpp;
15 using namespace numeric;
16 
17 // From the STL:
18 #include <iomanip>
19 
20 using namespace std;
21 
22 
23 /******************************************************************************/
24 
25 unique_ptr<ProbabilisticRewardMapping> RewardMappingTools::computeRewardVectors(
27  const vector<uint>& edgeIds,
28  Reward& reward,
29  short unresolvedOption,
30  bool verbose)
31 {
32  // Preamble:
33  if (!rltc.isInitialized())
34  throw Exception("RewardMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
35 
37 
38  if (edgeIds.size() == 0)
39  return make_unique<ProbabilisticRewardMapping>(
41  rltc.getRootArrayPositions(),
43 
44  auto processTree = rltc.getTreeNode(0);
45 
46  /* First, set substitution rewards */
47 
48  map<const SubstitutionModelInterface*, shared_ptr<Reward>> mModReward;
49 
50  for (auto speciesId : edgeIds)
51  {
52  const auto& dagIndexes = rltc.getEdgesIds(speciesId, 0);
53 
54  for (auto id : dagIndexes)
55  {
56  const auto& edge = processTree->getEdge(id);
57  if (edge->getBrLen()) // if edge with model on it
58  {
59  auto model = edge->getModel();
60 
61  auto nMod = edge->getNMod();
62 
63  auto tm = dynamic_pointer_cast<const TransitionModelInterface>(model->targetValue());
64 
65  shared_ptr<const SubstitutionModelInterface> sm = nullptr;
66 
67  if (nMod == 0)
68  {
69  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
70 
71  if (!sm)
72  throw Exception("SubstitutionMappingTools::computeCounts : SubstitutionVectors possible only for SubstitutionModels, not in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
73  }
74  else
75  {
76  size_t nmod = nMod->targetValue();
77 
78  auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
79  if (!ttm)
80  throw Exception("SubstitutionMappingTools::computeCounts : Expecting Mixed model in branch " + TextTools::toString(speciesId) + ". Got model " + tm->getName());
81 
82  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
83 
84  if (!sm)
85  throw Exception("SubstitutionMappingTools::computeCounts : Expecting Substitution model for submodel " + TextTools::toString(nmod) + " of mixed model " + tm->getName() + " in branch " + TextTools::toString(speciesId));
86  }
87 
88  if (mModReward.find(sm.get()) == mModReward.end())
89  {
90  mModReward[sm.get()] = shared_ptr<Reward>(reward.clone());
91  mModReward[sm.get()]->setSubstitutionModel(sm);
92  }
93  }
94  }
95  }
96 
97  auto ppt = sp.getParametrizablePhyloTree();
98 
99  // A few variables we'll need:
100 
101  size_t nbDistinctSites = rltc.getNumberOfDistinctSites();
102  size_t nbClasses = sp.getNumberOfClasses();
103  size_t nbNodes = edgeIds.size();
104 
105  const auto& rootPatternLinks = rltc.getRootArrayPositions();
106 
107  // We create a new ProbabilisticRewardMapping object:
108  unique_ptr<ProbabilisticRewardMapping> rewards(new ProbabilisticRewardMapping(*ppt, rootPatternLinks, nbDistinctSites));
109 
110  // Compute the reward for each class and each branch in the tree:
111 
112  // Get the DAG of probabilities of the edges
113  ProbabilityDAG probaDAG(rltc.getForwardLikelihoodTree(0));
114 
115  if (verbose)
116  ApplicationTools::displayTask("Compute rewards", true);
117 
118  unique_ptr<ProbabilisticRewardMapping::mapTree::EdgeIterator> brIt = rewards->allEdgesIterator();
119 
120  Eigen::MatrixXd rpxy;
121 
122  size_t nn = 0;
123  for ( ; !brIt->end(); brIt->next())
124  {
125  if (verbose)
126  ApplicationTools::displayGauge(nn++, nbNodes - 1);
127 
128  shared_ptr<PhyloBranchReward> br = **brIt;
129 
130  // For each branch
131  uint speciesId = rewards->getEdgeIndex(br);
132 
133  if (edgeIds.size() > 0 && !VectorTools::contains(edgeIds, (int)speciesId))
134  continue;
135 
136  RowLik rewardsForCurrentNode(RowLik::Zero(int(nbDistinctSites)));
137 
138  for (size_t ncl = 0; ncl < nbClasses; ncl++)
139  {
140  processTree = rltc.getTreeNode(ncl);
141 
142  double pr = sp.getProbabilityForModel(ncl);
143 
144  RowLik rewardsForCurrentClass(RowLik::Zero(Eigen::Index(nbDistinctSites)));
145 
146  const auto& dagIndexes = rltc.getEdgesIds(speciesId, ncl);
147 
148  // Sum on all dag edges for this speciesId
149  for (auto id : dagIndexes)
150  {
151  auto edge = processTree->getEdge(id);
152 
153  auto tm = dynamic_pointer_cast<const TransitionModelInterface>(edge->model().targetValue());
154 
155  auto nMod = edge->getNMod();
156 
157  shared_ptr<const SubstitutionModelInterface> sm = nullptr;
158 
159  if (nMod == 0)
160  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(tm);
161  else
162  {
163  size_t nmod = nMod->targetValue();
164 
165  auto ttm = dynamic_pointer_cast<const MixedTransitionModelInterface>(tm);
166  sm = dynamic_pointer_cast<const SubstitutionModelInterface>(ttm->getNModel(nmod));
167  }
168 
169  auto subReward = mModReward[sm.get()];
170 
171  const auto& likelihoodsTopEdge = rltc.getBackwardLikelihoodsAtEdgeForClass(id, ncl)->targetValue();
172 
173  auto sonid = rltc.getForwardLikelihoodTree(ncl)->getSon(id);
174  auto fatid = rltc.getForwardLikelihoodTree(ncl)->getFatherOfEdge(id);
175 
176  const auto& likelihoodsBotEdge = rltc.getForwardLikelihoodsAtNodeForClass(sonid, ncl)->targetValue();
177 
178  // compute all nxy * pxy first:
179 
180  const Eigen::MatrixXd& pxy = edge->getTransitionMatrix()->accessValueConst();
181  const auto& likelihoodsFather = rltc.getLikelihoodsAtNodeForClass(fatid, ncl)->targetValue();
182 
183 
184  subReward->storeAllRewards(edge->getBrLen()->getValue(), rpxy);
185 
186  rpxy.array() *= pxy.array();
187 
188  // Now loop over sites:
189 
190  auto rew = rpxy * likelihoodsBotEdge;
191 
192  auto bb = (cwise(likelihoodsTopEdge) * cwise(rew)).colwise().sum();
193 
194 
195  Eigen::VectorXd ff(likelihoodsBotEdge.cols());
196  switch (unresolvedOption)
197  {
200 
201  // Nullify counts where sum likelihoods > 1 : ie unknown
202  for (auto i = 0; i < ff.size(); i++)
203  {
204  const auto& s = likelihoodsBotEdge.col(i).sum();
205  if (s >= 2.)
206  ff[i] = (unresolvedOption == SubstitutionMappingTools::UNRESOLVED_ZERO) ? 0. : 1. / convert(s);
207  else
208  ff[i] = 1;
209  }
210 
211  // Normalizes by likelihood on this node
212 
213  bb *= ff.array();
214  default:
215  ;
216  }
217 
218  // Normalizes by likelihood on this node
219  auto cc = bb / cwise(likelihoodsFather);
220 
221  // adds, with branch ponderation ( * edge / edge * father) probs
222  cwise(rewardsForCurrentClass) += cc * probaDAG.getProbaAtNode(fatid);
223  }
224 
225  rewardsForCurrentNode += rewardsForCurrentClass * pr;
226  }
227 
228  // Now we just have to copy the substitutions into the result vector:
229  for (size_t i = 0; i < nbDistinctSites; ++i)
230  {
231  (*br)(i) = convert(rewardsForCurrentNode(Eigen::Index(i)));
232  }
233  }
234  if (verbose)
235  {
239  }
240  return rewards;
241 }
242 
243 /**************************************************************************************************/
244 
246  const ProbabilisticRewardMapping& rewards,
247  const AlignmentDataInterface& sites,
248  ostream& out)
249 {
250  if (!out)
251  throw IOException("RewardMappingTools::writeToFile. Can't write to stream.");
252  out << "Branches";
253  out << "\tMean";
254  for (size_t i = 0; i < rewards.getNumberOfSites(); ++i)
255  {
256  out << "\tSite" << sites.site(i).getCoordinate();
257  }
258  out << endl;
259 
260  unique_ptr<ProbabilisticRewardMapping::mapTree::EdgeIterator> brIt = rewards.allEdgesIterator();
261 
262  for ( ; !brIt->end(); brIt->next())
263  {
264  const shared_ptr<PhyloBranchReward> br = **brIt;
265 
266  out << rewards.getEdgeIndex(br) << "\t" << br->getLength();
267 
268  for (size_t i = 0; i < rewards.getNumberOfSites(); ++i)
269  {
270  out << "\t" << br->getSiteReward(rewards.getSiteIndex(i));
271  }
272 
273  out << endl;
274  }
275 }
276 
277 /**************************************************************************************************/
278 
280 {
281  try
282  {
283  auto data = DataTable::read(in, "\t", true, -1);
284  vector<string> ids = data->getColumn(0);
285  data->deleteColumn(0); // Remove ids
286  data->deleteColumn(0); // Remove means
287  // Now parse the table:
288  size_t nbSites = data->getNumberOfColumns();
289  rewards.setNumberOfSites(nbSites);
290 
291  unique_ptr<ProbabilisticRewardMapping::mapTree::EdgeIterator> brIt = rewards.allEdgesIterator();
292 
293  for ( ; !brIt->end(); brIt->next())
294  {
295  const shared_ptr<PhyloBranchReward> br = **brIt;
296 
297  uint brid = rewards.getEdgeIndex(br);
298  for (size_t j = 0; j < nbSites; j++)
299  {
300  (*br)(j) = TextTools::toDouble((*data)(brid, j));
301  }
302  }
303 
304  // Parse the header:
305  for (size_t i = 0; i < nbSites; i++)
306  {
307  string siteTxt = data->getColumnName(i);
308  int site = 0;
309  if (siteTxt.substr(0, 4) == "Site")
310  site = TextTools::to<int>(siteTxt.substr(4));
311  else
312  site = TextTools::to<int>(siteTxt);
313  rewards.setSitePosition(i, site);
314  }
315  }
316  catch (Exception& e)
317  {
318  throw IOException(string("Bad input file. ") + e.what());
319  }
320 }
321 
322 /**************************************************************************************************/
323 
325 {
326  size_t nbSites = smap.getNumberOfSites();
327  double v = 0;
328  shared_ptr<PhyloBranchReward> br = smap.getEdge((uint)branchIndex);
329 
330  for (size_t i = 0; i < nbSites; ++i)
331  {
332  v += br->getSiteReward(smap.getSiteIndex(i));
333  }
334  return v;
335 }
336 
337 /**************************************************************************************************/
338 
340 {
341  double v = 0;
342  unique_ptr<ProbabilisticRewardMapping::mapTree::EdgeIterator> brIt = smap.allEdgesIterator();
343 
344  size_t siteIndex = smap.getSiteIndex(site);
345 
346  for ( ; !brIt->end(); brIt->next())
347  {
348  v += (**brIt)->getSiteReward(siteIndex);
349  }
350  return v;
351 }
352 
353 /**************************************************************************************************/
void setSitePosition(size_t index, int position)
Set the position of a given site.
Definition: Mapping.h:103
size_t getNumberOfSites() const
Definition: Mapping.h:111
static void displayTask(const std::string &text, bool eof=false)
static std::shared_ptr< OutputStream > message
static void displayTaskDone()
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
virtual std::unique_ptr< EdgeIterator > allEdgesIterator()=0
virtual EdgeIndex getEdgeIndex(const std::shared_ptr< E > edgeObject) const=0
virtual std::shared_ptr< E > getEdge(EdgeIndex edgeIndex) const=0
virtual int getCoordinate() const=0
static std::unique_ptr< DataTable > read(std::istream &in, const std::string &sep="\t", bool header=true, int rowNames=-1)
const char * what() const noexcept override
static Self Zero(Eigen::Index rows, Eigen::Index cols)
SiteLikelihoodsRef getLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
Get shrunked conditional likelihood matrix at Node (ie just above the node), for a given rate class.
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
std::shared_ptr< ForwardLikelihoodTree > getForwardLikelihoodTree(size_t nCat)
ConditionalLikelihoodRef getBackwardLikelihoodsAtEdgeForClass(uint edgeId, size_t nCat)
ConditionalLikelihoodRef getForwardLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
const DAGindexes & getEdgesIds(uint speciesId, size_t nCat) const
Get indexes of the non-empty edges in the Likelihood DAG that have a given species index for a given ...
std::shared_ptr< ProcessTree > getTreeNode(size_t nCat)
Get process tree for a rate category.
virtual bool isInitialized() const
Data storage class for probabilistic rewards mappings.
virtual void setNumberOfSites(size_t numberOfSites) override
const size_t getSiteIndex(size_t site) const
double getProbaAtNode(PhyloTree::NodeIndex nodeId)
static double computeSumForBranch(const ProbabilisticRewardMapping &smap, size_t branchIndex)
Sum all rewards of a given branch (specified by its index).
static void writeToStream(const ProbabilisticRewardMapping &rewards, const AlignmentDataInterface &sites, std::ostream &out)
Write the reward vectors to a stream.
static std::unique_ptr< ProbabilisticRewardMapping > computeRewardVectors(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &edgeIds, Reward &reward, short unresolvedOption=SubstitutionMappingTools::UNRESOLVED_ZERO, bool verbose=true)
Compute the reward vectors for a particular dataset using the double-recursive likelihood computation...
static double computeSumForSite(const ProbabilisticRewardMapping &smap, size_t siteIndex)
Sum all rewards on a given site (specified by its index).
static void readFromStream(std::istream &in, ProbabilisticRewardMapping &rewards)
Read the reward vectors from a stream.
The Reward interface.
Definition: Reward.h:36
virtual Reward * clone() const =0
static const short UNRESOLVED_ZERO
Constants describing how unresolved characters are counted:
This interface describes the substitution process along the tree and sites of the alignment.
virtual const ParametrizablePhyloTree & parametrizablePhyloTree() const =0
virtual size_t getNumberOfClasses() const =0
virtual double getProbabilityForModel(size_t classIndex) const =0
virtual std::shared_ptr< const ParametrizablePhyloTree > getParametrizablePhyloTree() const =0
virtual const CoreSiteInterface & site(size_t siteIndex) const=0
static bool contains(const std::vector< T > &vec, T el)
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string toString(T t)
T & cwise(T &t)
Definition: DataFlowCWise.h:29
R convert(const F &from, const Dimension< R > &)
Defines the basic types of data flow nodes.