bpp-phyl3  3.0.0
NonHomogeneousSubstitutionProcess.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Utils/MapTools.h>
6 #include <algorithm>
7 
8 #include "../Model/MixedTransitionModel.h"
12 
13 using namespace bpp;
14 using namespace std;
15 
19  modelSet_(set.modelSet_.size()),
20  rDist_ (set.rDist_ ? dynamic_cast<DiscreteDistributionInterface*>(set.rDist_->clone()) : 0),
21  nodeToModel_ (set.nodeToModel_),
22  modelToNodes_ (set.modelToNodes_),
23  modelParameters_ (set.modelParameters_)
24 {
25  // Duplicate all model objects:
26  for (size_t i = 0; i < set.modelSet_.size(); i++)
27  {
28  modelSet_[i] = shared_ptr<BranchModelInterface>(set.modelSet_[i]->clone());
29  }
30 
31  if (modelScenario_)
32  for (size_t i = 0; i < modelSet_.size(); i++)
33  {
34  modelScenario_->changeModel(dynamic_pointer_cast<MixedTransitionModelInterface>(set.modelSet_[i]), dynamic_pointer_cast<MixedTransitionModelInterface>(modelSet_[i]));
35  }
36 }
37 
39 {
40  clear();
41 
47 
48  rDist_.reset(rDist_ ? dynamic_cast<DiscreteDistributionInterface*>(set.rDist_->clone()) : 0);
49 
50  // Duplicate all model objects:
51 
52  modelSet_.resize(set.modelSet_.size());
53 
54  for (size_t i = 0; i < set.modelSet_.size(); i++)
55  {
56  modelSet_[i] = shared_ptr<BranchModelInterface>(set.modelSet_[i]->clone());
57  }
58 
59  if (modelScenario_)
60  for (size_t i = 0; i < modelSet_.size(); i++)
61  {
62  modelScenario_->changeModel(dynamic_pointer_cast<MixedTransitionModelInterface>(set.modelSet_[i]), dynamic_pointer_cast<MixedTransitionModelInterface>(modelSet_[i]));
63  }
64 
65  return *this;
66 }
67 
69 {
71 
72  modelSet_.clear();
73  rDist_.reset();
74  nodeToModel_.clear();
75  modelParameters_.clear();
76 }
77 
78 void NonHomogeneousSubstitutionProcess::setModelToNode(size_t modelIndex, unsigned int nodeNumber)
79 {
80  if (modelIndex >= nodeToModel_.size())
81  throw IndexOutOfBoundsException("NonHomogeneousSubstitutionProcess::setModelToNode.", modelIndex, 0, nodeToModel_.size() - 1);
82  nodeToModel_[nodeNumber] = modelIndex;
83 
84  vector<unsigned int> vNod;
85  vNod.push_back(nodeNumber);
86 }
87 
88 
89 void NonHomogeneousSubstitutionProcess::addModel(shared_ptr<BranchModelInterface> model, const vector<unsigned int>& nodesId)
90 {
91  if (modelSet_.size() > 0 && model->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
92  throw Exception("NonHomogeneousSubstitutionProcess::addModel. A Substitution Model cannot be added to a Substitution Process if it does not have the same alphabet.");
93  if (modelSet_.size() > 0 && model->getNumberOfStates() != getNumberOfStates())
94  throw Exception("NonHomogeneousSubstitutionProcess::addModel. A Substitution Model cannot be added to a Substitution Process if it does not have the same number of states.");
95 
96  modelSet_.push_back(model);
97  size_t thisModelIndex = modelSet_.size() - 1;
98 
99 
100  // Associate this model to specified nodes:
101  modelToNodes_[thisModelIndex] = nodesId;
102  for (size_t i = 0; i < nodesId.size(); i++)
103  {
104  nodeToModel_[nodesId[i]] = thisModelIndex;
105  }
106 
107  // Associate parameters:
108  string pname;
110  modelParameters_.push_back(pl);
111 
112  for (size_t i = 0; i < pl.size(); i++)
113  {
114  Parameter* p = pl[i].clone();
115  p->setName(p->getName() + "_" + TextTools::toString(modelSet_.size()));
116  addParameter_(p);
117  }
118 }
119 
120 void NonHomogeneousSubstitutionProcess::setModel(shared_ptr<BranchModelInterface> model, size_t modelIndex)
121 {
122  if (modelSet_.size() > 0 && model->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
123  throw Exception("NonHomogeneousSubstitutionProcess::setModel. A Substitution Model cannot be added to a Substitution Process if it does not have the same alphabet.");
124  if (modelSet_.size() > 0 && model->getNumberOfStates() != getNumberOfStates())
125  throw Exception("NonHomogeneousSubstitutionProcess::setModel. A Substitution Model cannot be added to a Substitution Process if it does not have the same number of states.");
126 
127  if (modelIndex >= modelSet_.size())
128  throw IndexOutOfBoundsException("NonHomogeneousSubstitutionProcess::setModel.", modelIndex, 0, modelSet_.size());
129 
130  modelSet_[modelIndex] = model;
131 
132  // Change associate parameters
133  ParameterList& pl1 = modelParameters_[modelIndex];
134  for (size_t i = 0; i < pl1.size(); i++)
135  {
136  string pn = pl1[i].getName() + "_" + TextTools::toString(modelIndex + 1);
137  deleteParameter_(pn);
138  }
139  string pname;
141  modelParameters_[modelIndex] = pl;
142 
143  for (size_t i = 0; i < pl.size(); i++)
144  {
145  Parameter* p = pl[i].clone();
146  p->setName(p->getName() + "_" + TextTools::toString(modelIndex + 1));
147  addParameter_(p);
148  }
149 }
150 
152 {
153  for (size_t i = 0; i < modelSet_.size(); i++)
154  {
155  out << "Model " << i + 1 << ": " << modelSet_[i]->getName() << "\t attached to nodes ";
156  for (size_t j = 0; j < modelToNodes_[i].size(); j++)
157  {
158  out << modelToNodes_[i][j];
159  }
160  out << endl;
161  }
162 }
163 
165 {
166  // Update rate distribution:
167  if (rDist_)
168  rDist_->matchParametersValues(parameters);
169 
170 
171  // Then we update all models in the set:
172  for (size_t i = 0; i < modelParameters_.size(); i++)
173  {
174  for (size_t np = 0; np < modelParameters_[i].size(); np++)
175  {
176  modelParameters_[i][np].setValue(getParameterValue(modelParameters_[i][np].getName() + "_" + TextTools::toString(i + 1)));
177  }
178  modelSet_[i]->matchParametersValues(modelParameters_[i]);
179  }
180 
182 }
183 
184 
186 {
187  ParameterList pl;
188 
189  // Then we update all models in the set:
190  for (size_t i = 0; i < modelParameters_.size(); i++)
191  {
192  for (size_t np = 0; np < modelParameters_[i].size(); np++)
193  {
194  if (!independent || hasIndependentParameter(modelParameters_[i][np].getName() + "_" + TextTools::toString(i + 1)))
195  {
196  Parameter p(modelParameters_[i][np]);
197  p.setName(p.getName() + "_" + TextTools::toString(i + 1));
198  pl.addParameter(p);
199  }
200  }
201  }
202 
203  return pl;
204 }
205 
207 {
209  {
210  if (throwEx)
211  throw Exception("NonHomogeneousSubstitutionProcess::checkOrphanNodes(). No tree provided.");
212  return false;
213  }
214 
215  vector<unsigned int> ids = getParametrizablePhyloTree()->getAllNodesIndexes();
216  unsigned int rootId = getParametrizablePhyloTree()->getNodeIndex(getParametrizablePhyloTree()->getRoot());
217  for (size_t i = 0; i < ids.size(); i++)
218  {
219  if (ids[i] != rootId && nodeToModel_.find(ids[i]) == nodeToModel_.end())
220  {
221  if (throwEx)
222  throw Exception("NonHomogeneousSubstitutionProcess::checkOrphanNodes(). Node '" + TextTools::toString(ids[i]) + "' in tree has no model associated.");
223  return false;
224  }
225  }
226  return true;
227 }
228 
230 {
232  {
233  if (throwEx)
234  throw Exception("NonHomogeneousSubstitutionProcess::checkUnknownNodes(). No tree provided.");
235  return false;
236  }
237  vector<unsigned int> ids = getParametrizablePhyloTree()->getAllNodesIndexes();
238  unsigned int id;
239  unsigned int rootId = getParametrizablePhyloTree()->getNodeIndex(getParametrizablePhyloTree()->getRoot());
240 
241  map<size_t, vector<unsigned int>>::const_iterator it;
242 
243  for (it = modelToNodes_.begin(); it != modelToNodes_.end(); it++)
244  {
245  for (size_t j = 0; j < it->second.size(); j++)
246  {
247  id = it->second[j];
248  if (id == rootId || !VectorTools::contains(ids, id))
249  {
250  if (throwEx)
251  throw Exception("NonHomogeneousSubstitutionProcess::checkUnknownNodes(). Node '" + TextTools::toString(id) + "' is not found in tree or is the root node.");
252  return false;
253  }
254  }
255  }
256  return true;
257 }
258 
260 {
261  for (size_t i = 1; i <= getNumberOfModels(); i++)
262  {
263  if (dynamic_pointer_cast<const MixedTransitionModelInterface>(getModel(i)) != nullptr)
264  return true;
265  }
266  return false;
267 }
268 
269 
270 void NonHomogeneousSubstitutionProcess::setModelScenario(shared_ptr<ModelScenario> modelscenario)
271 {
272  auto vmod = modelscenario->getModels();
273 
274  for (auto& mod:vmod)
275  {
276  if (find(modelSet_.begin(), modelSet_.end(), mod) == modelSet_.end())
277  throw Exception("NonHomogeneousSubstitutionProcess::setModelScenario: unknown model " + mod->getName());
278  }
279 
280  modelScenario_ = modelscenario;
281 }
282 
283 
284 unique_ptr<AutonomousSubstitutionProcessInterface> NonHomogeneousSubstitutionProcess::createHomogeneousSubstitutionProcess(
285  shared_ptr<BranchModelInterface> model,
286  shared_ptr<DiscreteDistributionInterface> rdist,
287  shared_ptr<PhyloTree> tree,
288  shared_ptr<FrequencySetInterface> rootFreqs,
289  shared_ptr<ModelScenario> scenario)
290 {
291  if (!tree)
292  throw Exception("NonHomogeneousSubstitutionProcess::createHomogeneousSubstitutionProcess: missing tree.");
293 
294  // Check alphabet:
295  if (rootFreqs && model->alphabet().getAlphabetType() != rootFreqs->alphabet().getAlphabetType())
296  throw AlphabetMismatchException("NonHomogeneousSubstitutionProcess::createHomogeneousModelSet()", model->getAlphabet().get(), rootFreqs->getAlphabet().get());
297 
298  unique_ptr<AutonomousSubstitutionProcessInterface> modelSet;
299 
300  if (!rdist)
301  modelSet.reset(new SimpleSubstitutionProcess(model, tree));
302  else
303  modelSet.reset(new RateAcrossSitesSubstitutionProcess(model, rdist, tree));
304 
305  if (rootFreqs)
306  modelSet->setRootFrequencySet(rootFreqs);
307 
308  if (scenario)
309  modelSet->setModelScenario(scenario);
310 
311  return modelSet;
312 }
313 
315  shared_ptr<BranchModelInterface> model,
316  shared_ptr<DiscreteDistributionInterface> rdist,
317  shared_ptr<PhyloTree> tree,
318  shared_ptr<FrequencySetInterface> rootFreqs,
319  const vector<string>& globalParameterNames,
320  shared_ptr<ModelScenario> scenario)
321 {
322  if (!tree)
323  throw Exception("NonHomogeneousSubstitutionProcess::createNonHomogeneousSubstitutionProcess: missing tree.");
324 
325  // Check alphabet:
326  if (rootFreqs && model->alphabet().getAlphabetType() != rootFreqs->alphabet().getAlphabetType())
327  throw AlphabetMismatchException("NonHomogeneousSubstitutionProcess::createNonHomogeneousModelSet()", model->getAlphabet().get(), rootFreqs->getAlphabet().get());
328  if (dynamic_pointer_cast<MixedTransitionModelInterface>(model) != nullptr)
329  throw Exception("createNonHomogeneousSubstitutionProcess not yet programmed for mixture models.");
330 
331  ParameterList globalParameters, branchParameters;
332  globalParameters = model->getIndependentParameters();
333 
334  vector<string> globalParameterNames2; // vector of the complete names of global parameters
335 
336  // First get correct parameter names
337  size_t i, j;
338 
339  for (i = 0; i < globalParameterNames.size(); i++)
340  {
341  if (globalParameterNames[i].find("*") != string::npos)
342  {
343  for (j = 0; j < globalParameters.size(); j++)
344  {
345  StringTokenizer stj(globalParameterNames[i], "*", true, false);
346  size_t pos1, pos2;
347  string parn = globalParameters[j].getName();
348  bool flag(true);
349  string g = stj.nextToken();
350  pos1 = parn.find(g);
351  if (pos1 != 0)
352  flag = false;
353  pos1 += g.length();
354  while (flag && stj.hasMoreToken())
355  {
356  g = stj.nextToken();
357  pos2 = parn.find(g, pos1);
358  if (pos2 == string::npos)
359  {
360  flag = false;
361  break;
362  }
363  pos1 = pos2 + g.length();
364  }
365  if (flag &&
366  ((g.length() == 0) || (pos1 == parn.length()) || (parn.rfind(g) == parn.length() - g.length())))
367  globalParameterNames2.push_back(parn);
368  }
369  }
370  else if (!globalParameters.hasParameter(globalParameterNames[i]))
371  throw Exception("NonHomogeneousSubstitutionProcess::createNonHomogeneousModelSet. Parameter '" + globalParameterNames[i] + "' is not valid.");
372  else
373  globalParameterNames2.push_back(globalParameterNames[i]);
374  }
375 
376  // remove non global parameters
377  for (i = globalParameters.size(); i > 0; i--)
378  {
379  if (find(globalParameterNames2.begin(), globalParameterNames2.end(), globalParameters[i - 1].getName()) == globalParameterNames2.end())
380  {
381  // not a global parameter:
382  branchParameters.addParameter(globalParameters[i - 1]);
383  globalParameters.deleteParameter(i - 1);
384  }
385  }
386 
387  auto modelSet = make_unique<NonHomogeneousSubstitutionProcess>(rdist, tree, rootFreqs);
388 
389  // We assign a copy of this model to all nodes in the tree (excepted root node), and link all parameters with it.
390  vector<unsigned int> ids = tree->getAllNodesIndexes();
391  unsigned int rootId = tree->getRootIndex();
392  size_t pos = 0;
393  for (i = 0; i < ids.size(); ++i)
394  {
395  if (ids[i] == rootId)
396  {
397  pos = i;
398  break;
399  }
400  }
401 
402  ids.erase(ids.begin() + (long)pos);
403  sort(ids.begin(), ids.end());
404 
405  for (i = 0; i < ids.size(); i++)
406  {
407  modelSet->addModel(shared_ptr<BranchModelInterface>(model->clone()), vector<unsigned int>(1, ids[i]));
408  }
409 
410  // Now alias all global parameters on all nodes:
411  for (i = 0; i < globalParameters.size(); i++)
412  {
413  string pname = globalParameters[i].getName();
414 
415  for (size_t nn = 1; nn < ids.size(); nn++)
416  {
417  modelSet->aliasParameters(pname + "_1", pname + "_" + TextTools::toString(nn + 1));
418  }
419  }
420 
421  if (scenario)
422  throw Exception("NonHomogeneousSubstitutionProcess::createNonHomogeneousModelSet : setModelScenario(scenario) to be implemented.");
423 
424  // Defines the hypernodes if mixed
425  // if (mixed)
426  // {
427  // MixedNonHomogeneousSubstitutionProcess* pMSMS = dynamic_cast<MixedNonHomogeneousSubstitutionProcess*>(modelSet);
428  // MixedSubstitutionModel* pMSM = dynamic_cast<MixedSubstitutionModel*>(model);
429 
430  // size_t nbm = pMSM->getNumberOfModels();
431  // for (i = 0; i < nbm; i++)
432  // {
433  // pMSMS->addEmptyHyperNode();
434  // for (j = 0; j < ids.size(); j++)
435  // {
436  // pMSMS->addToHyperNode(j, vector<int>(1, static_cast<int>(i)));
437  // }
438  // }
439  // pMSMS->computeHyperNodesProbabilities();
440  // }
441 
442  // delete model; // delete template model.
443  return modelSet;
444 }
A partial implementation of the SubstitutionProcess interface.
void fireParameterChanged(const ParameterList &pl)
AbsractParametrizable interface.
std::shared_ptr< const ParametrizablePhyloTree > getParametrizablePhyloTree() const
AbstractAutonomousSubstitutionProcess & operator=(const AbstractAutonomousSubstitutionProcess &asp)
void deleteParameter_(size_t index)
void addParameter_(Parameter *parameter)
bool hasIndependentParameter(const std::string &name) const
AbstractParameterAliasable & operator=(const AbstractParameterAliasable &ap)
double getParameterValue(const std::string &name) const override
std::shared_ptr< const Alphabet > getAlphabet() const
virtual std::string getAlphabetType() const=0
BranchModelInterface * clone() const =0
virtual size_t getNumberOfStates() const =0
Get the number of states.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual const Alphabet & alphabet() const =0
Substitution process manager for non-homogeneous / non-reversible models of evolution.
std::vector< std::shared_ptr< BranchModelInterface > > modelSet_
Contains all models used in this tree.
std::map< unsigned int, size_t > nodeToModel_
Contains for each node in a tree the index of the corresponding model in modelSet_.
std::shared_ptr< DiscreteDistributionInterface > rDist_
Rate Distribution.
void setModel(std::shared_ptr< BranchModelInterface > model, size_t modelIndex)
Change a given model.
void fireParameterChanged(const ParameterList &parameters) override
void clear()
Resets all the information contained in this object.
const BranchModelInterface & model(size_t n) const override
void setModelScenario(std::shared_ptr< ModelScenario > modelscenario) override
Set the modelPath, after checking it is valid (ie modelpath has only the model of the process).
void listModelNames(std::ostream &out=std::cout) const
list all model names.
std::map< size_t, std::vector< unsigned int > > modelToNodes_
ParameterList getSubstitutionModelParameters(bool independent) const override
Get the INDEPENDENT parameters corresponding to the models.
NonHomogeneousSubstitutionProcess(std::shared_ptr< DiscreteDistributionInterface > rdist, std::shared_ptr< const PhyloTree > tree=0, std::shared_ptr< FrequencySetInterface > rootFreqs=0)
Create a model set according to the specified alphabet and root frequencies. Stationarity is not assu...
void addModel(std::shared_ptr< BranchModelInterface > model, const std::vector< unsigned int > &nodesId)
Add a new model to the set, and set relationships with nodes and params.
static std::unique_ptr< NonHomogeneousSubstitutionProcess > createNonHomogeneousSubstitutionProcess(std::shared_ptr< BranchModelInterface > model, std::shared_ptr< DiscreteDistributionInterface > rdist, std::shared_ptr< PhyloTree > tree, std::shared_ptr< FrequencySetInterface > rootFreqs, const std::vector< std::string > &globalParameterNames, std::shared_ptr< ModelScenario > scenario=0)
Create a NonHomogeneousSubstitutionProcess object, with one model per branch.
static std::unique_ptr< AutonomousSubstitutionProcessInterface > createHomogeneousSubstitutionProcess(std::shared_ptr< BranchModelInterface > model, std::shared_ptr< DiscreteDistributionInterface > rdist, std::shared_ptr< PhyloTree > tree, std::shared_ptr< FrequencySetInterface > rootFreqs=0, std::shared_ptr< ModelScenario > scenario=0)
Create a NonHomogeneousSubstitutionProcess object, corresponding to the homogeneous case.
void setModelToNode(size_t modelIndex, unsigned int nodeNumber)
Associate an existing model with a given node.
std::vector< ParameterList > modelParameters_
Parameters for each model in the set.
NonHomogeneousSubstitutionProcess & operator=(const NonHomogeneousSubstitutionProcess &set)
std::shared_ptr< const BranchModelInterface > getModel(size_t n) const override
virtual const ParameterList & getIndependentParameters() const=0
virtual bool hasParameter(const std::string &name) const
size_t size() const
virtual void addParameter(const Parameter &param)
ParameterList * clone() const
virtual void deleteParameter(const std::string &name)
virtual void setName(const std::string &name)
virtual const std::string & getName() const
Space and time homogeneous substitution process, without mixture.
const std::string & nextToken()
bool hasMoreToken() const
static bool contains(const std::vector< T > &vec, T el)
std::string toString(T t)
Defines the basic types of data flow nodes.