bpp-phyl3 3.0.0
NonHomogeneousSubstitutionProcess.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
6#include <algorithm>
7
8#include "../Model/MixedTransitionModel.h"
12
13using namespace bpp;
14using namespace std;
15
16NonHomogeneousSubstitutionProcess::NonHomogeneousSubstitutionProcess(const NonHomogeneousSubstitutionProcess& set) :
19 modelSet_(set.modelSet_.size()),
20 rDist_ (set.rDist_ ? dynamic_cast<DiscreteDistributionInterface*>(set.rDist_->clone()) : 0),
21 nodeToModel_ (set.nodeToModel_),
22 modelToNodes_ (set.modelToNodes_),
23 modelParameters_ (set.modelParameters_)
24{
25 // Duplicate all model objects:
26 for (size_t i = 0; i < set.modelSet_.size(); i++)
27 {
28 modelSet_[i] = shared_ptr<BranchModelInterface>(set.modelSet_[i]->clone());
29 }
30
32 for (size_t i = 0; i < modelSet_.size(); i++)
33 {
34 modelScenario_->changeModel(dynamic_pointer_cast<MixedTransitionModelInterface>(set.modelSet_[i]), dynamic_pointer_cast<MixedTransitionModelInterface>(modelSet_[i]));
35 }
36}
37
39{
40 clear();
41
47
48 rDist_.reset(rDist_ ? dynamic_cast<DiscreteDistributionInterface*>(set.rDist_->clone()) : 0);
49
50 // Duplicate all model objects:
51
52 modelSet_.resize(set.modelSet_.size());
53
54 for (size_t i = 0; i < set.modelSet_.size(); i++)
55 {
56 modelSet_[i] = shared_ptr<BranchModelInterface>(set.modelSet_[i]->clone());
57 }
58
60 for (size_t i = 0; i < modelSet_.size(); i++)
61 {
62 modelScenario_->changeModel(dynamic_pointer_cast<MixedTransitionModelInterface>(set.modelSet_[i]), dynamic_pointer_cast<MixedTransitionModelInterface>(modelSet_[i]));
63 }
64
65 return *this;
66}
67
69{
71
72 modelSet_.clear();
73 rDist_.reset();
74 nodeToModel_.clear();
75 modelParameters_.clear();
76}
77
78void NonHomogeneousSubstitutionProcess::setModelToNode(size_t modelIndex, unsigned int nodeNumber)
79{
80 if (modelIndex >= nodeToModel_.size())
81 throw IndexOutOfBoundsException("NonHomogeneousSubstitutionProcess::setModelToNode.", modelIndex, 0, nodeToModel_.size() - 1);
82 nodeToModel_[nodeNumber] = modelIndex;
83
84 vector<unsigned int> vNod;
85 vNod.push_back(nodeNumber);
86}
87
88
89void NonHomogeneousSubstitutionProcess::addModel(shared_ptr<BranchModelInterface> model, const vector<unsigned int>& nodesId)
90{
91 if (modelSet_.size() > 0 && model->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
92 throw Exception("NonHomogeneousSubstitutionProcess::addModel. A Substitution Model cannot be added to a Substitution Process if it does not have the same alphabet.");
93 if (modelSet_.size() > 0 && model->getNumberOfStates() != getNumberOfStates())
94 throw Exception("NonHomogeneousSubstitutionProcess::addModel. A Substitution Model cannot be added to a Substitution Process if it does not have the same number of states.");
95
96 modelSet_.push_back(model);
97 size_t thisModelIndex = modelSet_.size() - 1;
98
99
100 // Associate this model to specified nodes:
101 modelToNodes_[thisModelIndex] = nodesId;
102 for (size_t i = 0; i < nodesId.size(); i++)
103 {
104 nodeToModel_[nodesId[i]] = thisModelIndex;
105 }
106
107 // Associate parameters:
108 string pname;
110 modelParameters_.push_back(pl);
111
112 for (size_t i = 0; i < pl.size(); i++)
113 {
114 Parameter* p = pl[i].clone();
115 p->setName(p->getName() + "_" + TextTools::toString(modelSet_.size()));
116 addParameter_(p);
117 }
118}
119
120void NonHomogeneousSubstitutionProcess::setModel(shared_ptr<BranchModelInterface> model, size_t modelIndex)
121{
122 if (modelSet_.size() > 0 && model->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
123 throw Exception("NonHomogeneousSubstitutionProcess::setModel. A Substitution Model cannot be added to a Substitution Process if it does not have the same alphabet.");
124 if (modelSet_.size() > 0 && model->getNumberOfStates() != getNumberOfStates())
125 throw Exception("NonHomogeneousSubstitutionProcess::setModel. A Substitution Model cannot be added to a Substitution Process if it does not have the same number of states.");
126
127 if (modelIndex >= modelSet_.size())
128 throw IndexOutOfBoundsException("NonHomogeneousSubstitutionProcess::setModel.", modelIndex, 0, modelSet_.size());
129
130 modelSet_[modelIndex] = model;
131
132 // Change associate parameters
133 ParameterList& pl1 = modelParameters_[modelIndex];
134 for (size_t i = 0; i < pl1.size(); i++)
135 {
136 string pn = pl1[i].getName() + "_" + TextTools::toString(modelIndex + 1);
138 }
139 string pname;
141 modelParameters_[modelIndex] = pl;
142
143 for (size_t i = 0; i < pl.size(); i++)
144 {
145 Parameter* p = pl[i].clone();
146 p->setName(p->getName() + "_" + TextTools::toString(modelIndex + 1));
147 addParameter_(p);
148 }
149}
150
152{
153 for (size_t i = 0; i < modelSet_.size(); i++)
154 {
155 out << "Model " << i + 1 << ": " << modelSet_[i]->getName() << "\t attached to nodes ";
156 for (size_t j = 0; j < modelToNodes_[i].size(); j++)
157 {
158 out << modelToNodes_[i][j];
159 }
160 out << endl;
161 }
162}
163
165{
166 // Update rate distribution:
167 if (rDist_)
168 rDist_->matchParametersValues(parameters);
169
170
171 // Then we update all models in the set:
172 for (size_t i = 0; i < modelParameters_.size(); i++)
173 {
174 for (size_t np = 0; np < modelParameters_[i].size(); np++)
175 {
176 modelParameters_[i][np].setValue(getParameterValue(modelParameters_[i][np].getName() + "_" + TextTools::toString(i + 1)));
177 }
178 modelSet_[i]->matchParametersValues(modelParameters_[i]);
179 }
180
182}
183
184
186{
187 ParameterList pl;
188
189 // Then we update all models in the set:
190 for (size_t i = 0; i < modelParameters_.size(); i++)
191 {
192 for (size_t np = 0; np < modelParameters_[i].size(); np++)
193 {
194 if (!independent || hasIndependentParameter(modelParameters_[i][np].getName() + "_" + TextTools::toString(i + 1)))
195 {
196 Parameter p(modelParameters_[i][np]);
197 p.setName(p.getName() + "_" + TextTools::toString(i + 1));
198 pl.addParameter(p);
199 }
200 }
201 }
202
203 return pl;
204}
205
207{
209 {
210 if (throwEx)
211 throw Exception("NonHomogeneousSubstitutionProcess::checkOrphanNodes(). No tree provided.");
212 return false;
213 }
214
215 vector<unsigned int> ids = getParametrizablePhyloTree()->getAllNodesIndexes();
216 unsigned int rootId = getParametrizablePhyloTree()->getNodeIndex(getParametrizablePhyloTree()->getRoot());
217 for (size_t i = 0; i < ids.size(); i++)
218 {
219 if (ids[i] != rootId && nodeToModel_.find(ids[i]) == nodeToModel_.end())
220 {
221 if (throwEx)
222 throw Exception("NonHomogeneousSubstitutionProcess::checkOrphanNodes(). Node '" + TextTools::toString(ids[i]) + "' in tree has no model associated.");
223 return false;
224 }
225 }
226 return true;
227}
228
230{
232 {
233 if (throwEx)
234 throw Exception("NonHomogeneousSubstitutionProcess::checkUnknownNodes(). No tree provided.");
235 return false;
236 }
237 vector<unsigned int> ids = getParametrizablePhyloTree()->getAllNodesIndexes();
238 unsigned int id;
239 unsigned int rootId = getParametrizablePhyloTree()->getNodeIndex(getParametrizablePhyloTree()->getRoot());
240
241 map<size_t, vector<unsigned int>>::const_iterator it;
242
243 for (it = modelToNodes_.begin(); it != modelToNodes_.end(); it++)
244 {
245 for (size_t j = 0; j < it->second.size(); j++)
246 {
247 id = it->second[j];
248 if (id == rootId || !VectorTools::contains(ids, id))
249 {
250 if (throwEx)
251 throw Exception("NonHomogeneousSubstitutionProcess::checkUnknownNodes(). Node '" + TextTools::toString(id) + "' is not found in tree or is the root node.");
252 return false;
253 }
254 }
255 }
256 return true;
257}
258
260{
261 for (size_t i = 1; i <= getNumberOfModels(); i++)
262 {
263 if (dynamic_pointer_cast<const MixedTransitionModelInterface>(getModel(i)) != nullptr)
264 return true;
265 }
266 return false;
267}
268
269
270void NonHomogeneousSubstitutionProcess::setModelScenario(shared_ptr<ModelScenario> modelscenario)
271{
272 auto vmod = modelscenario->getModels();
273
274 for (auto& mod:vmod)
275 {
276 if (find(modelSet_.begin(), modelSet_.end(), mod) == modelSet_.end())
277 throw Exception("NonHomogeneousSubstitutionProcess::setModelScenario: unknown model " + mod->getName());
278 }
279
280 modelScenario_ = modelscenario;
281}
282
283
284unique_ptr<AutonomousSubstitutionProcessInterface> NonHomogeneousSubstitutionProcess::createHomogeneousSubstitutionProcess(
285 shared_ptr<BranchModelInterface> model,
286 shared_ptr<DiscreteDistributionInterface> rdist,
287 shared_ptr<PhyloTree> tree,
288 shared_ptr<FrequencySetInterface> rootFreqs,
289 shared_ptr<ModelScenario> scenario)
290{
291 if (!tree)
292 throw Exception("NonHomogeneousSubstitutionProcess::createHomogeneousSubstitutionProcess: missing tree.");
293
294 // Check alphabet:
295 if (rootFreqs && model->alphabet().getAlphabetType() != rootFreqs->alphabet().getAlphabetType())
296 throw AlphabetMismatchException("NonHomogeneousSubstitutionProcess::createHomogeneousModelSet()", model->getAlphabet().get(), rootFreqs->getAlphabet().get());
297
298 unique_ptr<AutonomousSubstitutionProcessInterface> modelSet;
299
300 if (!rdist)
301 modelSet.reset(new SimpleSubstitutionProcess(model, tree));
302 else
303 modelSet.reset(new RateAcrossSitesSubstitutionProcess(model, rdist, tree));
304
305 if (rootFreqs)
306 modelSet->setRootFrequencySet(rootFreqs);
307
308 if (scenario)
309 modelSet->setModelScenario(scenario);
310
311 return modelSet;
312}
313
315 shared_ptr<BranchModelInterface> model,
316 shared_ptr<DiscreteDistributionInterface> rdist,
317 shared_ptr<PhyloTree> tree,
318 shared_ptr<FrequencySetInterface> rootFreqs,
319 const vector<string>& globalParameterNames,
320 shared_ptr<ModelScenario> scenario)
321{
322 if (!tree)
323 throw Exception("NonHomogeneousSubstitutionProcess::createNonHomogeneousSubstitutionProcess: missing tree.");
324
325 // Check alphabet:
326 if (rootFreqs && model->alphabet().getAlphabetType() != rootFreqs->alphabet().getAlphabetType())
327 throw AlphabetMismatchException("NonHomogeneousSubstitutionProcess::createNonHomogeneousModelSet()", model->getAlphabet().get(), rootFreqs->getAlphabet().get());
328 if (dynamic_pointer_cast<MixedTransitionModelInterface>(model) != nullptr)
329 throw Exception("createNonHomogeneousSubstitutionProcess not yet programmed for mixture models.");
330
331 ParameterList globalParameters, branchParameters;
332 globalParameters = model->getIndependentParameters();
333
334 vector<string> globalParameterNames2; // vector of the complete names of global parameters
335
336 // First get correct parameter names
337 size_t i, j;
338
339 for (i = 0; i < globalParameterNames.size(); i++)
340 {
341 if (globalParameterNames[i].find("*") != string::npos)
342 {
343 for (j = 0; j < globalParameters.size(); j++)
344 {
345 StringTokenizer stj(globalParameterNames[i], "*", true, false);
346 size_t pos1, pos2;
347 string parn = globalParameters[j].getName();
348 bool flag(true);
349 string g = stj.nextToken();
350 pos1 = parn.find(g);
351 if (pos1 != 0)
352 flag = false;
353 pos1 += g.length();
354 while (flag && stj.hasMoreToken())
355 {
356 g = stj.nextToken();
357 pos2 = parn.find(g, pos1);
358 if (pos2 == string::npos)
359 {
360 flag = false;
361 break;
362 }
363 pos1 = pos2 + g.length();
364 }
365 if (flag &&
366 ((g.length() == 0) || (pos1 == parn.length()) || (parn.rfind(g) == parn.length() - g.length())))
367 globalParameterNames2.push_back(parn);
368 }
369 }
370 else if (!globalParameters.hasParameter(globalParameterNames[i]))
371 throw Exception("NonHomogeneousSubstitutionProcess::createNonHomogeneousModelSet. Parameter '" + globalParameterNames[i] + "' is not valid.");
372 else
373 globalParameterNames2.push_back(globalParameterNames[i]);
374 }
375
376 // remove non global parameters
377 for (i = globalParameters.size(); i > 0; i--)
378 {
379 if (find(globalParameterNames2.begin(), globalParameterNames2.end(), globalParameters[i - 1].getName()) == globalParameterNames2.end())
380 {
381 // not a global parameter:
382 branchParameters.addParameter(globalParameters[i - 1]);
383 globalParameters.deleteParameter(i - 1);
384 }
385 }
386
387 auto modelSet = make_unique<NonHomogeneousSubstitutionProcess>(rdist, tree, rootFreqs);
388
389 // We assign a copy of this model to all nodes in the tree (excepted root node), and link all parameters with it.
390 vector<unsigned int> ids = tree->getAllNodesIndexes();
391 unsigned int rootId = tree->getRootIndex();
392 size_t pos = 0;
393 for (i = 0; i < ids.size(); ++i)
394 {
395 if (ids[i] == rootId)
396 {
397 pos = i;
398 break;
399 }
400 }
401
402 ids.erase(ids.begin() + (long)pos);
403 sort(ids.begin(), ids.end());
404
405 for (i = 0; i < ids.size(); i++)
406 {
407 modelSet->addModel(shared_ptr<BranchModelInterface>(model->clone()), vector<unsigned int>(1, ids[i]));
408 }
409
410 // Now alias all global parameters on all nodes:
411 for (i = 0; i < globalParameters.size(); i++)
412 {
413 string pname = globalParameters[i].getName();
414
415 for (size_t nn = 1; nn < ids.size(); nn++)
416 {
417 modelSet->aliasParameters(pname + "_1", pname + "_" + TextTools::toString(nn + 1));
418 }
419 }
420
421 if (scenario)
422 throw Exception("NonHomogeneousSubstitutionProcess::createNonHomogeneousModelSet : setModelScenario(scenario) to be implemented.");
423
424 // Defines the hypernodes if mixed
425 // if (mixed)
426 // {
427 // MixedNonHomogeneousSubstitutionProcess* pMSMS = dynamic_cast<MixedNonHomogeneousSubstitutionProcess*>(modelSet);
428 // MixedSubstitutionModel* pMSM = dynamic_cast<MixedSubstitutionModel*>(model);
429
430 // size_t nbm = pMSM->getNumberOfModels();
431 // for (i = 0; i < nbm; i++)
432 // {
433 // pMSMS->addEmptyHyperNode();
434 // for (j = 0; j < ids.size(); j++)
435 // {
436 // pMSMS->addToHyperNode(j, vector<int>(1, static_cast<int>(i)));
437 // }
438 // }
439 // pMSMS->computeHyperNodesProbabilities();
440 // }
441
442 // delete model; // delete template model.
443 return modelSet;
444}
A partial implementation of the SubstitutionProcess interface.
std::shared_ptr< const ParametrizablePhyloTree > getParametrizablePhyloTree() const
void fireParameterChanged(const ParameterList &pl)
AbsractParametrizable interface.
AbstractAutonomousSubstitutionProcess & operator=(const AbstractAutonomousSubstitutionProcess &asp)
void deleteParameter_(size_t index)
void addParameter_(Parameter *parameter)
bool hasIndependentParameter(const std::string &name) const
AbstractParameterAliasable & operator=(const AbstractParameterAliasable &ap)
double getParameterValue(const std::string &name) const override
std::shared_ptr< const Alphabet > getAlphabet() const
virtual std::string getAlphabetType() const=0
BranchModelInterface * clone() const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual size_t getNumberOfStates() const =0
Get the number of states.
virtual const Alphabet & alphabet() const =0
Substitution process manager for non-homogeneous / non-reversible models of evolution.
std::vector< std::shared_ptr< BranchModelInterface > > modelSet_
Contains all models used in this tree.
std::map< unsigned int, size_t > nodeToModel_
Contains for each node in a tree the index of the corresponding model in modelSet_.
std::shared_ptr< DiscreteDistributionInterface > rDist_
Rate Distribution.
void setModel(std::shared_ptr< BranchModelInterface > model, size_t modelIndex)
Change a given model.
void fireParameterChanged(const ParameterList &parameters) override
void clear()
Resets all the information contained in this object.
void setModelScenario(std::shared_ptr< ModelScenario > modelscenario) override
Set the modelPath, after checking it is valid (ie modelpath has only the model of the process).
const BranchModelInterface & model(size_t n) const override
void listModelNames(std::ostream &out=std::cout) const
list all model names.
std::map< size_t, std::vector< unsigned int > > modelToNodes_
ParameterList getSubstitutionModelParameters(bool independent) const override
Get the INDEPENDENT parameters corresponding to the models.
void addModel(std::shared_ptr< BranchModelInterface > model, const std::vector< unsigned int > &nodesId)
Add a new model to the set, and set relationships with nodes and params.
static std::unique_ptr< NonHomogeneousSubstitutionProcess > createNonHomogeneousSubstitutionProcess(std::shared_ptr< BranchModelInterface > model, std::shared_ptr< DiscreteDistributionInterface > rdist, std::shared_ptr< PhyloTree > tree, std::shared_ptr< FrequencySetInterface > rootFreqs, const std::vector< std::string > &globalParameterNames, std::shared_ptr< ModelScenario > scenario=0)
Create a NonHomogeneousSubstitutionProcess object, with one model per branch.
static std::unique_ptr< AutonomousSubstitutionProcessInterface > createHomogeneousSubstitutionProcess(std::shared_ptr< BranchModelInterface > model, std::shared_ptr< DiscreteDistributionInterface > rdist, std::shared_ptr< PhyloTree > tree, std::shared_ptr< FrequencySetInterface > rootFreqs=0, std::shared_ptr< ModelScenario > scenario=0)
Create a NonHomogeneousSubstitutionProcess object, corresponding to the homogeneous case.
void setModelToNode(size_t modelIndex, unsigned int nodeNumber)
Associate an existing model with a given node.
std::shared_ptr< const BranchModelInterface > getModel(size_t n) const override
std::vector< ParameterList > modelParameters_
Parameters for each model in the set.
NonHomogeneousSubstitutionProcess & operator=(const NonHomogeneousSubstitutionProcess &set)
virtual const ParameterList & getIndependentParameters() const=0
virtual bool hasParameter(const std::string &name) const
size_t size() const
virtual void addParameter(const Parameter &param)
ParameterList * clone() const
virtual void deleteParameter(const std::string &name)
virtual void setName(const std::string &name)
virtual const std::string & getName() const
Space and time homogeneous substitution process, without mixture.
const std::string & nextToken()
bool hasMoreToken() const
static bool contains(const std::vector< T > &vec, T el)
std::string toString(T t)
Defines the basic types of data flow nodes.