bpp-phyl3  3.0.0
RELAX.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../MixtureOfASubstitutionModel.h"
6 #include "RELAX.h"
7 #include "YN98.h"
8 
9 #include <cmath> /* pow */
10 
13 
14 using namespace bpp;
15 using namespace std;
16 
17 /******************************************************************************/
18 
20  shared_ptr<const GeneticCode> gc,
21  unique_ptr<CodonFrequencySetInterface> codonFreqs) :
23  AbstractWrappedModel("RELAX."),
27  YNGP_M("RELAX.") // RELAX currently inherits from YNGP_M as well, since it uses kappa and instead of the 5 GTR parameters
28 {
29  // set the initial omegas distribution
30  vector<double> omega_initials, omega_frequencies_initials;
31  omega_initials.push_back(0.5); omega_initials.push_back(1); omega_initials.push_back(2);
32  omega_frequencies_initials.push_back(0.333333); omega_frequencies_initials.push_back(0.333333); omega_frequencies_initials.push_back(0.333334);
33 
34  auto psdd = make_unique<SimpleDiscreteDistribution>(omega_initials, omega_frequencies_initials);
35 
36  map<string, unique_ptr<DiscreteDistributionInterface>> mpdd;
37  mpdd["omega"] = std::move(psdd);
38 
39  // build the submodel as a basic Yang Nielsen model (with kappa instead of 5 GTR nucleotide substitution rate parameters)
40  auto yn98 = make_unique<YN98>(gc, std::move(codonFreqs));
41 
42  // initialize the site model with the initial omegas distribution
43  mixedModelPtr_ = make_unique<MixtureOfASubstitutionModel>(gc->getSourceAlphabet(), std::move(yn98), mpdd);
44  mixedSubModelPtr_ = dynamic_cast<const MixtureOfASubstitutionModel*>(&mixedModel());
45 
46  vector<int> supportedChars = mixedModelPtr_->getAlphabetStates();
47 
48  // mapping the parameters
49  ParameterList pl = mixedModelPtr_->getParameters();
50  for (size_t i = 0; i < pl.size(); ++i)
51  {
52  lParPmodel_.addParameter(Parameter(pl[i])); // add the parameter to the biblio wrapper instance - see Laurent's response in https://groups.google.com/forum/#!searchin/biopp-help-forum/likelihood$20ratio$20test|sort:date/biopp-help-forum/lH8MYit_Mr8/2CBND79B11YJ
53  }
54 
55  // v consists of 9 shared theta parameters, that are used for the F3X4 estimation of codon frequencies
56  vector<std::string> v = dynamic_cast<const YN98&>(mixedModelPtr_->nModel(0)).frequencySet().getParameters().getParameterNames();
57 
58  for (auto& vi : v)
59  {
60  mapParNamesFromPmodel_[vi] = vi.substr(5);
61  }
62 
63  // map the parameters of RELAX to the parameters of the sub-models
64  mapParNamesFromPmodel_["YN98.kappa"] = "kappa";
65  mapParNamesFromPmodel_["YN98.omega_Simple.V1"] = "p"; // omega0=p*omega1 (p is re-parameterization of omega0)
66  mapParNamesFromPmodel_["YN98.omega_Simple.V2"] = "omega1";
67  mapParNamesFromPmodel_["YN98.omega_Simple.theta1"] = "theta1"; // frequency of omega1 (denoted in YNGP_M2's documentation as p0)
68  mapParNamesFromPmodel_["YN98.omega_Simple.V3"] = "omega2";
69  mapParNamesFromPmodel_["YN98.omega_Simple.theta2"] = "theta2"; // theta2 = (p1/(p1+p2))
70  /* codon frequencies parameterization using F3X4: for each _Full.theta, corresponding to a a codon position over {0,1,2}:
71  getFreq_(0) = theta1 * (1. - theta);
72  getFreq_(1) = (1 - theta2) * theta;
73  getFreq_(2) = theta2 * theta;
74  getFreq_(3) = (1 - theta1) * (1. - theta); */
75 
76  string st;
77  for (auto& it : mapParNamesFromPmodel_)
78  {
79  st = mixedModelPtr_->getParameterNameWithoutNamespace(it.first);
80  if (it.second.substr(0, 5) != "omega" && it.second.substr(0, 5) != "p")
81  {
82  addParameter_(new Parameter("RELAX." + it.second, mixedModelPtr_->getParameterValue(st),
83  mixedModelPtr_->parameter(st).hasConstraint() ? std::shared_ptr<ConstraintInterface>(mixedModelPtr_->parameter(st).getConstraint()->clone()) : 0));
84  }
85  }
86 
87  /* set the below parameters that are used for parameterizing the omega parameters of the sumodels of type YN98 as autoparameters to suppress exceptions when constraints of the YN98 omega parameters are exceeded
88  YN98_0.omega = (RELAX.p * RELAX.omega1) ^ RELAX.k
89  YN98_1.omega = RELAX.omega1 ^ RELAX.k
90  YN98_2.omega = RELAX.omega2 ^ RELAX.k */
91  // reparameterization of omega0: RELAX.omega0 = RELAX.p*RELAX.omega1
92  addParameter_(new Parameter("RELAX.p", 0.5, std::make_shared<IntervalConstraint>(0.01, 1, true, true)));
93 
94  addParameter_(new Parameter("RELAX.omega1", 1, std::make_shared<IntervalConstraint>(0.1, 1, true, true)));
95 
96  // the upper bound of omega3 in its submodel is 999, so I must restrict upperBound(RELAX.omega2)^upperBound(RELAX.k)<=999 -> set maximal omega to 5
97  addParameter_(new Parameter("RELAX.omega2", 2, std::make_shared<IntervalConstraint>(1, 999, true, true)));
98 
99  // add a selection intensity parameter k, which is 1 in the null case
100  addParameter_(new Parameter("RELAX.k", 1, std::make_shared<IntervalConstraint>(0, 10, false, true))); // selection intensity parameter for purifying and neutral selection parameters
101 
102  // look for synonymous codons
103  // assumes that the states number follow the map in the genetic code and thus:
104  // synfrom_ = index of source codon
105  // synto_ = index of destination codon
106  for (synfrom_ = 1; synfrom_ < supportedChars.size(); ++synfrom_)
107  {
108  for (synto_ = 0; synto_ < synfrom_; ++synto_)
109  {
110  if (gc->areSynonymous(supportedChars[synfrom_], supportedChars[synto_])
113  break;
114  }
115  if (synto_ < synfrom_)
116  break;
117  }
118 
119  if (synto_ == supportedChars.size())
120  throw Exception("Impossible to find synonymous codons");
121 
122  // update the 3 rate matrices of the model (strict BG or strict FG)
123  computeFrequencies(false);
124  updateMatrices_();
125 }
126 
127 
129 {
130  // update the values of the sub-model parameters, that are used in the 3 rate matrices
131  for (unsigned int i = 0; i < lParPmodel_.size(); ++i)
132  {
133  // first update the values of the non omega patrameters
134  const string& np = lParPmodel_[i].getName();
135  if (np.size() > 19 && np[18] == 'V')
136  {
137  double k = getParameterValue("k"); // get the value of k
138  int ind = -1;
139  if (np.size() > 19)
140  {
141  ind = TextTools::to<int>(np.substr(19)) - 1; // ind corresponds to the index of the omega that belongs to submodel ind+1
142  }
143  // change ind not to be -1 to allow names omega0, omega1, omega2
144  double omega;
145  if (ind == 0)
146  { // handle omega0 differently due to reparameterization via RELAX.p
147  omega = getParameterValue("p") * getParameterValue("omega1");
148  omega = pow(omega, k);
149  if (omega < 0.002)
150  {
151  omega = 0.002;
152  }
153  }
154  else if (ind == 1)
155  {
156  omega = getParameterValue("omega1");
157  omega = pow (omega, k);
158  if (omega <= 0.002)
159  {
160  omega = 0.002;
161  }
162  }
163  else
164  {
165  omega = getParameterValue("omega2");
166  omega = pow (omega, k);
167  if (omega > 999)
168  {
169  omega = 999;
170  }
171  }
172  lParPmodel_[i].setValue(omega);
173  }
174  else
175  {
177  }
178  }
179 
180 
181  mixedModelPtr_->matchParametersValues(lParPmodel_);
182 
183  // normalize the synonymous substitution rate in all the Q matrices of the 3 submodels to be the same
184  Vdouble vd;
185 
186  for (unsigned int i = 0; i < mixedModelPtr_->getNumberOfModels(); ++i)
187  {
188  vd.push_back(1 / mixedSubModelPtr_->subNModel(i).Qij(synfrom_, synto_));
189  }
190 
191  mixedModelPtr_->setVRates(vd);
192 }
const MixedTransitionModelInterface & mixedModel() const
std::unique_ptr< MixedTransitionModelInterface > mixedModelPtr_
const FrequencySetInterface & frequencySet() const override
Partial implementation of the SubstitutionModel interface for models that are set for matching the bi...
std::map< std::string, std::string > mapParNamesFromPmodel_
Tools to make the link between the Parameters of the object and those of pmixmodel_.
void addParameter_(Parameter *parameter)
Abstract class of Wrapping model class, where all methods are redirected from model().
const SubstitutionModelInterface & subNModel(size_t i) const
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual void addParameter(const Parameter &param)
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const=0
virtual double getParameterValue(const std::string &name) const=0
virtual const ParameterList & getParameters() const=0
virtual const Parameter & parameter(const std::string &name) const=0
void updateMatrices_() override
Definition: RELAX.cpp:128
RELAX(std::shared_ptr< const GeneticCode > gc, std::unique_ptr< CodonFrequencySetInterface > codonFreqs)
Definition: RELAX.cpp:19
virtual double Qij(size_t i, size_t j) const =0
A method for computing all necessary matrices.
virtual bool computeFrequencies() const =0
The Yang and Nielsen (1998) substitution model for codons.
Definition: YN98.h:57
Abstract generic class for The Yang et al (2000) M substitution models for codons....
Definition: YNGP_M.h:32
size_t synto_
Definition: YNGP_M.h:43
const MixtureOfASubstitutionModel * mixedSubModelPtr_
Definition: YNGP_M.h:37
size_t synfrom_
indexes of 2 codons states between which the substitution is synonymous, to set a basis to the homoge...
Definition: YNGP_M.h:43
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
ExtendedFloat pow(const ExtendedFloat &ef, double exp)