bpp-core3  3.0.0
MixtureOfDiscreteDistributions.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../../Text/TextTools.h"
6 #include "../../Utils/MapTools.h"
7 #include "../NumConstants.h"
9 
10 using namespace bpp;
11 using namespace std;
12 
14  const vector<unique_ptr<DiscreteDistributionInterface>>& distributions,
15  const vector<double>& probas ) :
16  AbstractDiscreteDistribution(1, "Mixture."),
17  vdd_(),
18  probas_(),
19  vNestedPrefix_()
20 {
21  if (distributions.size() != probas.size())
22  {
23  throw Exception("MixtureOfDiscreteDistributions. Distributions and probabilities vectors must have the same size (" + TextTools::toString(distributions.size()) + " != " + TextTools::toString(probas.size()) + ").");
24  }
25 
26  size_t size = distributions.size();
27  for (size_t i = 0; i < size; i++)
28  {
29  if (distributions[i] == 0)
30  throw Exception("MixtureOfDiscreteDistributions. Null DiscreteDistribution* in argument vector at index " + TextTools::toString(i));
31  }
32 
33  for (size_t i = 0; i < size; i++)
34  {
35  probas_.push_back(probas[i]);
36  }
37 
38  double sum = VectorTools::sum(probas);
39  if (fabs(1. - sum) > precision())
40  throw Exception("MixtureOfDiscreteDistributions. Probabilities must equal 1 (sum =" + TextTools::toString(sum) + ").");
41 
42  double y = 1;
43  for (size_t i = 0; i < size - 1; i++)
44  {
45  addParameter_(new Parameter("Mixture.theta" + TextTools::toString(i + 1), probas[i] / y, Parameter::PROP_CONSTRAINT_IN));
46  y -= probas[i];
47  }
48 
49 
50  for (size_t i = 0; i < size; i++)
51  {
52  vdd_.push_back(unique_ptr<DiscreteDistributionInterface>(distributions[i]->clone()));
53  }
54 
55  // Parameters
56 
57  for (size_t i = 0; i < size; i++)
58  {
59  vNestedPrefix_.push_back(TextTools::toString(i + 1) + "_" + distributions[i]->getNamespace());
60  }
61 
62  for (size_t i = 0; i < size; i++)
63  {
64  vdd_[i]->setNamespace("Mixture." + vNestedPrefix_[i]);
65  }
66 
67  for (size_t i = 0; i < size; i++)
68  {
70  }
71 
73 }
74 
77  vdd_(),
78  probas_(),
80 {
81  for (size_t i = 0; i < mdd.vdd_.size(); i++)
82  {
83  probas_.push_back(mdd.probas_[i]);
84  vdd_.push_back(unique_ptr<DiscreteDistributionInterface>(mdd.vdd_[i]->clone()));
85  vNestedPrefix_.push_back(mdd.vNestedPrefix_[i]);
86  }
87 }
88 
90 {
92  vdd_.clear();
93  probas_.clear();
94  vNestedPrefix_.clear();
95 
96  for (size_t i = 0; i < mdd.vdd_.size(); i++)
97  {
98  probas_.push_back(mdd.probas_[i]);
99  vdd_.push_back(unique_ptr<DiscreteDistributionInterface>(mdd.vdd_[i]->clone()));
100  vNestedPrefix_.push_back(mdd.vNestedPrefix_[i]);
101  }
102 
103  return *this;
104 }
105 
107 {
108  vdd_.clear();
109 }
110 
112 {
113  for (size_t i = 0; i < vdd_.size(); i++)
114  {
115  vdd_[i]->setNumberOfCategories(nbClasses);
116  }
117 
119 }
120 
121 
123 {
125  size_t size = vdd_.size();
126  double x = 1.0;
127  for (size_t i = 0; i < size - 1; i++)
128  {
129  probas_[i] = getParameterValue("theta" + TextTools::toString(i + 1)) * x;
130  x *= 1 - getParameterValue("theta" + TextTools::toString(i + 1));
131  }
132 
133  probas_[size - 1] = x;
134 
135 
136  for (size_t i = 0; i < size; i++)
137  {
138  vdd_[i]->matchParametersValues(parameters);
139  }
140 
142 }
143 
145 {
146  size_t size = vdd_.size();
147  distribution_.clear();
148  // calculation of distribution
149 
150  for (size_t i = 0; i < size; i++)
151  {
152  vector<double> values = vdd_[i]->getCategories();
153  for (size_t j = 0; j < values.size(); j++)
154  {
155  distribution_[values[j]] = 0;
156  }
157  }
158 
159  for (size_t i = 0; i < size; i++)
160  {
161  vector<double> values = vdd_[i]->getCategories();
162  vector<double> probas2 = vdd_[i]->getProbabilities();
163  for (size_t j = 0; j < values.size(); j++)
164  {
165  distribution_[values[j]] += probas2[j] * probas_[i];
166  }
167  }
168 
170 
171  // intMinMax_
172 
173  double uB, lB;
174  uB = -NumConstants::VERY_BIG();
175  lB = NumConstants::VERY_BIG();
176 
177  bool suB = true, slB = true;
178 
179  for (size_t i = 0; i < size; i++)
180  {
181  if (vdd_[i]->getLowerBound() <= lB)
182  {
183  lB = vdd_[i]->getLowerBound();
184  slB = vdd_[i]->strictLowerBound();
185  }
186  if (vdd_[i]->getUpperBound() >= uB)
187  {
188  uB = vdd_[i]->getUpperBound();
189  suB = vdd_[i]->strictUpperBound();
190  }
191  }
192 
193  intMinMax_->setLowerBound(lB, slB);
194  intMinMax_->setUpperBound(uB, suB);
195 
196  // Compute midpoint bounds_:
197  vector<double> values = MapTools::getKeys<double, double, AbstractDiscreteDistribution::Order>(distribution_);
198 
199  bounds_.resize(numberOfCategories_ - 1);
200 
201  // Fill from 0 to numberOfCategories_-1 with midpoints:
202  for (size_t i = 0; i < numberOfCategories_ - 1; i++)
203  {
204  bounds_[i] = (values[i] + values[i + 1]) / 2.;
205  }
206 }
207 
209 {
210  if (median_ != median)
211  {
212  median_ = median;
213  for (size_t i = 0; i < vdd_.size(); i++)
214  {
215  vdd_[i]->setMedian(median);
216  }
218  }
219 }
221 {
222  for (size_t i = 0; i < vdd_.size(); i++)
223  {
224  vdd_[i]->discretize();
225  }
226 
228 }
229 
231 {
232  double s = 0;
233  for (size_t i = 0; i < vdd_.size(); i++)
234  {
235  s += probas_[i] * vdd_[i]->pProb(x);
236  }
237  return s;
238 }
239 
241 {
242  throw Exception("MixtureOfDiscreteDistributions::qProb to difficult to compute: not implemented");
243  return 0;
244 }
245 
247 {
248  double s = 0;
249  for (size_t i = 0; i < vdd_.size(); i++)
250  {
251  s += probas_[i] * vdd_[i]->Expectation(a);
252  }
253  return s;
254 }
255 
257 {
258  for (size_t i = 0; i < vdd_.size(); i++)
259  {
260  vdd_[i]->restrictToConstraint(c);
261  }
262 
264 }
265 
266 /******************************************************************************/
267 
269 {
271  // We also need to update the namespace of the nested distributions:
272  for (size_t i = 0; i < vdd_.size(); i++)
273  {
274  vdd_[i]->setNamespace(prefix + vNestedPrefix_[i]);
275  }
276 }
MixtureOfDiscreteDistributions & operator=(const MixtureOfDiscreteDistributions &mdd)
std::string getNamespace() const override
void setNumberOfCategories(size_t nbClasses)
sets the number of categories of EACH submodel to nbClasses, so the number of categories of the mixtu...
Partial implementation of the DiscreteDistribution interface.
The constraint interface.
Definition: Constraints.h:28
static T sum(const std::vector< T > &v1)
Definition: VectorTools.h:587
void setNamespace(const std::string &prefix)
Set the namespace for the parameter names.
void fireParameterChanged(const ParameterList &parameters)
Notify the class when one or several parameters have changed.
double pProb(double x) const
Return the cumulative quantile of the continuous version of the distribution, ie .
double Expectation(double a) const
Return a primitive function used for the expectation of the continuous version of the distribution...
STL namespace.
This class is designed to facilitate the manipulation of parameters.
Definition: Parameter.h:97
double qProb(double x) const
Return the quantile of the continuous version of the distribution, ie y such that ...
void addParameter_(Parameter *parameter)
double getParameterValue(const std::string &name) const override
Get the value for parameter of name &#39;name&#39;.
A Discrete distribution object defined by a vector of Discrete Distributions and a set of probabiliti...
The parameter list object.
Definition: ParameterList.h:27
virtual void fireParameterChanged(const ParameterList &parameters)
Notify the class when one or several parameters have changed.
void restrictToConstraint(const ConstraintInterface &c)
Restricts the distribution to the domain where the constraint is respected, in addition of other pred...
AbstractDiscreteDistribution & operator=(const AbstractDiscreteDistribution &adde)
std::shared_ptr< IntervalConstraint > intMinMax_
the interval where the distribution is defined/restricted.
void addParameters_(const ParameterList &parameters)
double getLowerBound() const
methods about the range of the definition
static double VERY_BIG()
Definition: NumConstants.h:48
std::map< double, double, Order > distribution_
static const std::shared_ptr< IntervalConstraint > PROP_CONSTRAINT_IN
Definition: Parameter.h:311
Exception base class. Overload exception constructor (to control the exceptions mechanism). Destructor is already virtual (from std::exception)
Definition: Exceptions.h:20
MixtureOfDiscreteDistributions(const std::vector< std::unique_ptr< DiscreteDistributionInterface >> &distributions, const std::vector< double > &probas)
Builds a new MixtureOfDiscreteDistributions object from a vector of Discrete Distributions and a vect...
void setMedian(bool median)
Sets the median value to true to say that the value in a class is proportional to the median value of...
void setNamespace(const std::string &prefix)
Set the namespace for the parameter names.
MixtureOfDiscreteDistributions * clone() const
Create a copy of this object and send a pointer to it.
std::string toString(T t)
General template method to convert to a string.
Definition: TextTools.h:115
void discretize()
Discretizes the distribution in equiprobable classes.
std::vector< std::unique_ptr< DiscreteDistributionInterface > > vdd_
const ParameterList & getParameters() const override
Get all parameters available.