bpp-core3  3.0.0
AbstractDiscreteDistribution.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../Random/RandomTools.h"
6 #include "../VectorTools.h"
8 
9 using namespace bpp;
10 using namespace std;
11 
15 
16 AbstractDiscreteDistribution::AbstractDiscreteDistribution(size_t nbClasses, const std::string& prefix, short discretization) :
18  numberOfCategories_(nbClasses),
19  distribution_(),
20  bounds_(nbClasses - 1),
21  intMinMax_(new IntervalConstraint(-NumConstants::VERY_BIG(), NumConstants::VERY_BIG(), true, true)),
22  median_(false),
23  discretizationScheme_(discretization)
24 {}
25 
26 AbstractDiscreteDistribution::AbstractDiscreteDistribution(size_t nbClasses, double delta, const std::string& prefix, short discretization) :
28  numberOfCategories_(nbClasses),
29  distribution_(Order(delta)),
30  bounds_(nbClasses - 1),
31  intMinMax_(new IntervalConstraint(-NumConstants::VERY_BIG(), NumConstants::VERY_BIG(), true, true)),
32  median_(false),
33  discretizationScheme_(discretization)
34 {}
35 
40  bounds_(adde.bounds_),
41  intMinMax_(adde.intMinMax_->clone()),
42  median_(adde.median_),
44 {}
45 
47 {
51  bounds_ = adde.bounds_;
52  intMinMax_ = std::shared_ptr<IntervalConstraint>(adde.intMinMax_->clone());
53  median_ = adde.median_;
55 
56  return *this;
57 }
58 
59 /******************************************************************************/
60 
62 {
63  return numberOfCategories_;
64 }
65 
67 {
68  if (nbClasses <= 0)
69  cerr << "DEBUG: ERROR!!! Number of categories is <= 0 in AbstractDiscreteDistribution::setNumberOfCategories()." << endl;
70 
71  if (numberOfCategories_ != nbClasses)
72  {
73  numberOfCategories_ = nbClasses;
74  discretize();
75  }
76 }
77 
78 
79 /******************************************************************************/
80 
81 double AbstractDiscreteDistribution::getCategory(size_t categoryIndex) const
82 {
83  map<double, double>::const_iterator it = distribution_.begin();
84  for (unsigned int i = 0; i < categoryIndex; i++)
85  {
86  it++;
87  }
88  return it->first;
89 }
90 
91 /******************************************************************************/
92 
93 double AbstractDiscreteDistribution::getProbability(size_t categoryIndex) const
94 {
95  map<double, double>::const_iterator it = distribution_.begin();
96  for (unsigned int i = 0; i < categoryIndex; i++)
97  {
98  it++;
99  }
100  return it->second;
101 }
102 
103 /******************************************************************************/
104 
105 double AbstractDiscreteDistribution::getProbability(double category) const
106 {
107  return distribution_.find(category)->second;
108 }
109 
110 /******************************************************************************/
111 
113 {
114  Vdouble result(distribution_.size());
115  unsigned int i = 0;
116  for (map<double, double>::const_iterator it = distribution_.begin();
117  it != distribution_.end();
118  it++)
119  {
120  result[i] = it->first;
121  i++;
122  }
123  return result;
124 }
125 
126 /******************************************************************************/
127 
129 {
130  Vdouble result(distribution_.size());
131  size_t i = 0;
132  for (map<double, double>::const_iterator it = distribution_.begin();
133  it != distribution_.end();
134  it++)
135  {
136  result[i] = it->second;
137  i++;
138  }
139  return result;
140 }
141 
142 /******************************************************************************/
143 
144 void AbstractDiscreteDistribution::set(double category, double probability)
145 {
146  distribution_[category] = probability;
147 }
148 
149 /******************************************************************************/
150 
151 void AbstractDiscreteDistribution::add(double category, double probability)
152 {
153  if (distribution_.find(category) == distribution_.end())
154  {
155  // new category
156  distribution_[category] = probability;
157  }
158  else
159  {
160  // existing category
161  distribution_[category] += probability;
162  }
163 }
164 
165 /******************************************************************************/
166 
168 {
170  double cumprob = 0;
171  for (map<double, double>::const_iterator i = distribution_.begin();
172  i != distribution_.end();
173  i++)
174  {
175  cumprob += i->second;
176  if (r <= cumprob)
177  return i->first;
178  }
179  // This line can't be reached:
180  return -1.;
181 }
182 
183 /******************************************************************************/
184 
186 {
187  double prob = 0;
188  map<double, double>::const_iterator it = distribution_.find(category);
189  for (map<double, double>::const_iterator i = distribution_.begin();
190  i != it;
191  i++)
192  {
193  prob += i->second;
194  }
195  return prob;
196 }
197 
198 /******************************************************************************/
199 
201 {
202  double prob = 0;
203  map<double, double>::const_iterator it = distribution_.find(category);
204  if (it == distribution_.end())
205  return 0;
206  for (map<double, double>::const_iterator i = ++it;
207  i != distribution_.end();
208  i++)
209  {
210  prob += i->second;
211  }
212  return 1. - prob;
213 }
214 
215 /******************************************************************************/
216 
218 {
219  double prob = 0;
220  map<double, double>::const_iterator it = distribution_.find(category);
221  if (it == distribution_.end())
222  return 0;
223  for (map<double, double>::const_iterator i = ++it;
224  i != distribution_.end();
225  i++)
226  {
227  prob += i->second;
228  }
229  return prob;
230 }
231 
232 /******************************************************************************/
233 
235 {
236  double prob = 0;
237  map<double, double>::const_iterator it = distribution_.find(category);
238  for (map<double, double>::const_iterator i = distribution_.begin();
239  i != it;
240  i++)
241  {
242  prob += i->second;
243  }
244  return 1. - prob;
245 }
246 
247 /******************************************************************************/
248 
250 {
251  for (map<double, double>::const_iterator i = distribution_.begin(); i != distribution_.end(); i++)
252  {
253  (out << "Pr(" << (i->first) << ") = " << (i->second)).endLine();
254  }
255 }
256 
257 /******************************************************************************/
258 
260 {
261  if (!(intMinMax_->isCorrect(value)))
262  throw Exception("AbstractDiscreteDistribution::getValueCategory out of bounds:" + TextTools::toString(value));
263 
264  map<double, double>::const_iterator it = distribution_.begin();
265  for (unsigned int i = 1; i < bounds_.size(); i++)
266  {
267  if (value < bounds_[i])
268  break;
269  else
270  it++;
271  }
272 
273  return it->first;
274 }
275 
276 /******************************************************************************/
277 
279 {
280  if (!(intMinMax_->isCorrect(value)))
281  throw Exception("AbstractDiscreteDistribution::getValueCategory out of bounds:" + TextTools::toString(value));
282 
283  for (unsigned int i = 1; i < bounds_.size(); i++)
284  {
285  if (value < bounds_[i])
286  return i;
287  }
288 
289  throw bounds_.size();
290 }
291 
292 /***********************************************************************/
293 
295 {
296  /* discretization of distribution with equal proportions in each
297  category
298  */
299 
300  distribution_.clear();
301  bounds_.resize(numberOfCategories_ - 1);
302 
303  double minX = pProb(intMinMax_->getLowerBound());
304  double maxX = pProb(intMinMax_->getUpperBound());
305 
306  double ec;
307  size_t i;
308  vector<double> values(numberOfCategories_);
309 
310  if (maxX != minX)
311  {
312  // divide the domain into equiprobable intervals
313  ec = (maxX - minX) / static_cast<double>(numberOfCategories_);
314  for (i = 1; i < numberOfCategories_; i++)
315  {
316  bounds_[i - 1] = qProb(minX + static_cast<double>(i) * ec);
317  }
318 
319  // for each category, sets the value v as the median, adjusted
320  // such that the sum of the values = 1
321  if (median_)
322  {
323  double t = 0;
324  for (i = 0; i < numberOfCategories_; i++)
325  {
326  values[i] = qProb(minX + (static_cast<double>(i) + 0.5) * ec);
327  }
328 
329  for (i = 0, t = 0; i < numberOfCategories_; i++)
330  {
331  t += values[i];
332  }
333 
334  double mean = Expectation(intMinMax_->getUpperBound()) - Expectation(intMinMax_->getLowerBound());
335 
336  for (i = 0; i < numberOfCategories_; i++)
337  {
338  values[i] *= mean / t / ec;
339  }
340  }
341  else
342  // for each category, sets the value v such that
343  // v * length_of_the_interval = the surface of the category
344  {
345  double firstBound = intMinMax_->getLowerBound(), secondBound;
346  double a = Expectation(firstBound), b;
347  for (i = 0; i < numberOfCategories_ - 1; i++)
348  {
349  secondBound = bounds_[i];
350  b = Expectation(secondBound);
351  values[i] = (b - a) / ec;
352  if (values[i] < firstBound || values[i] > secondBound) // May happen if the two bounds are undistinguishable.
353  {
354  values[i] = (firstBound + secondBound) / 2.;
355  }
356  a = b;
357  firstBound = secondBound;
358  }
359  secondBound = intMinMax_->getUpperBound();
360  values[numberOfCategories_ - 1] = (Expectation(secondBound) - a) / ec;
361  if (values[numberOfCategories_ - 1] < firstBound || values[numberOfCategories_ - 1] > secondBound) // May happen if the two bounds are undistinguishable.
362  {
363  values[numberOfCategories_ - 1] = (firstBound + secondBound) / 2.;
364  }
365  }
366  }
367  else
368  // if maxX==minX, uniform discretization of the range
369  {
370  ec = (intMinMax_->getUpperBound() - intMinMax_->getLowerBound()) / static_cast<double>(numberOfCategories_);
371  for (i = 1; i < numberOfCategories_; i++)
372  {
373  bounds_[i - 1] = intMinMax_->getLowerBound() + static_cast<double>(i) * ec;
374  }
375 
376  values[0] = (intMinMax_->getLowerBound() + bounds_[0]) / 2;
377 
378  for (i = 1; i < numberOfCategories_ - 1; i++)
379  {
380  values[i] = (bounds_[i - 1] + bounds_[i]) / 2;
381  }
382 
383  values[numberOfCategories_ - 1] = (intMinMax_->getUpperBound() + bounds_[numberOfCategories_ - 1]) / 2;
384  }
385 
386  // adjustments near the boundaries of the domain, according to the precision chosen
387  if (intMinMax_->strictLowerBound())
388  {
389  for (i = 0; i < numberOfCategories_; i++)
390  {
391  if (values[i] < intMinMax_->getLowerBound() + precision())
392  values[i] = intMinMax_->getLowerBound() + precision();
393  else
394  break;
395  }
396  }
397  else
398  {
399  for (i = 0; i < numberOfCategories_; i++)
400  {
401  if (values[i] < intMinMax_->getLowerBound())
402  values[i] = intMinMax_->getLowerBound() + precision();
403  else
404  break;
405  }
406  }
407 
408  if (intMinMax_->strictUpperBound())
409  {
410  for (i = numberOfCategories_; i > 0; i--)
411  {
412  if (values[i - 1] > intMinMax_->getUpperBound() - precision())
413  values[i - 1] = intMinMax_->getUpperBound() - precision();
414  else
415  break;
416  }
417  }
418  else
419  {
420  for (i = numberOfCategories_; i > 0; i--)
421  {
422  if (values[i - 1] > intMinMax_->getUpperBound())
423  values[i - 1] = intMinMax_->getUpperBound() - precision();
424  else
425  break;
426  }
427  }
428 
429  // now the distribution_ map, taking care that all values are different
430 
431  double p = 1. / static_cast<double>(numberOfCategories_);
432  for (i = 0; i < numberOfCategories_; i++)
433  {
434  if (distribution_.find(values[i]) != distribution_.end())
435  {
436  int j = 1;
437  int f = ((values[i] + NumConstants::TINY()) >= intMinMax_->getUpperBound()) ? -1 : 1;
438  while (distribution_.find(values[i] + f * j * precision()) != distribution_.end())
439  {
440  j++;
441  f = ((values[i] + f * j * precision()) >= intMinMax_->getUpperBound()) ? -1 : 1;
442  }
443  distribution_[values[i] + f * j * precision()] = p;
444  }
445  else
446  distribution_[values[i]] = p;
447  }
448 
449  return;
450 }
451 
452 /***********************************************************************/
453 
455 {
456  /* discretization of distribution with equal intervals
457  */
458 
459  distribution_.clear();
460  bounds_.resize(numberOfCategories_ - 1);
461  vector<double> values(numberOfCategories_);
462 
463  double lowerBound = intMinMax_->getLowerBound();
464  double upperBound = intMinMax_->getUpperBound();
465  double condProb = pProb(upperBound) - pProb(lowerBound);
466  double interval = (upperBound - lowerBound) / static_cast<double>(numberOfCategories_);
467 
468  // Compute bounds:
469  for (size_t i = 0; i < numberOfCategories_ - 1; ++i)
470  {
471  bounds_[i] = lowerBound + (static_cast<double>(i) + 1.) * interval;
472  }
473 
474  // Compute values:
475  for (size_t i = 0; i < numberOfCategories_; ++i)
476  {
477  values[i] = lowerBound + (static_cast<double>(i) + 0.5) * interval;
478  }
479 
480  // Compute proportions:
481  deque<double> allBounds(bounds_.begin(), bounds_.end());
482  allBounds.push_front(lowerBound);
483  allBounds.push_back(upperBound);
484  for (size_t i = 0; i < numberOfCategories_; ++i)
485  {
486  distribution_[values[i]] = (pProb(allBounds[i + 1]) - pProb(allBounds[i])) / condProb;
487  }
488 
489  return;
490 }
491 
492 /***********************************************************************/
493 
495 {
497  {
499  }
501  {
503  }
504  else
505  {
507  // Check bounds:
508  deque<double> allBounds(bounds_.begin(), bounds_.end());
509  allBounds.push_front(intMinMax_->getLowerBound());
510  allBounds.push_back(intMinMax_->getUpperBound());
511  bool check = true;
512  for (size_t i = 1; check && i < numberOfCategories_ + 1; ++i)
513  {
514  if (allBounds[i] == allBounds[i - 1])
515  check = false;
516  }
517  if (!check)
518  {
519  // cout << "Unidentifiable bounds. Falling back to equal intervals." << endl;
521  }
522  }
523 }
524 
525 /***********************************************************************/
526 
528 {
530  vb[0] = getLowerBound();
531  for (unsigned int i = 0; i < numberOfCategories_ - 1; i++)
532  {
533  vb[i + 1] = getBound(i);
534  }
536  return vb;
537 }
538 
540 {
541  try
542  {
543  const IntervalConstraint& pi = dynamic_cast<const IntervalConstraint&>(c);
544 
545  if (!(*intMinMax_ <= pi))
546  {
547  *intMinMax_ &= c;
548  discretize();
549  }
550  }
551  catch (exception& e)
552  {
553  throw Exception("AbstractDiscreteDistribution::restrictToConstraint: the constraint is not an interval");
554  }
555 }
void set(double category, double probability)
Set the probability associated to a class.
double getInfCumulativeProbability(double category) const
double getCategory(size_t categoryIndex) const
Comparator class for AbstractDiscreteDistribution.
virtual void restrictToConstraint(const ConstraintInterface &c)
Restricts the distribution to the domain where the constraint is respected, in addition of other pred...
DiscreteDistributionInterface * clone() const =0
Create a copy of this object and send a pointer to it.
Partial implementation of the DiscreteDistribution interface.
double getIInfCumulativeProbability(double category) const
this static class contains several useful constant values.
Definition: NumConstants.h:20
An interval, either bounded or not, which can also have infinite bounds.
Definition: Constraints.h:101
The constraint interface.
Definition: Constraints.h:28
double getSSupCumulativeProbability(double category) const
double getSupCumulativeProbability(double category) const
void add(double category, double probability)
Modify the probability associated to a class.
STL namespace.
double getProbability(size_t categoryIndex) const
AbstractDiscreteDistribution & operator=(const AbstractDiscreteDistribution &adde)
virtual double Expectation(double a) const =0
Return a primitive function used for the expectation of the continuous version of the distribution...
std::shared_ptr< IntervalConstraint > intMinMax_
the interval where the distribution is defined/restricted.
A partial implementation of the Parametrizable interface.
void print(OutputStream &out) const
Print the distribution (categories and corresponding probabilities) to a stream.
static double giveRandomNumberBetweenZeroAndEntry(double entry)
Get a double random value (between 0 and specified range).
Definition: RandomTools.h:78
double rand() const
Draw a random number from this distribution.
static double TINY()
Definition: NumConstants.h:46
AbstractDiscreteDistribution(size_t nbClasses, const std::string &prefix="", short discretization=DISCRETIZATION_EQUAL_PROB)
double getLowerBound() const
methods about the range of the definition
std::vector< double > Vdouble
Definition: VectorTools.h:34
std::map< double, double, Order > distribution_
virtual void discretize()
Discretizes the distribution in equiprobable classes.
OutputStream interface.
Definition: OutputStream.h:29
AbstractParameterAliasable & operator=(const AbstractParameterAliasable &ap)
Exception base class. Overload exception constructor (to control the exceptions mechanism). Destructor is already virtual (from std::exception)
Definition: Exceptions.h:20
virtual double pProb(double x) const =0
Return the cumulative quantile of the continuous version of the distribution, ie .
void setNumberOfCategories(size_t nbClasses)
sets the number of categories and discretizes if there is a change in this number.
virtual double qProb(double x) const =0
Return the quantile of the continuous version of the distribution, ie y such that ...
std::string toString(T t)
General template method to convert to a string.
Definition: TextTools.h:115