bpp-core3  3.0.0
AbstractDiscreteDistribution.cpp
Go to the documentation of this file.
1 //
2 // File: AbstractDiscreteDistribution.cpp
3 // Authors:
4 // Julien Dutheil
5 // Created: ?
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 19, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for numerical calculus.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39 */
40 
41 
42 #include "../Random/RandomTools.h"
43 #include "../VectorTools.h"
45 
46 using namespace bpp;
47 using namespace std;
48 
49 
50 AbstractDiscreteDistribution::AbstractDiscreteDistribution(size_t nbClasses, const std::string& prefix) :
52  numberOfCategories_(nbClasses),
53  distribution_(),
54  bounds_(nbClasses - 1),
55  intMinMax_(new IntervalConstraint(-NumConstants::VERY_BIG(), NumConstants::VERY_BIG(), true, true)),
56  median_(false)
57 {}
58 
59 AbstractDiscreteDistribution::AbstractDiscreteDistribution(size_t nbClasses, double delta, const std::string& prefix) :
61  numberOfCategories_(nbClasses),
62  distribution_(Order(delta)),
63  bounds_(nbClasses - 1),
64  intMinMax_(new IntervalConstraint(-NumConstants::VERY_BIG(), NumConstants::VERY_BIG(), true, true)),
65  median_(false)
66 {}
67 
70  numberOfCategories_(adde.numberOfCategories_),
71  distribution_(adde.distribution_),
72  bounds_(adde.bounds_),
73  intMinMax_(adde.intMinMax_->clone()),
74  median_(adde.median_)
75 {}
76 
78 {
82  bounds_ = adde.bounds_;
83  intMinMax_ = std::shared_ptr<IntervalConstraint>(adde.intMinMax_->clone());
84  median_ = adde.median_;
85 
86  return *this;
87 }
88 
89 /******************************************************************************/
90 
92 {
93  return numberOfCategories_;
94 }
95 
97 {
98  if (nbClasses <= 0)
99  cerr << "DEBUG: ERROR!!! Number of categories is <= 0 in AbstractDiscreteDistribution::setNumberOfCategories()." << endl;
100 
101  if (numberOfCategories_ != nbClasses)
102  {
103  numberOfCategories_ = nbClasses;
104  discretize();
105  }
106 }
107 
108 
109 /******************************************************************************/
110 
111 double AbstractDiscreteDistribution::getCategory(size_t categoryIndex) const
112 {
113  map<double, double>::const_iterator it = distribution_.begin();
114  for (unsigned int i = 0; i < categoryIndex; i++)
115  {
116  it++;
117  }
118  return it->first;
119 }
120 
121 /******************************************************************************/
122 
123 double AbstractDiscreteDistribution::getProbability(size_t categoryIndex) const
124 {
125  map<double, double>::const_iterator it = distribution_.begin();
126  for (unsigned int i = 0; i < categoryIndex; i++)
127  {
128  it++;
129  }
130  return it->second;
131 }
132 
133 /******************************************************************************/
134 
135 double AbstractDiscreteDistribution::getProbability(double category) const
136 {
137  return distribution_.find(category)->second;
138 }
139 
140 /******************************************************************************/
141 
143 {
144  Vdouble result(distribution_.size());
145  unsigned int i = 0;
146  for (map<double, double>::const_iterator it = distribution_.begin();
147  it != distribution_.end();
148  it++)
149  {
150  result[i] = it->first;
151  i++;
152  }
153  return result;
154 }
155 
156 /******************************************************************************/
157 
159 {
160  Vdouble result(distribution_.size());
161  size_t i = 0;
162  for (map<double, double>::const_iterator it = distribution_.begin();
163  it != distribution_.end();
164  it++)
165  {
166  result[i] = it->second;
167  i++;
168  }
169  return result;
170 }
171 
172 /******************************************************************************/
173 
174 void AbstractDiscreteDistribution::set(double category, double probability)
175 {
176  distribution_[category] = probability;
177 }
178 
179 /******************************************************************************/
180 
181 void AbstractDiscreteDistribution::add(double category, double probability)
182 {
183  if (distribution_.find(category) == distribution_.end())
184  {
185  // new category
186  distribution_[category] = probability;
187  }
188  else
189  {
190  // existing category
191  distribution_[category] += probability;
192  }
193 }
194 
195 /******************************************************************************/
196 
198 {
200  double cumprob = 0;
201  for (map<double, double>::const_iterator i = distribution_.begin();
202  i != distribution_.end();
203  i++)
204  {
205  cumprob += i->second;
206  if (r <= cumprob)
207  return i->first;
208  }
209  // This line can't be reached:
210  return -1.;
211 }
212 
213 /******************************************************************************/
214 
216 {
217  double prob = 0;
218  map<double, double>::const_iterator it = distribution_.find(category);
219  for (map<double, double>::const_iterator i = distribution_.begin();
220  i != it;
221  i++)
222  {
223  prob += i->second;
224  }
225  return prob;
226 }
227 
228 /******************************************************************************/
229 
231 {
232  double prob = 0;
233  map<double, double>::const_iterator it = distribution_.find(category);
234  if (it == distribution_.end())
235  return 0;
236  for (map<double, double>::const_iterator i = ++it;
237  i != distribution_.end();
238  i++)
239  {
240  prob += i->second;
241  }
242  return 1. - prob;
243 }
244 
245 /******************************************************************************/
246 
248 {
249  double prob = 0;
250  map<double, double>::const_iterator it = distribution_.find(category);
251  if (it == distribution_.end())
252  return 0;
253  for (map<double, double>::const_iterator i = ++it;
254  i != distribution_.end();
255  i++)
256  {
257  prob += i->second;
258  }
259  return prob;
260 }
261 
262 /******************************************************************************/
263 
265 {
266  double prob = 0;
267  map<double, double>::const_iterator it = distribution_.find(category);
268  for (map<double, double>::const_iterator i = distribution_.begin();
269  i != it;
270  i++)
271  {
272  prob += i->second;
273  }
274  return 1. - prob;
275 }
276 
277 /******************************************************************************/
278 
280 {
281  for (map<double, double>::const_iterator i = distribution_.begin(); i != distribution_.end(); i++)
282  {
283  (out << "Pr(" << (i->first) << ") = " << (i->second)).endLine();
284  }
285 }
286 
287 /******************************************************************************/
288 
290 {
291  if (!(intMinMax_->isCorrect(value)))
292  throw Exception("AbstractDiscreteDistribution::getValueCategory out of bounds:" + TextTools::toString(value));
293 
294  map<double, double>::const_iterator it = distribution_.begin();
295  for (unsigned int i = 1; i < bounds_.size(); i++)
296  {
297  if (value < bounds_[i])
298  break;
299  else
300  it++;
301  }
302 
303  return it->first;
304 }
305 
306 /******************************************************************************/
307 
309 {
310  if (!(intMinMax_->isCorrect(value)))
311  throw Exception("AbstractDiscreteDistribution::getValueCategory out of bounds:" + TextTools::toString(value));
312 
313  for (unsigned int i = 1; i < bounds_.size(); i++)
314  {
315  if (value < bounds_[i])
316  return i;
317  }
318 
319  throw bounds_.size();
320 }
321 
322 /***********************************************************************/
323 
324 
326 {
327  /* discretization of distribution with equal proportions in each
328  category
329  */
330 
331  distribution_.clear();
332  bounds_.resize(numberOfCategories_ - 1);
333 
334  double minX = pProb(intMinMax_->getLowerBound());
335  double maxX = pProb(intMinMax_->getUpperBound());
336 
337  double ec;
338  size_t i;
339  vector<double> values(numberOfCategories_);
340 
341  if (maxX != minX)
342  {
343  // divide the domain into equiprobable intervals
344  ec = (maxX - minX) / static_cast<double>(numberOfCategories_);
345 
346  for (i = 1; i < numberOfCategories_; i++)
347  {
348  bounds_[i - 1] = qProb(minX + static_cast<double>(i) * ec);
349  }
350 
351  // for each category, sets the value v as the median, adjusted
352  // such that the sum of the values = 1
353  if (median_)
354  {
355  double t = 0;
356  for (i = 0; i < numberOfCategories_; i++)
357  {
358  values[i] = qProb(minX + (static_cast<double>(i) + 0.5) * ec);
359  }
360 
361  for (i = 0, t = 0; i < numberOfCategories_; i++)
362  {
363  t += values[i];
364  }
365 
366  double mean = Expectation(intMinMax_->getUpperBound()) - Expectation(intMinMax_->getLowerBound());
367 
368  for (i = 0; i < numberOfCategories_; i++)
369  {
370  values[i] *= mean / t / ec;
371  }
372  }
373  else
374  // for each category, sets the value v such that
375  // v * length_of_the_interval = the surface of the category
376  {
377  double a = Expectation(intMinMax_->getLowerBound()), b;
378  for (i = 0; i < numberOfCategories_ - 1; i++)
379  {
380  b = Expectation(bounds_[i]);
381  values[i] = (b - a) / ec;
382  a = b;
383  }
384  values[numberOfCategories_ - 1] = (Expectation(intMinMax_->getUpperBound()) - a) / ec;
385  }
386  }
387  else
388  // if maxX==minX, uniform discretization of the range
389  {
390  ec = (intMinMax_->getUpperBound() - intMinMax_->getLowerBound()) / static_cast<double>(numberOfCategories_);
391  for (i = 1; i < numberOfCategories_; i++)
392  {
393  bounds_[i - 1] = intMinMax_->getLowerBound() + static_cast<double>(i) * ec;
394  }
395 
396  values[0] = (intMinMax_->getLowerBound() + bounds_[0]) / 2;
397 
398  for (i = 1; i < numberOfCategories_ - 1; i++)
399  {
400  values[i] = (bounds_[i - 1] + bounds_[i]) / 2;
401  }
402 
403  values[numberOfCategories_ - 1] = (intMinMax_->getUpperBound() + bounds_[numberOfCategories_ - 1]) / 2;
404  }
405 
406  // adjustments near the boundaries of the domain, according to the precision chosen
407  if (intMinMax_->strictLowerBound())
408  {
409  for (i = 0; i < numberOfCategories_; i++)
410  {
411  if (values[i] < intMinMax_->getLowerBound() + precision())
412  values[i] = intMinMax_->getLowerBound() + precision();
413  else
414  break;
415  }
416  }
417  else
418  {
419  for (i = 0; i < numberOfCategories_; i++)
420  {
421  if (values[i] < intMinMax_->getLowerBound())
422  values[i] = intMinMax_->getLowerBound() + precision();
423  else
424  break;
425  }
426  }
427 
428  if (intMinMax_->strictUpperBound())
429  {
430  for (i = numberOfCategories_; i > 0; i--)
431  {
432  if (values[i - 1] > intMinMax_->getUpperBound() - precision())
433  values[i - 1] = intMinMax_->getUpperBound() - precision();
434  else
435  break;
436  }
437  }
438  else
439  {
440  for (i = numberOfCategories_; i > 0; i--)
441  {
442  if (values[i - 1] > intMinMax_->getUpperBound())
443  values[i - 1] = intMinMax_->getUpperBound() - precision();
444  else
445  break;
446  }
447  }
448 
449  // now the distribution_ map, taking care that all values are different
450 
451  double p = 1. / static_cast<double>(numberOfCategories_);
452  for (i = 0; i < numberOfCategories_; i++)
453  {
454  if (distribution_.find(values[i]) != distribution_.end())
455  {
456  int j = 1;
457  int f = ((values[i] + NumConstants::TINY()) >= intMinMax_->getUpperBound()) ? -1 : 1;
458  while (distribution_.find(values[i] + f * j * precision()) != distribution_.end())
459  {
460  j++;
461  f = ((values[i] + f * j * precision()) >= intMinMax_->getUpperBound()) ? -1 : 1;
462  }
463  distribution_[values[i] + f * j * precision()] = p;
464  }
465  else
466  distribution_[values[i]] = p;
467  }
468 
469  return;
470 }
471 
473 {
475  vb[0] = getLowerBound();
476  for (unsigned int i = 0; i < numberOfCategories_ - 1; i++)
477  {
478  vb[i + 1] = getBound(i);
479  }
481  return vb;
482 }
483 
485 {
486  const IntervalConstraint* pi = dynamic_cast<const IntervalConstraint*>(&c);
487 
488  if (!pi)
489  throw Exception("AbstractDiscreteDistribution::restrictToConstraint: the constraint is not an interval");
490 
491  if (!(*intMinMax_ <= (*pi)))
492  {
493  *intMinMax_ &= c;
494  discretize();
495  }
496 }
Comparator class for AbstractDiscreteDistribution.
Partial implementation of the DiscreteDistribution interface.
double getCategory(size_t categoryIndex) const
double getIInfCumulativeProbability(double category) const
void set(double category, double probability)
Set the probability associated to a class.
void add(double category, double probability)
Modify the probability associated to a class.
double getSSupCumulativeProbability(double category) const
double rand() const
Draw a random number from this distribution.
double getProbability(size_t categoryIndex) const
virtual void restrictToConstraint(const Constraint &c)
Restricts the distribution to the domain where the constraint is respected, in addition of other pred...
std::map< double, double, Order > distribution_
double getLowerBound() const
methods about the range of the definition
double getInfCumulativeProbability(double category) const
void setNumberOfCategories(size_t nbClasses)
sets the number of categories and discretizes if there is a change in this number.
double getSupCumulativeProbability(double category) const
AbstractDiscreteDistribution(size_t nbClasses, const std::string &prefix="")
void print(OutputStream &out) const
Print the distribution (categories and corresponding probabilities) to a stream.
AbstractDiscreteDistribution & operator=(const AbstractDiscreteDistribution &adde)
virtual void discretize()
Discretizes the distribution in equiprobable classes.
std::shared_ptr< IntervalConstraint > intMinMax_
the interval where the distribution is defined/restricted.
A partial implementation of the Parametrizable interface.
AbstractParameterAliasable & operator=(const AbstractParameterAliasable &ap)
The constraint interface.
Definition: Constraints.h:66
virtual double Expectation(double a) const =0
Return a primitive function used for the expectation of the continuous version of the distribution,...
virtual double pProb(double x) const =0
Return the cumulative quantile of the continuous version of the distribution, ie .
virtual double qProb(double x) const =0
Return the quantile of the continuous version of the distribution, ie y such that .
Exception base class. Overload exception constructor (to control the exceptions mechanism)....
Definition: Exceptions.h:59
An interval, either bounded or not, which can also have infinite bounds.
Definition: Constraints.h:139
this static class contains several useful constant values.
Definition: NumConstants.h:57
static double TINY()
Definition: NumConstants.h:82
OutputStream interface.
Definition: OutputStream.h:67
static double giveRandomNumberBetweenZeroAndEntry(double entry, const RandomFactory &generator= *DEFAULT_GENERATOR)
Get a double random value (between 0 and specified range).
Definition: RandomTools.cpp:63
std::string toString(T t)
General template method to convert to a string.
Definition: TextTools.h:153
std::vector< double > Vdouble
Definition: VectorTools.h:70