bpp-core3  3.0.0
BppODiscreteDistributionFormat.cpp
Go to the documentation of this file.
1 //
2 // File: BppODiscreteDistributionFormat.cpp
3 // Authors:
4 // Laurent Guéguen
5 // Created: lundi 3 septembre 2012, à 14h 48
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for phylogenetic data analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39 */
40 
41 
42 #include "../Io/FileTools.h"
43 #include "../Numeric/AutoParameter.h"
44 #include "../Numeric/Prob/BetaDiscreteDistribution.h"
45 #include "../Numeric/Prob/ConstantDistribution.h"
46 #include "../Numeric/Prob/DiscreteDistribution.h"
47 #include "../Numeric/Prob/ExponentialDiscreteDistribution.h"
48 #include "../Numeric/Prob/GammaDiscreteDistribution.h"
49 #include "../Numeric/Prob/GaussianDiscreteDistribution.h"
50 #include "../Numeric/Prob/InvariantMixedDiscreteDistribution.h"
51 #include "../Numeric/Prob/MixtureOfDiscreteDistributions.h"
52 #include "../Numeric/Prob/SimpleDiscreteDistribution.h"
53 #include "../Numeric/Prob/TruncatedExponentialDiscreteDistribution.h"
54 #include "../Numeric/Prob/UniformDiscreteDistribution.h"
55 #include "../Text/KeyvalTools.h"
56 #include "../Text/StringTokenizer.h"
57 #include "../Text/TextTools.h"
60 
61 using namespace bpp;
62 
63 // From the STL:
64 #include <iomanip>
65 
66 using namespace std;
67 
68 
70  const std::string& distDescription,
71  bool parseArguments)
72 {
73  unparsedArguments_.clear();
74  string distName;
75  unique_ptr<DiscreteDistribution> rDist;
76  map<string, string> args;
77  KeyvalTools::parseProcedure(distDescription, distName, args);
78 
79  if ((distName == "InvariantMixed") || (distName == "Invariant"))
80  {
81  // We have to parse the nested distribution first:
82  string nestedDistDescription = args["dist"];
83  if (TextTools::isEmpty(nestedDistDescription))
84  throw Exception("BppODiscreteDistributionFormat::read. Missing argument 'dist' for distribution 'Invariant'.");
85  if (verbose_)
86  ApplicationTools::displayResult("Invariant Mixed distribution", distName );
87  BppODiscreteDistributionFormat nestedReader(verbose_);
88  DiscreteDistribution* nestedDistribution = nestedReader.readDiscreteDistribution(nestedDistDescription, true);
89  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
90 
91  // Now we create the Invariant rate distribution:
92  rDist.reset(new InvariantMixedDiscreteDistribution(nestedDistribution, 0.1, 0.000001));
93 
94  // Then we update the parameter set:
95  for (map<string, string>::iterator it = unparsedArgumentsNested.begin();
96  it != unparsedArgumentsNested.end(); it++)
97  {
98  unparsedArguments_["Invariant." + it->first] = it->second;
99  }
100 
101  if (args.find("p") != args.end())
102  unparsedArguments_["Invariant.p"] = args["p"];
103  }
104  else if (distName == "Constant")
105  {
106  if (args.find("value") == args.end())
107  throw Exception("Missing argument 'value' in Constant distribution");
108  rDist.reset(new ConstantDistribution(TextTools::to<double>(args["value"])));
109  unparsedArguments_["Constant.value"] = args["value"];
110  }
111  else if (distName == "Simple")
112  {
113  if (args.find("values") == args.end())
114  throw Exception("Missing argument 'values' in Simple distribution");
115  if (args.find("probas") == args.end())
116  throw Exception("Missing argument 'probas' in Simple distribution");
117  vector<double> probas, values;
118 
119  string rf = args["values"];
120  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
121  while (strtok.hasMoreToken())
122  values.push_back(TextTools::toDouble(strtok.nextToken()));
123 
124  rf = args["probas"];
125  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
126  while (strtok2.hasMoreToken())
127  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
128 
129  std::map<size_t, std::vector<double> > ranges;
130 
131  if (args.find("ranges") != args.end())
132  {
133  string rr = args["ranges"];
134  StringTokenizer strtok3(rr.substr(1, rr.length() - 2), ",");
135  string desc;
136  double deb, fin;
137  unsigned int num;
138  size_t po, pf, ppv;
139  while (strtok3.hasMoreToken())
140  {
141  desc = strtok3.nextToken();
142  po = desc.find("[");
143  ppv = desc.find(";");
144  pf = desc.find("]");
145  num = (unsigned int)(TextTools::toInt(desc.substr(1, po - 1)));
146  deb = TextTools::toDouble(desc.substr(po + 1, ppv - po - 1));
147  fin = TextTools::toDouble(desc.substr(ppv + 1, pf - ppv - 1));
148  vector<double> vd;
149  vd.push_back(deb);
150  vd.push_back(fin);
151  ranges[num] = vd;
152  }
153  }
154  if (ranges.size() == 0)
155  rDist.reset(new SimpleDiscreteDistribution(values, probas));
156  else
157  rDist.reset(new SimpleDiscreteDistribution(values, ranges, probas));
158 
159  vector<string> v = rDist->getParameters().getParameterNames();
160 
161  for (unsigned int i = 0; i < v.size(); i++)
162  {
163  unparsedArguments_[v[i]] = TextTools::toString(rDist->getParameterValue(rDist->getParameterNameWithoutNamespace(v[i])));
164  }
165  }
166  else if (distName == "Mixture")
167  {
168  if (args.find("probas") == args.end())
169  throw Exception("Missing argument 'probas' in Mixture distribution");
170  vector<double> probas;
171  vector<DiscreteDistribution*> v_pdd;
173  string rf = args["probas"];
174  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
175  while (strtok2.hasMoreToken())
176  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
177 
178  vector<string> v_nestedDistrDescr;
179 
180  unsigned int nbd = 0;
181  while (args.find("dist" + TextTools::toString(++nbd)) != args.end())
182  v_nestedDistrDescr.push_back(args["dist" + TextTools::toString(nbd)]);
183 
184  if (v_nestedDistrDescr.size() != probas.size())
185  throw Exception("Number of distributions (keyword 'dist" + TextTools::toString(probas.size()) + "') do not fit the number of probabilities");
186 
187  BppODiscreteDistributionFormat nestedReader;
188 
189  for (unsigned i = 0; i < v_nestedDistrDescr.size(); i++)
190  {
191  pdd = nestedReader.readDiscreteDistribution(v_nestedDistrDescr[i], true);
192  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
193 
194  for (map<string, string>::iterator it = unparsedArgumentsNested.begin(); it != unparsedArgumentsNested.end(); it++)
195  {
196  unparsedArguments_[distName + "." + TextTools::toString(i + 1) + "_" + it->first] = it->second;
197  }
198  v_pdd.push_back(pdd);
199  }
200  rDist.reset(new MixtureOfDiscreteDistributions(v_pdd, probas));
201  }
202  else
203  {
204  if (args.find("n") == args.end())
205  throw Exception("Missing argument 'n' (number of classes) in " + distName
206  + " distribution");
207  unsigned int nbClasses = TextTools::to<unsigned int>(args["n"]);
208 
209  if (distName == "Gamma")
210  {
211  double offset = 0;
212 
213  if (args.find("offset") != args.end())
214  try
215  {
216  offset = TextTools::toDouble(args["offset"]);
217  }
218  catch (Exception& e)
219  {}
220 
221  if (args.find("ParamOffset") != args.end())
222  rDist.reset(new GammaDiscreteDistribution(nbClasses, 1, 1, true, offset));
223  else
224  rDist.reset(new GammaDiscreteDistribution(nbClasses, 1, 1, false, offset));
225 
226  if (args.find("alpha") != args.end())
227  unparsedArguments_["Gamma.alpha"] = args["alpha"];
228  if (args.find("beta") != args.end())
229  unparsedArguments_["Gamma.beta"] = args["beta"];
230  if (args.find("offset") != args.end())
231  unparsedArguments_["Gamma.offset"] = args["offset"];
232  }
233  else if (distName == "Gaussian")
234  {
235  rDist.reset(new GaussianDiscreteDistribution(nbClasses, 0, 1));
236  if (args.find("mu") != args.end())
237  unparsedArguments_["Gaussian.mu"] = args["mu"];
238  if (args.find("sigma") != args.end())
239  unparsedArguments_["Gaussian.sigma"] = args["sigma"];
240  }
241  else if (distName == "Beta")
242  {
243  rDist.reset(new BetaDiscreteDistribution(nbClasses, 1, 1));
244  if (args.find("alpha") != args.end())
245  unparsedArguments_["Beta.alpha"] = args["alpha"];
246  if (args.find("beta") != args.end())
247  unparsedArguments_["Beta.beta"] = args["beta"];
248  }
249  else if (distName == "Exponential")
250  {
251  rDist.reset(new ExponentialDiscreteDistribution(nbClasses, 1));
252  if (args.find("lambda") != args.end())
253  unparsedArguments_["Exponential.lambda"] = args["lambda"];
254  if (args.find("median") != args.end())
255  rDist->setMedian(true);
256  }
257  else if (distName == "TruncExponential")
258  {
259  rDist.reset(new TruncatedExponentialDiscreteDistribution(nbClasses, 1, 0));
260 
261  if (args.find("median") != args.end())
262  rDist->setMedian(true);
263  if (args.find("lambda") != args.end())
264  unparsedArguments_["TruncExponential.lambda"] = args["lambda"];
265  if (args.find("tp") != args.end())
266  unparsedArguments_["TruncExponential.tp"] = args["tp"];
267  }
268  else if (distName == "Uniform")
269  {
270  if (args.find("begin") == args.end())
271  throw Exception("Missing argument 'begin' in Uniform distribution");
272  if (args.find("end") == args.end())
273  throw Exception("Missing argument 'end' in Uniform distribution");
274  rDist.reset(new UniformDiscreteDistribution(
275  nbClasses,
276  TextTools::to<double>(args["begin"]),
277  TextTools::to<double>(args["end"])));
278  }
279  else
280  {
281  throw Exception("Unknown distribution: " + distName + ".");
282  }
283  }
284  if (verbose_)
285  {
286  ApplicationTools::displayResult("Distribution", distName);
287  ApplicationTools::displayResult("Number of classes", TextTools::toString(rDist->getNumberOfCategories()));
288  }
289 
290  if (parseArguments)
291  initialize_(*rDist);
292 
293  return rDist.release();
294 }
295 
296 
298  const DiscreteDistribution& dist,
299  OutputStream& out,
300  std::map<std::string, std::string>& globalAliases,
301  std::vector<std::string>& writtenNames) const
302 {
303  bool comma = false;
304 
305  const DiscreteDistribution* pd;
306 
307  out << dist.getName() + "(";
308 
309  const InvariantMixedDiscreteDistribution* invar = dynamic_cast<const InvariantMixedDiscreteDistribution*>(&dist);
310  if (invar)
311  {
312  pd = invar->getVariableSubDistribution();
313  out << "dist=";
314  writeDiscreteDistribution(*pd, out, globalAliases, writtenNames);
315  comma = true;
316  }
317  else
318  {
319  const MixtureOfDiscreteDistributions* mix = dynamic_cast<const MixtureOfDiscreteDistributions*>(&dist);
320  if (mix)
321  {
322  size_t nd = mix->getNumberOfDistributions();
323  for (size_t i = 0; i < nd; i++)
324  {
325  if (comma)
326  out << ",";
327  out << "dist" + TextTools::toString(i + 1) + "=";
328  writeDiscreteDistribution(*mix->getNDistribution(i), out, globalAliases, writtenNames);
329  comma = true;
330  }
331  out << ",probas=(";
332  for (size_t i = 0; i < nd; i++)
333  {
334  out << mix->getNProbability(i);
335  if (i != nd - 1)
336  out << ",";
337  }
338  out << ")";
339  for (size_t i = 1; i < nd; i++)
340  {
341  writtenNames.push_back(mix->getNamespace() + "theta" + TextTools::toString(i));
342  }
343  }
344  }
345 
346  if (dynamic_cast<const BetaDiscreteDistribution*>(&dist) ||
347  dynamic_cast<const ExponentialDiscreteDistribution*>(&dist) ||
348  dynamic_cast<const GammaDiscreteDistribution*>(&dist) ||
349  dynamic_cast<const GaussianDiscreteDistribution*>(&dist) ||
350  dynamic_cast<const TruncatedExponentialDiscreteDistribution*>(&dist) ||
351  dynamic_cast<const UniformDiscreteDistribution*>(&dist))
352  {
353  if (comma)
354  out << ",";
355  out << "n=" << dist.getNumberOfCategories();
356  comma = true;
357  }
358 
359  const ConstantDistribution* pc = dynamic_cast<const ConstantDistribution*>(&dist);
360  if (pc && dist.getNumberOfParameters() == 0)
361  {
362  if (comma)
363  out << ",";
364  out << "value=" << pc->getLowerBound();
365  comma = true;
366  }
367 
368  const SimpleDiscreteDistribution* ps = dynamic_cast<const SimpleDiscreteDistribution*>(&dist);
369  if (ps)
370  {
371  size_t nd = ps->getNumberOfCategories();
372  if (comma)
373  out << ",";
374  out << "values=(";
375  for (size_t i = 0; i < nd; i++)
376  {
377  out << ps->getCategory(i);
378  if (i != nd - 1)
379  out << ",";
380  }
381  out << "),probas=(";
382  for (size_t i = 0; i < nd; i++)
383  {
384  out << ps->getProbability(i);
385  if (i != nd - 1)
386  out << ",";
387  }
388  out << ")";
389 
390  const std::map<size_t, std::vector<double> > range = ps->getRanges();
391  if (range.size() != 0)
392  {
393  out << ",ranges=(";
394  std::map<size_t, std::vector<double> >::const_iterator it(range.begin());
395  while (it != range.end())
396  {
397  out << "V" << TextTools::toString(it->first);
398  out << "[" << TextTools::toString(it->second[0]) << ";" << TextTools::toString(it->second[1]) << "]";
399  it++;
400  if (it != range.end())
401  out << ",";
402  }
403  out << ")";
404  }
405 
406  for (size_t i = 1; i < nd; i++)
407  {
408  writtenNames.push_back(ps->getNamespace() + "theta" + TextTools::toString(i));
409  }
410  for (size_t i = 1; i < nd + 1; i++)
411  {
412  writtenNames.push_back(ps->getNamespace() + "V" + TextTools::toString(i));
413  }
414  comma = true;
415  }
416 
417  // Writing the parameters
419  bOP.write(dynamic_cast<const ParameterAliasable*>(&dist), out, globalAliases, dist.getIndependentParameters().getParameterNames(), writtenNames, true, comma);
420  out << ")";
421 }
422 
424  DiscreteDistribution& rDist)
425 {
427 
428  for (size_t i = 0; i < pl.size(); ++i)
429  {
430  AutoParameter ap(pl[i]);
432  pl.setParameter(i, ap);
433  }
434 
435  for (size_t i = 0; i < pl.size(); ++i)
436  {
437  const string pName = pl[i].getName();
438  double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue());
439  pl[i].setValue(value);
440  if (verbose_)
441  ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
442  }
443 
444  rDist.matchParametersValues(pl);
445  if (verbose_)
446  {
447  for (size_t c = 0; c < rDist.getNumberOfCategories(); ++c)
448  {
450  + " (Pr = " + TextTools::toString(rDist.getProbability(c)) + ") rate",
451  TextTools::toString(rDist.getCategory(c)));
452  }
453  }
454 }
double getCategory(size_t categoryIndex) const
double getProbability(size_t categoryIndex) const
static std::shared_ptr< OutputStream > warning
The output stream where warnings have to be displayed.
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
Get a double parameter.
static void displayResult(const std::string &text, const T &result)
Print a result message.
The AutoParameter class.
Definition: AutoParameter.h:61
virtual void setMessageHandler(OutputStream *mh)
Set the message handler for this AutoParameter.
Discretized Beta distribution with parameters alpha and beta, on a given interval....
Discrete Distribution I/O in BppO format.
DiscreteDistribution * readDiscreteDistribution(const std::string &distDescription, bool parseArguments=true)
Read a discrete distribution from a string.
const std::map< std::string, std::string > & getUnparsedArguments() const
void initialize_(DiscreteDistribution &rDist)
Set parameter initial values of a given distribution according to options.
void writeDiscreteDistribution(const DiscreteDistribution &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
Write a discrete distribution to a stream.
Parametrizable output in BppO format.
void write(const Parametrizable *parametrizable, OutputStream &out, std::vector< std::string > &writtenNames, bool printComma=false) const
Write a Parametrizable to a stream.
Constant discrete distribution.
double getLowerBound() const
methods about the range of the definition
Interface for discrete distribution objects.
virtual double getProbability(size_t categoryIndex) const =0
virtual double getCategory(size_t categoryIndex) const =0
virtual std::string getName() const =0
Get the name of the distribution.
virtual size_t getNumberOfCategories() const =0
Exception base class. Overload exception constructor (to control the exceptions mechanism)....
Definition: Exceptions.h:59
Discretized Exponential distribution.
Discretized Gamma distribution with an offset.
Discretized Gaussian distribution.
Discrete mixed distribution, with a one-category fixed value (called "invariant") and a user-specifie...
const DiscreteDistribution * getVariableSubDistribution() const
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
Parse (not recursively) a procedure string.
A Discrete distribution object defined by a vector of Discrete Distributions and a set of probabiliti...
double getNProbability(size_t n) const
Returns the probability of the n-th discrete distribution in the mixture.
size_t getNumberOfDistributions() const
Returns the number of discrete distributions in the mixture.
const DiscreteDistribution * getNDistribution(size_t n) const
Returns a pointer to the n-th discrete distribution in the mixture.
OutputStream interface.
Definition: OutputStream.h:67
Extend the Parametrizable interface with support for parameter aliases.
virtual const ParameterList & getIndependentParameters() const =0
Get the minimal list of parameters to set the model.
The parameter list object.
Definition: ParameterList.h:65
size_t size() const
Definition: ParameterList.h:92
virtual std::vector< std::string > getParameterNames() const
Get all parameter names in the list.
virtual void setParameter(size_t index, const Parameter &param)
Change given parameter.
virtual size_t getNumberOfParameters() const =0
Get the number of parameters.
virtual bool matchParametersValues(const ParameterList &parameters)=0
Update the parameters from parameters.
A Discrete distribution object, where some specific probabilities are assigned to a finite set of val...
const std::map< size_t, std::vector< double > > getRanges() const
A tokenizer for strings.
const std::string & nextToken()
Get the next available token. If no token is availbale, throw an Exception.
bool hasMoreToken() const
Tell if some tokens are still available.
Discretized Truncated (on the right) Exponential distribution, where the probabilities are given the ...
Discretized Uniform distribution. All categories are equidistributed all along a given interval.
int toInt(const std::string &s, char scientificNotation)
Convert from string to int.
Definition: TextTools.cpp:246
double toDouble(const std::string &s, char dec, char scientificNotation)
Convert from string to double.
Definition: TextTools.cpp:255
bool isEmpty(const std::string &s)
Tell if a string is empty. A string is considered to be 'empty' if it is only made of white spaces.
Definition: TextTools.cpp:58
std::string toString(T t)
General template method to convert to a string.
Definition: TextTools.h:153