bpp-core3  3.0.0
BppODiscreteDistributionFormat.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../Io/FileTools.h"
6 #include "../Numeric/AutoParameter.h"
7 #include "../Numeric/Prob/BetaDiscreteDistribution.h"
8 #include "../Numeric/Prob/ConstantDistribution.h"
9 #include "../Numeric/Prob/DiscreteDistribution.h"
10 #include "../Numeric/Prob/ExponentialDiscreteDistribution.h"
11 #include "../Numeric/Prob/GammaDiscreteDistribution.h"
12 #include "../Numeric/Prob/GaussianDiscreteDistribution.h"
13 #include "../Numeric/Prob/InvariantMixedDiscreteDistribution.h"
14 #include "../Numeric/Prob/MixtureOfDiscreteDistributions.h"
15 #include "../Numeric/Prob/SimpleDiscreteDistribution.h"
16 #include "../Numeric/Prob/TruncatedExponentialDiscreteDistribution.h"
17 #include "../Numeric/Prob/UniformDiscreteDistribution.h"
18 #include "../Text/KeyvalTools.h"
19 #include "../Text/StringTokenizer.h"
20 #include "../Text/TextTools.h"
23 
24 using namespace bpp;
25 
26 // From the STL:
27 #include <iomanip>
28 
29 using namespace std;
30 
31 
32 unique_ptr<DiscreteDistributionInterface> BppODiscreteDistributionFormat::readDiscreteDistribution(
33  const std::string& distDescription,
34  bool parseArguments)
35 {
36  unparsedArguments_.clear();
37  string distName;
38  unique_ptr<DiscreteDistributionInterface> rDist;
39  map<string, string> args;
40  KeyvalTools::parseProcedure(distDescription, distName, args);
41 
42  if ((distName == "InvariantMixed") || (distName == "Invariant"))
43  {
44  // We have to parse the nested distribution first:
45  string nestedDistDescription = args["dist"];
46  if (TextTools::isEmpty(nestedDistDescription))
47  throw Exception("BppODiscreteDistributionFormat::read. Missing argument 'dist' for distribution 'Invariant'.");
48  if (verbose_)
49  ApplicationTools::displayResult("Invariant Mixed distribution", distName );
50  BppODiscreteDistributionFormat nestedReader(verbose_);
51  auto nestedDistribution = nestedReader.readDiscreteDistribution(nestedDistDescription, true);
52  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
53 
54  // Now we create the Invariant rate distribution:
55  rDist = make_unique<InvariantMixedDiscreteDistribution>(std::move(nestedDistribution), 0.1, 0.000001);
56 
57  // Then we update the parameter set:
58  for (auto& it : unparsedArgumentsNested)
59  {
60  unparsedArguments_["Invariant." + it.first] = it.second;
61  }
62 
63  if (args.find("p") != args.end())
64  unparsedArguments_["Invariant.p"] = args["p"];
65  }
66  else if (distName == "Constant")
67  {
68  if (args.find("value") == args.end())
69  throw Exception("Missing argument 'value' in Constant distribution");
70  rDist = make_unique<ConstantDistribution>(TextTools::to<double>(args["value"]));
71  unparsedArguments_["Constant.value"] = args["value"];
72  }
73  else if (distName == "Simple")
74  {
75  if (args.find("values") == args.end())
76  throw Exception("Missing argument 'values' in Simple distribution");
77  if (args.find("probas") == args.end())
78  throw Exception("Missing argument 'probas' in Simple distribution");
79  vector<double> probas, values;
80 
81  string rf = args["values"];
82  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
83  while (strtok.hasMoreToken())
84  values.push_back(TextTools::toDouble(strtok.nextToken()));
85 
86  rf = args["probas"];
87  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
88  while (strtok2.hasMoreToken())
89  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
90 
91  std::map<size_t, std::vector<double>> ranges;
92 
93  if (args.find("ranges") != args.end())
94  {
95  string rr = args["ranges"];
96  StringTokenizer strtok3(rr.substr(1, rr.length() - 2), ",");
97  string desc;
98  double deb, fin;
99  unsigned int num;
100  size_t po, pf, ppv;
101  while (strtok3.hasMoreToken())
102  {
103  desc = strtok3.nextToken();
104  po = desc.find("[");
105  ppv = desc.find(";");
106  pf = desc.find("]");
107  num = (unsigned int)(TextTools::toInt(desc.substr(1, po - 1)));
108  deb = TextTools::toDouble(desc.substr(po + 1, ppv - po - 1));
109  fin = TextTools::toDouble(desc.substr(ppv + 1, pf - ppv - 1));
110  vector<double> vd;
111  vd.push_back(deb);
112  vd.push_back(fin);
113  ranges[num] = vd;
114  }
115  }
116  if (ranges.size() == 0)
117  rDist = make_unique<SimpleDiscreteDistribution>(values, probas);
118  else
119  rDist = make_unique<SimpleDiscreteDistribution>(values, ranges, probas);
120 
121  vector<string> v = rDist->getParameters().getParameterNames();
122 
123  for (auto i : v)
124  {
125  unparsedArguments_[i] = TextTools::toString(rDist->getParameterValue(rDist->getParameterNameWithoutNamespace(i)));
126  }
127  }
128  else if (distName == "Mixture")
129  {
130  if (args.find("probas") == args.end())
131  throw Exception("Missing argument 'probas' in Mixture distribution");
132  vector<double> probas;
133  vector<unique_ptr<DiscreteDistributionInterface>> v_pdd;
134  unique_ptr<DiscreteDistributionInterface> pdd;
135  string rf = args["probas"];
136  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
137  while (strtok2.hasMoreToken())
138  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
139 
140  vector<string> v_nestedDistrDescr;
141 
142  unsigned int nbd = 0;
143  while (args.find("dist" + TextTools::toString(++nbd)) != args.end())
144  v_nestedDistrDescr.push_back(args["dist" + TextTools::toString(nbd)]);
145 
146  if (v_nestedDistrDescr.size() != probas.size())
147  throw Exception("Number of distributions (keyword 'dist" + TextTools::toString(probas.size()) + "') do not fit the number of probabilities");
148 
149  BppODiscreteDistributionFormat nestedReader;
150 
151  for (unsigned i = 0; i < v_nestedDistrDescr.size(); ++i)
152  {
153  pdd = nestedReader.readDiscreteDistribution(v_nestedDistrDescr[i], true);
154  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
155 
156  for (auto& it : unparsedArgumentsNested)
157  {
158  unparsedArguments_[distName + "." + TextTools::toString(i + 1) + "_" + it.first] = it.second;
159  }
160  v_pdd.push_back(std::move(pdd));
161  }
162  rDist = make_unique<MixtureOfDiscreteDistributions>(v_pdd, probas);
163  }
164  else
165  {
166  if (args.find("n") == args.end())
167  throw Exception("Missing argument 'n' (number of classes) in " + distName
168  + " distribution");
169  unsigned int nbClasses = TextTools::to<unsigned int>(args["n"]);
170 
171  if (distName == "Gamma")
172  {
173  double offset = 0;
174 
175  if (args.find("offset") != args.end())
176  try
177  {
178  offset = TextTools::toDouble(args["offset"]);
179  }
180  catch (Exception& e)
181  {}
182 
183  if (args.find("ParamOffset") != args.end())
184  rDist.reset(new GammaDiscreteDistribution(nbClasses, 1, 1, true, offset));
185  else
186  rDist.reset(new GammaDiscreteDistribution(nbClasses, 1, 1, false, offset));
187 
188  if (args.find("alpha") != args.end())
189  unparsedArguments_["Gamma.alpha"] = args["alpha"];
190  if (args.find("beta") != args.end())
191  unparsedArguments_["Gamma.beta"] = args["beta"];
192  if (args.find("offset") != args.end())
193  unparsedArguments_["Gamma.offset"] = args["offset"];
194  }
195  else if (distName == "Gaussian")
196  {
197  rDist.reset(new GaussianDiscreteDistribution(nbClasses, 0, 1));
198  if (args.find("mu") != args.end())
199  unparsedArguments_["Gaussian.mu"] = args["mu"];
200  if (args.find("sigma") != args.end())
201  unparsedArguments_["Gaussian.sigma"] = args["sigma"];
202  }
203  else if (distName == "Beta")
204  {
205  rDist.reset(new BetaDiscreteDistribution(nbClasses, 1, 1));
206  if (args.find("alpha") != args.end())
207  unparsedArguments_["Beta.alpha"] = args["alpha"];
208  if (args.find("beta") != args.end())
209  unparsedArguments_["Beta.beta"] = args["beta"];
210  }
211  else if (distName == "Exponential")
212  {
213  rDist.reset(new ExponentialDiscreteDistribution(nbClasses, 1));
214  if (args.find("lambda") != args.end())
215  unparsedArguments_["Exponential.lambda"] = args["lambda"];
216  if (args.find("median") != args.end())
217  rDist->setMedian(true);
218  }
219  else if (distName == "TruncExponential")
220  {
221  rDist.reset(new TruncatedExponentialDiscreteDistribution(nbClasses, 1, 0));
222 
223  if (args.find("median") != args.end())
224  rDist->setMedian(true);
225  if (args.find("lambda") != args.end())
226  unparsedArguments_["TruncExponential.lambda"] = args["lambda"];
227  if (args.find("tp") != args.end())
228  unparsedArguments_["TruncExponential.tp"] = args["tp"];
229  }
230  else if (distName == "Uniform")
231  {
232  if (args.find("begin") == args.end())
233  throw Exception("Missing argument 'begin' in Uniform distribution");
234  if (args.find("end") == args.end())
235  throw Exception("Missing argument 'end' in Uniform distribution");
236  rDist.reset(new UniformDiscreteDistribution(
237  nbClasses,
238  TextTools::to<double>(args["begin"]),
239  TextTools::to<double>(args["end"])));
240  }
241  else
242  {
243  throw Exception("Unknown distribution: " + distName + ".");
244  }
245  }
246  if (verbose_)
247  {
248  ApplicationTools::displayResult("Distribution", distName);
250  }
251 
252  if (parseArguments)
253  initialize_(*rDist);
254 
255  return rDist;
256 }
257 
258 
260  const DiscreteDistributionInterface& dist,
261  OutputStream& out,
262  std::map<std::string, std::string>& globalAliases,
263  std::vector<std::string>& writtenNames) const
264 {
265  bool comma = false;
266 
267  out << dist.getName() + "(";
268 
269  auto* invar = dynamic_cast<const InvariantMixedDiscreteDistribution*>(&dist);
270  if (invar)
271  {
272  auto& pd = invar->variableSubDistribution();
273  out << "dist=";
274  writeDiscreteDistribution(pd, out, globalAliases, writtenNames);
275  comma = true;
276  }
277  else
278  {
279  try
280  {
281  auto& mix = dynamic_cast<const MixtureOfDiscreteDistributions&>(dist);
282  size_t nd = mix.getNumberOfDistributions();
283  for (size_t i = 0; i < nd; ++i)
284  {
285  if (comma)
286  out << ",";
287  out << "dist" + TextTools::toString(i + 1) + "=";
288  writeDiscreteDistribution(mix.nDistribution(i), out, globalAliases, writtenNames);
289  comma = true;
290  }
291  out << ",probas=(";
292  for (size_t i = 0; i < nd; ++i)
293  {
294  out << mix.getNProbability(i);
295  if (i != nd - 1)
296  out << ",";
297  }
298  out << ")";
299  for (size_t i = 1; i < nd; ++i)
300  {
301  writtenNames.push_back(mix.getNamespace() + "theta" + TextTools::toString(i));
302  }
303  }
304  catch (bad_cast&)
305  {}
306  }
307 
308  if (dynamic_cast<const BetaDiscreteDistribution*>(&dist) ||
309  dynamic_cast<const ExponentialDiscreteDistribution*>(&dist) ||
310  dynamic_cast<const GammaDiscreteDistribution*>(&dist) ||
311  dynamic_cast<const GaussianDiscreteDistribution*>(&dist) ||
312  dynamic_cast<const TruncatedExponentialDiscreteDistribution*>(&dist) ||
313  dynamic_cast<const UniformDiscreteDistribution*>(&dist))
314  {
315  if (comma)
316  out << ",";
317  out << "n=" << dist.getNumberOfCategories();
318  comma = true;
319  }
320 
321  try
322  {
323  auto& pc = dynamic_cast<const ConstantDistribution&>(dist);
324  if (dist.getNumberOfParameters() == 0)
325  {
326  if (comma)
327  out << ",";
328  out << "value=" << pc.getLowerBound();
329  comma = true;
330  }
331  }
332  catch (bad_cast&)
333  {}
334 
335  try
336  {
337  auto& ps = dynamic_cast<const SimpleDiscreteDistribution&>(dist);
338  size_t nd = ps.getNumberOfCategories();
339  if (comma)
340  out << ",";
341  out << "values=(";
342  for (size_t i = 0; i < nd; ++i)
343  {
344  out << ps.getCategory(i);
345  if (i != nd - 1)
346  out << ",";
347  }
348  out << "),probas=(";
349  for (size_t i = 0; i < nd; ++i)
350  {
351  out << ps.getProbability(i);
352  if (i != nd - 1)
353  out << ",";
354  }
355  out << ")";
356 
357  auto range = ps.getRanges();
358  if (range.size() != 0)
359  {
360  out << ",ranges=(";
361  auto it(range.begin());
362  while (it != range.end())
363  {
364  out << "V" << TextTools::toString(it->first);
365  out << "[" << TextTools::toString(it->second[0]) << ";" << TextTools::toString(it->second[1]) << "]";
366  it++;
367  if (it != range.end())
368  out << ",";
369  }
370  out << ")";
371  }
372 
373  for (size_t i = 1; i < nd; ++i)
374  {
375  writtenNames.push_back(ps.getNamespace() + "theta" + TextTools::toString(i));
376  }
377  for (size_t i = 1; i < nd + 1; ++i)
378  {
379  writtenNames.push_back(ps.getNamespace() + "V" + TextTools::toString(i));
380  }
381  comma = true;
382  }
383  catch (bad_cast&)
384  {}
385 
386  // Writing the parameters
388  bOP.write(dynamic_cast<const ParameterAliasable&>(dist), out, globalAliases, dist.getIndependentParameters().getParameterNames(), writtenNames, true, comma);
389  out << ")";
390 }
391 
394 {
396 
397  for (size_t i = 0; i < pl.size(); ++i)
398  {
399  AutoParameter ap(pl[i]);
401  pl.setParameter(i, ap);
402  }
403 
404  for (size_t i = 0; i < pl.size(); ++i)
405  {
406  const string pName = pl[i].getName();
407  double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue());
408  pl[i].setValue(value);
409  if (verbose_)
410  ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
411  }
412 
413  rDist.matchParametersValues(pl);
414  if (verbose_)
415  {
416  for (size_t c = 0; c < rDist.getNumberOfCategories(); ++c)
417  {
419  + " (Pr = " + TextTools::toString(rDist.getProbability(c)) + ") rate",
420  TextTools::toString(rDist.getCategory(c)));
421  }
422  }
423 }
virtual bool matchParametersValues(const ParameterList &parameters)=0
Update the parameters from parameters.
const DiscreteDistributionInterface & variableSubDistribution() const
virtual std::string getName() const =0
Get the name of the distribution.
A tokenizer for strings.
double toDouble(const std::string &s, char dec, char scientificNotation)
Convert from string to double.
Definition: TextTools.cpp:217
virtual void setMedian(bool median)=0
Sets the median value to true to say that the value in a class is proportional to the median value of...
const std::string & nextToken()
Get the next available token. If no token is availbale, throw an Exception.
virtual double getParameterValue(const std::string &name) const =0
Get the value for parameter of name &#39;name&#39;.
bool hasMoreToken() const
Tell if some tokens are still available.
std::unique_ptr< DiscreteDistributionInterface > readDiscreteDistribution(const std::string &distDescription, bool parseArguments=true)
Read a discrete distribution from a string.
static void displayResult(const std::string &text, const T &result)
Print a result message.
Parametrizable output in BppO format.
virtual const ParameterList & getParameters() const =0
Get all parameters available.
size_t size() const
Definition: ParameterList.h:56
STL namespace.
Interface for discrete distribution objects.
Discretized Gaussian distribution.
Discretized Beta distribution with parameters alpha and beta, on a given interval. On default, the interval is , but it can be restricted.
virtual double getProbability(size_t categoryIndex) const =0
Discretized Exponential distribution.
A Discrete distribution object defined by a vector of Discrete Distributions and a set of probabiliti...
The parameter list object.
Definition: ParameterList.h:27
void write(const Parametrizable &parametrizable, OutputStream &out, std::vector< std::string > &writtenNames, bool printComma=false) const override
Write a Parametrizable to a stream.
A Discrete distribution object, where some specific probabilities are assigned to a finite set of val...
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
Get a double parameter.
virtual void setMessageHandler(std::shared_ptr< OutputStream > mh)
Set the message handler for this AutoParameter.
Definition: AutoParameter.h:95
size_t getNumberOfDistributions() const
Returns the number of discrete distributions in the mixture.
static std::shared_ptr< OutputStream > warning
The output stream where warnings have to be displayed.
virtual std::vector< std::string > getParameterNames() const
Get all parameter names in the list.
virtual size_t getNumberOfCategories() const =0
void writeDiscreteDistribution(const DiscreteDistributionInterface &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
Write a discrete distribution to a stream.
The AutoParameter class.
Definition: AutoParameter.h:23
virtual const ParameterList & getIndependentParameters() const =0
Get the minimal list of parameters to set the model.
const std::map< std::string, std::string > & getUnparsedArguments() const
void initialize_(DiscreteDistributionInterface &rDist)
Set parameter initial values of a given distribution according to options.
OutputStream interface.
Definition: OutputStream.h:29
virtual void setParameter(size_t index, const Parameter &param)
Change given parameter.
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const =0
Resolves a parameter name according to the current namespace.
Discretized Uniform distribution. All categories are equidistributed all along a given interval...
Exception base class. Overload exception constructor (to control the exceptions mechanism). Destructor is already virtual (from std::exception)
Definition: Exceptions.h:20
Discretized Gamma distribution with an offset.
int toInt(const std::string &s, char scientificNotation)
Convert from string to int.
Definition: TextTools.cpp:208
Constant discrete distribution.
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
Parse (not recursively) a procedure string.
Discrete mixed distribution, with a one-category fixed value (called "invariant") and a user-specifie...
virtual double getCategory(size_t categoryIndex) const =0
bool isEmpty(const std::string &s)
Tell if a string is empty. A string is considered to be &#39;empty&#39; if it is only made of white spaces...
Definition: TextTools.cpp:20
std::string toString(T t)
General template method to convert to a string.
Definition: TextTools.h:115
Discrete Distribution I/O in BppO format.
virtual size_t getNumberOfParameters() const =0
Get the number of parameters.
Discretized Truncated (on the right) Exponential distribution, where the probabilities are given the ...