bpp-phyl3  3.0.0
BppORateDistributionFormat.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../Model/RateDistribution/ConstantRateDistribution.h"
6 #include "../Model/RateDistribution/ExponentialDiscreteRateDistribution.h"
7 #include "../Model/RateDistribution/GammaDiscreteRateDistribution.h"
8 #include "../Model/RateDistribution/GaussianDiscreteRateDistribution.h"
10 
11 // From bpp-core:
12 #include <Bpp/Text/KeyvalTools.h>
17 
18 using namespace bpp;
19 
20 // From the STL:
21 #include <iomanip>
22 
23 using namespace std;
24 
25 
26 unique_ptr<DiscreteDistributionInterface> BppORateDistributionFormat::readDiscreteDistribution(
27  const std::string& distDescription,
28  bool parseArguments)
29 {
30  unparsedArguments_.clear();
31  string distName;
32  map<string, string> args;
33  KeyvalTools::parseProcedure(distDescription, distName, args);
34  unique_ptr<DiscreteDistributionInterface> rDist;
35 
36  if (distName == "Uniform")
37  throw Exception("BppO Warning: Uniform distribution is deprecated, use Constant instead.");
38 
39  if ((distName == "InvariantMixed") || (distName == "Invariant"))
40  {
41  // We have to parse the nested distribution first:
42  string nestedDistDescription = args["dist"];
43  if (TextTools::isEmpty(nestedDistDescription))
44  throw Exception("BppORateDistributionFormat::read. Missing argument 'dist' for distribution 'Invariant'.");
45  if (verbose_)
46  ApplicationTools::displayResult("Invariant Mixed distribution", distName );
47  BppORateDistributionFormat nestedReader(false);
48  auto nestedDistribution = nestedReader.readDiscreteDistribution(nestedDistDescription, false);
49  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
50 
51  // Now we create the Invariant rate distribution:
52  rDist = make_unique<InvariantMixedDiscreteDistribution>(std::move(nestedDistribution), 0.1, 0.000001);
53 
54  // Then we update the parameter set:
55  for (auto it : unparsedArgumentsNested)
56  {
57  unparsedArguments_["Invariant." + it.first] = it.second;
58  }
59 
60  if (args.find("p") != args.end())
61  unparsedArguments_["Invariant.p"] = args["p"];
62  }
63  else if (distName == "Constant")
64  {
65  if (!allowConstant_)
66  throw Exception("BppORateDistributionFormat::read(). Constant distribution not allowed.");
67 
68  if (args.find("value") != args.end())
69  ApplicationTools::displayMessage("Found argument 'value' in Constant distribution. Constant distribution is defined to have an average of 1.");
70  rDist = make_unique<ConstantRateDistribution>();
71  }
72  else if (distName == "Simple")
73  {
74  if (args.find("values") == args.end())
75  throw Exception("Missing argument 'values' in Simple distribution");
76  if (args.find("probas") == args.end())
77  throw Exception("Missing argument 'probas' in Simple distribution");
78  vector<double> probas, values;
79 
80  string rf = args["values"];
81  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
82  while (strtok.hasMoreToken())
83  values.push_back(TextTools::toDouble(strtok.nextToken()));
84 
85  rf = args["probas"];
86  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
87  while (strtok2.hasMoreToken())
88  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
89 
90  std::map<size_t, std::vector<double>> ranges;
91 
92  if (args.find("ranges") != args.end())
93  {
94  string rr = args["ranges"];
95  StringTokenizer strtok3(rr.substr(1, rr.length() - 2), ",");
96  string desc;
97  double deb, fin;
98  size_t num;
99  size_t po, pf, ppv;
100  while (strtok3.hasMoreToken())
101  {
102  desc = strtok3.nextToken();
103  po = desc.find("[");
104  ppv = desc.find(";");
105  pf = desc.find("]");
106  num = (size_t)(TextTools::toInt(desc.substr(1, po - 1)));
107  deb = TextTools::toDouble(desc.substr(po + 1, ppv - po - 1));
108  fin = TextTools::toDouble(desc.substr(ppv + 1, pf - ppv - 1));
109  vector<double> vd;
110  vd.push_back(deb);
111  vd.push_back(fin);
112  ranges[num] = vd;
113  }
114  }
115  if (ranges.size() == 0)
116  rDist = make_unique<SimpleDiscreteDistribution>(values, probas);
117  else
118  rDist = make_unique<SimpleDiscreteDistribution>(values, ranges, probas);
119 
120  vector<string> v = rDist->getParameters().getParameterNames();
121 
122  for (size_t i = 0; i < v.size(); ++i)
123  {
124  unparsedArguments_[v[i]] = TextTools::toString(rDist->getParameterValue(rDist->getParameterNameWithoutNamespace(v[i])));
125  }
126  }
127  else if (distName == "Mixture")
128  {
129  if (args.find("probas") == args.end())
130  throw Exception("Missing argument 'probas' in Mixture distribution");
131  vector<double> probas;
132  vector<unique_ptr<DiscreteDistributionInterface>> v_pdd;
133  string rf = args["probas"];
134  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
135  while (strtok2.hasMoreToken())
136  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
137 
138  vector<string> v_nestedDistrDescr;
139 
140  size_t nbd = 0;
141  while (args.find("dist" + TextTools::toString(++nbd)) != args.end())
142  v_nestedDistrDescr.push_back(args["dist" + TextTools::toString(nbd)]);
143 
144  if (v_nestedDistrDescr.size() != probas.size())
145  throw Exception("Number of distributions (keyword 'dist" + TextTools::toString(probas.size()) + "') do not fit the number of probabilities");
146 
147  for (unsigned i = 0; i < v_nestedDistrDescr.size(); i++)
148  {
149  BppORateDistributionFormat nestedReader(false);
150  auto pdd = nestedReader.readDiscreteDistribution(v_nestedDistrDescr[i], false);
151  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
152 
153  for (auto& it : unparsedArgumentsNested)
154  {
155  unparsedArguments_[distName + "." + TextTools::toString(i + 1) + "_" + it.first] = it.second;
156  }
157  v_pdd.push_back(std::move(pdd));
158  }
159  rDist = make_unique<MixtureOfDiscreteDistributions>(v_pdd, probas);
160  }
161  else
162  {
163  if (args.find("n") == args.end())
164  throw Exception("Missing argument 'n' (number of classes) in " + distName
165  + " distribution");
166  size_t nbClasses = TextTools::to<size_t>(args["n"]);
167 
168  if (distName == "Gamma")
169  {
170  rDist = make_unique<GammaDiscreteRateDistribution>(nbClasses, 1.);
171 
172  if (args.find("alpha") != args.end())
173  unparsedArguments_["Gamma.alpha"] = args["alpha"];
174  if (args.find("beta") != args.end())
175  throw Exception("Found argument 'beta' in Gamma distribution. Gamma distribution is defined to have an average of 1, with beta=alpha.");
176  }
177  else if (distName == "Gaussian")
178  {
179  rDist = make_unique<GaussianDiscreteRateDistribution>(nbClasses, 1);
180  if (args.find("mu") != args.end())
181  throw Exception("Found argument 'mu' in Gaussian distribution. Gaussian distribution is defined to have an average of 1, with mu=1.");
182  if (args.find("sigma") != args.end())
183  unparsedArguments_["Gaussian.sigma"] = args["sigma"];
184  }
185  else if (distName == "Exponential")
186  {
187  rDist = make_unique<ExponentialDiscreteRateDistribution>(nbClasses);
188  if (args.find("lambda") != args.end())
189  throw Exception("Found argument 'lambda' in Exponential distribution. Exponential distribution is defined to have an average of 1, with lambda=1.");
190  }
191  else
192  {
193  throw Exception("Unsupported rate distribution: " + distName + ".");
194  }
195  }
196  if (verbose_)
197  {
198  ApplicationTools::displayResult("Distribution", distName);
199  ApplicationTools::displayResult("Number of classes", TextTools::toString((int)rDist->getNumberOfCategories()));
200  }
201 
202  if (parseArguments)
203  initialize_(*rDist);
204 
205  return rDist;
206 }
207 
208 
210  const DiscreteDistributionInterface& dist,
211  OutputStream& out,
212  std::map<std::string, std::string>& globalAliases,
213  std::vector<std::string>& writtenNames) const
214 {
215  bool comma = false;
216 
217 
218  out << dist.getName() + "(";
219 
220  auto* invar = dynamic_cast<const InvariantMixedDiscreteDistribution*>(&dist);
221  if (invar)
222  {
223  auto& pd = invar->variableSubDistribution();
224  out << "dist=";
225  writeDiscreteDistribution(pd, out, globalAliases, writtenNames);
226  comma = true;
227  }
228  else
229  {
230  try
231  {
232  auto& mix = dynamic_cast<const MixtureOfDiscreteDistributions&>(dist);
233  size_t nd = mix.getNumberOfDistributions();
234  for (size_t i = 0; i < nd; ++i)
235  {
236  if (comma)
237  out << ",";
238  out << "dist" + TextTools::toString(i + 1) + "=";
239  writeDiscreteDistribution(mix.nDistribution(i), out, globalAliases, writtenNames);
240  comma = true;
241  }
242  out << ",probas=(";
243  for (size_t i = 0; i < nd; ++i)
244  {
245  out << mix.getNProbability(i);
246  if (i != nd - 1)
247  out << ",";
248  }
249  out << ")";
250  for (size_t i = 1; i < nd; ++i)
251  {
252  writtenNames.push_back(mix.getNamespace() + "theta" + TextTools::toString(i));
253  }
254  }
255  catch (bad_cast&)
256  {}
257  }
258  if (dynamic_cast<const ExponentialDiscreteRateDistribution*>(&dist) ||
259  dynamic_cast<const GammaDiscreteRateDistribution*>(&dist) ||
260  dynamic_cast<const GaussianDiscreteRateDistribution*>(&dist))
261  {
262  if (comma)
263  out << ",";
264  out << "n=" << dist.getNumberOfCategories();
265  comma = true;
266  }
267 
268  try
269  {
270  auto& ps = dynamic_cast<const SimpleDiscreteDistribution&>(dist);
271  size_t nd = ps.getNumberOfCategories();
272  if (comma)
273  out << ",";
274  out << "values=(";
275  for (size_t i = 0; i < nd; ++i)
276  {
277  out << ps.getCategory(i);
278  if (i != nd - 1)
279  out << ",";
280  }
281  out << "),probas=(";
282  for (size_t i = 0; i < nd; ++i)
283  {
284  out << ps.getProbability(i);
285  if (i != nd - 1)
286  out << ",";
287  }
288  out << ")";
289 
290  auto range = ps.getRanges();
291  if (range.size() != 0)
292  {
293  out << ", ranges=(";
294  auto it = range.begin();
295  while (it != range.end())
296  {
297  out << "V" << TextTools::toString(it->first);
298  out << "[" << TextTools::toString(it->second[0]) << ";" << TextTools::toString(it->second[1]) << "]";
299  it++;
300  if (it != range.end())
301  out << ",";
302  }
303  }
304 
305  out << ")";
306 
307  for (size_t i = 1; i < nd; ++i)
308  {
309  writtenNames.push_back(ps.getNamespace() + "theta" + TextTools::toString(i));
310  }
311  for (size_t i = 1; i < nd + 1; ++i)
312  {
313  writtenNames.push_back(ps.getNamespace() + "V" + TextTools::toString(i));
314  }
315 
316  comma = true;
317  }
318  catch (bad_cast&)
319  {}
320 
321 
322  // Writing the parameters
324  bOP.write(dynamic_cast<const ParameterAliasable&>(dist), out, globalAliases, dist.getIndependentParameters().getParameterNames(), writtenNames, true, comma);
325  out << ")";
326 }
size_t getNumberOfCategories() const
static void displayMessage(const std::string &text)
static void displayResult(const std::string &text, const T &result)
const std::map< std::string, std::string > & getUnparsedArguments() const
void write(const Parametrizable &parametrizable, OutputStream &out, std::vector< std::string > &writtenNames, bool printComma=false) const override
Rate Distribution I/O in BppO format.
std::unique_ptr< DiscreteDistributionInterface > readDiscreteDistribution(const std::string &distDescription, bool parseArguments)
void writeDiscreteDistribution(const DiscreteDistributionInterface &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
virtual size_t getNumberOfCategories() const=0
virtual std::string getName() const=0
const DiscreteDistributionInterface & variableSubDistribution() const
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
virtual const ParameterList & getIndependentParameters() const=0
virtual std::vector< std::string > getParameterNames() const
const std::string & nextToken()
bool hasMoreToken() const
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isEmpty(const std::string &s)
std::string toString(T t)
Defines the basic types of data flow nodes.