bpp-phyl3 3.0.0
BppORateDistributionFormat.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "../Model/RateDistribution/ConstantRateDistribution.h"
6#include "../Model/RateDistribution/ExponentialDiscreteRateDistribution.h"
7#include "../Model/RateDistribution/GammaDiscreteRateDistribution.h"
8#include "../Model/RateDistribution/GaussianDiscreteRateDistribution.h"
10
11// From bpp-core:
17
18using namespace bpp;
19
20// From the STL:
21#include <iomanip>
22
23using namespace std;
24
25
26unique_ptr<DiscreteDistributionInterface> BppORateDistributionFormat::readDiscreteDistribution(
27 const std::string& distDescription,
28 bool parseArguments)
29{
30 unparsedArguments_.clear();
31 string distName;
32 map<string, string> args;
33 KeyvalTools::parseProcedure(distDescription, distName, args);
34 unique_ptr<DiscreteDistributionInterface> rDist;
35
36 if (distName == "Uniform")
37 throw Exception("BppO Warning: Uniform distribution is deprecated, use Constant instead.");
38
39 if ((distName == "InvariantMixed") || (distName == "Invariant"))
40 {
41 // We have to parse the nested distribution first:
42 string nestedDistDescription = args["dist"];
43 if (TextTools::isEmpty(nestedDistDescription))
44 throw Exception("BppORateDistributionFormat::read. Missing argument 'dist' for distribution 'Invariant'.");
45 if (verbose_)
46 ApplicationTools::displayResult("Invariant Mixed distribution", distName );
47 BppORateDistributionFormat nestedReader(false);
48 auto nestedDistribution = nestedReader.readDiscreteDistribution(nestedDistDescription, false);
49 map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
50
51 // Now we create the Invariant rate distribution:
52 rDist = make_unique<InvariantMixedDiscreteDistribution>(std::move(nestedDistribution), 0.1, 0.000001);
53
54 // Then we update the parameter set:
55 for (auto it : unparsedArgumentsNested)
56 {
57 unparsedArguments_["Invariant." + it.first] = it.second;
58 }
59
60 if (args.find("p") != args.end())
61 unparsedArguments_["Invariant.p"] = args["p"];
62 }
63 else if (distName == "Constant")
64 {
65 if (!allowConstant_)
66 throw Exception("BppORateDistributionFormat::read(). Constant distribution not allowed.");
67
68 if (args.find("value") != args.end())
69 ApplicationTools::displayMessage("Found argument 'value' in Constant distribution. Constant distribution is defined to have an average of 1.");
70 rDist = make_unique<ConstantRateDistribution>();
71 }
72 else if (distName == "Simple")
73 {
74 if (args.find("values") == args.end())
75 throw Exception("Missing argument 'values' in Simple distribution");
76 if (args.find("probas") == args.end())
77 throw Exception("Missing argument 'probas' in Simple distribution");
78 vector<double> probas, values;
79
80 string rf = args["values"];
81 StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
82 while (strtok.hasMoreToken())
83 values.push_back(TextTools::toDouble(strtok.nextToken()));
84
85 rf = args["probas"];
86 StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
87 while (strtok2.hasMoreToken())
88 probas.push_back(TextTools::toDouble(strtok2.nextToken()));
89
90 std::map<size_t, std::vector<double>> ranges;
91
92 if (args.find("ranges") != args.end())
93 {
94 string rr = args["ranges"];
95 StringTokenizer strtok3(rr.substr(1, rr.length() - 2), ",");
96 string desc;
97 double deb, fin;
98 size_t num;
99 size_t po, pf, ppv;
100 while (strtok3.hasMoreToken())
101 {
102 desc = strtok3.nextToken();
103 po = desc.find("[");
104 ppv = desc.find(";");
105 pf = desc.find("]");
106 num = (size_t)(TextTools::toInt(desc.substr(1, po - 1)));
107 deb = TextTools::toDouble(desc.substr(po + 1, ppv - po - 1));
108 fin = TextTools::toDouble(desc.substr(ppv + 1, pf - ppv - 1));
109 vector<double> vd;
110 vd.push_back(deb);
111 vd.push_back(fin);
112 ranges[num] = vd;
113 }
114 }
115 if (ranges.size() == 0)
116 rDist = make_unique<SimpleDiscreteDistribution>(values, probas);
117 else
118 rDist = make_unique<SimpleDiscreteDistribution>(values, ranges, probas);
119
120 vector<string> v = rDist->getParameters().getParameterNames();
121
122 for (size_t i = 0; i < v.size(); ++i)
123 {
124 unparsedArguments_[v[i]] = TextTools::toString(rDist->getParameterValue(rDist->getParameterNameWithoutNamespace(v[i])));
125 }
126 }
127 else if (distName == "Mixture")
128 {
129 if (args.find("probas") == args.end())
130 throw Exception("Missing argument 'probas' in Mixture distribution");
131 vector<double> probas;
132 vector<unique_ptr<DiscreteDistributionInterface>> v_pdd;
133 string rf = args["probas"];
134 StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
135 while (strtok2.hasMoreToken())
136 probas.push_back(TextTools::toDouble(strtok2.nextToken()));
137
138 vector<string> v_nestedDistrDescr;
139
140 size_t nbd = 0;
141 while (args.find("dist" + TextTools::toString(++nbd)) != args.end())
142 v_nestedDistrDescr.push_back(args["dist" + TextTools::toString(nbd)]);
143
144 if (v_nestedDistrDescr.size() != probas.size())
145 throw Exception("Number of distributions (keyword 'dist" + TextTools::toString(probas.size()) + "') do not fit the number of probabilities");
146
147 for (unsigned i = 0; i < v_nestedDistrDescr.size(); i++)
148 {
149 BppORateDistributionFormat nestedReader(false);
150 auto pdd = nestedReader.readDiscreteDistribution(v_nestedDistrDescr[i], false);
151 map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
152
153 for (auto& it : unparsedArgumentsNested)
154 {
155 unparsedArguments_[distName + "." + TextTools::toString(i + 1) + "_" + it.first] = it.second;
156 }
157 v_pdd.push_back(std::move(pdd));
158 }
159 rDist = make_unique<MixtureOfDiscreteDistributions>(v_pdd, probas);
160 }
161 else
162 {
163 if (args.find("n") == args.end())
164 throw Exception("Missing argument 'n' (number of classes) in " + distName
165 + " distribution");
166 size_t nbClasses = TextTools::to<size_t>(args["n"]);
167
168 if (distName == "Gamma")
169 {
170 rDist = make_unique<GammaDiscreteRateDistribution>(nbClasses, 1.);
171
172 if (args.find("alpha") != args.end())
173 unparsedArguments_["Gamma.alpha"] = args["alpha"];
174 if (args.find("beta") != args.end())
175 throw Exception("Found argument 'beta' in Gamma distribution. Gamma distribution is defined to have an average of 1, with beta=alpha.");
176 }
177 else if (distName == "Gaussian")
178 {
179 rDist = make_unique<GaussianDiscreteRateDistribution>(nbClasses, 1);
180 if (args.find("mu") != args.end())
181 throw Exception("Found argument 'mu' in Gaussian distribution. Gaussian distribution is defined to have an average of 1, with mu=1.");
182 if (args.find("sigma") != args.end())
183 unparsedArguments_["Gaussian.sigma"] = args["sigma"];
184 }
185 else if (distName == "Exponential")
186 {
187 rDist = make_unique<ExponentialDiscreteRateDistribution>(nbClasses);
188 if (args.find("lambda") != args.end())
189 throw Exception("Found argument 'lambda' in Exponential distribution. Exponential distribution is defined to have an average of 1, with lambda=1.");
190 }
191 else
192 {
193 throw Exception("Unsupported rate distribution: " + distName + ".");
194 }
195 }
196 if (verbose_)
197 {
198 ApplicationTools::displayResult("Distribution", distName);
199 ApplicationTools::displayResult("Number of classes", TextTools::toString((int)rDist->getNumberOfCategories()));
200 }
201
202 if (parseArguments)
203 initialize_(*rDist);
204
205 return rDist;
206}
207
208
211 OutputStream& out,
212 std::map<std::string, std::string>& globalAliases,
213 std::vector<std::string>& writtenNames) const
214{
215 bool comma = false;
216
217
218 out << dist.getName() + "(";
219
220 auto* invar = dynamic_cast<const InvariantMixedDiscreteDistribution*>(&dist);
221 if (invar)
222 {
223 auto& pd = invar->variableSubDistribution();
224 out << "dist=";
225 writeDiscreteDistribution(pd, out, globalAliases, writtenNames);
226 comma = true;
227 }
228 else
229 {
230 try
231 {
232 auto& mix = dynamic_cast<const MixtureOfDiscreteDistributions&>(dist);
233 size_t nd = mix.getNumberOfDistributions();
234 for (size_t i = 0; i < nd; ++i)
235 {
236 if (comma)
237 out << ",";
238 out << "dist" + TextTools::toString(i + 1) + "=";
239 writeDiscreteDistribution(mix.nDistribution(i), out, globalAliases, writtenNames);
240 comma = true;
241 }
242 out << ",probas=(";
243 for (size_t i = 0; i < nd; ++i)
244 {
245 out << mix.getNProbability(i);
246 if (i != nd - 1)
247 out << ",";
248 }
249 out << ")";
250 for (size_t i = 1; i < nd; ++i)
251 {
252 writtenNames.push_back(mix.getNamespace() + "theta" + TextTools::toString(i));
253 }
254 }
255 catch (bad_cast&)
256 {}
257 }
258 if (dynamic_cast<const ExponentialDiscreteRateDistribution*>(&dist) ||
259 dynamic_cast<const GammaDiscreteRateDistribution*>(&dist) ||
260 dynamic_cast<const GaussianDiscreteRateDistribution*>(&dist))
261 {
262 if (comma)
263 out << ",";
264 out << "n=" << dist.getNumberOfCategories();
265 comma = true;
266 }
267
268 try
269 {
270 auto& ps = dynamic_cast<const SimpleDiscreteDistribution&>(dist);
271 size_t nd = ps.getNumberOfCategories();
272 if (comma)
273 out << ",";
274 out << "values=(";
275 for (size_t i = 0; i < nd; ++i)
276 {
277 out << ps.getCategory(i);
278 if (i != nd - 1)
279 out << ",";
280 }
281 out << "),probas=(";
282 for (size_t i = 0; i < nd; ++i)
283 {
284 out << ps.getProbability(i);
285 if (i != nd - 1)
286 out << ",";
287 }
288 out << ")";
289
290 auto range = ps.getRanges();
291 if (range.size() != 0)
292 {
293 out << ", ranges=(";
294 auto it = range.begin();
295 while (it != range.end())
296 {
297 out << "V" << TextTools::toString(it->first);
298 out << "[" << TextTools::toString(it->second[0]) << ";" << TextTools::toString(it->second[1]) << "]";
299 it++;
300 if (it != range.end())
301 out << ",";
302 }
303 }
304
305 out << ")";
306
307 for (size_t i = 1; i < nd; ++i)
308 {
309 writtenNames.push_back(ps.getNamespace() + "theta" + TextTools::toString(i));
310 }
311 for (size_t i = 1; i < nd + 1; ++i)
312 {
313 writtenNames.push_back(ps.getNamespace() + "V" + TextTools::toString(i));
314 }
315
316 comma = true;
317 }
318 catch (bad_cast&)
319 {}
320
321
322 // Writing the parameters
324 bOP.write(dynamic_cast<const ParameterAliasable&>(dist), out, globalAliases, dist.getIndependentParameters().getParameterNames(), writtenNames, true, comma);
325 out << ")";
326}
size_t getNumberOfCategories() const
static void displayMessage(const std::string &text)
static void displayResult(const std::string &text, const T &result)
const std::map< std::string, std::string > & getUnparsedArguments() const
std::map< std::string, std::string > unparsedArguments_
void initialize_(DiscreteDistributionInterface &rDist)
void write(const Parametrizable &parametrizable, OutputStream &out, std::vector< std::string > &writtenNames, bool printComma=false) const override
Rate Distribution I/O in BppO format.
std::unique_ptr< DiscreteDistributionInterface > readDiscreteDistribution(const std::string &distDescription, bool parseArguments)
void writeDiscreteDistribution(const DiscreteDistributionInterface &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
virtual size_t getNumberOfCategories() const=0
virtual std::string getName() const=0
const DiscreteDistributionInterface & variableSubDistribution() const
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
virtual const ParameterList & getIndependentParameters() const=0
virtual std::vector< std::string > getParameterNames() const
const std::string & nextToken()
bool hasMoreToken() const
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isEmpty(const std::string &s)
std::string toString(T t)
Defines the basic types of data flow nodes.