bpp-phyl3 3.0.0
BppOFrequencySetFormat.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "../Model/FrequencySet/CodonFrequencySet.h"
6#include "../Model/FrequencySet/MvaFrequencySet.h"
7#include "../Model/FrequencySet/NucleotideFrequencySet.h"
8#include "../Model/FrequencySet/ProteinFrequencySet.h"
10
11// From bpp-core:
12#include <Bpp/Io/FileTools.h>
14#include <Bpp/Text/TextTools.h>
19
20// From bpp-seq:
24
25using namespace bpp;
26
27// From the STL:
28#include <iomanip>
29#include <memory>
30
32
33using namespace std;
34
35unsigned char BppOFrequencySetFormat::DNA = 1;
36unsigned char BppOFrequencySetFormat::RNA = 2;
37unsigned char BppOFrequencySetFormat::NUCLEOTIDE = 1 | 2;
38unsigned char BppOFrequencySetFormat::PROTEIN = 4;
39unsigned char BppOFrequencySetFormat::CODON = 8;
40unsigned char BppOFrequencySetFormat::WORD = 16;
41unsigned char BppOFrequencySetFormat::ALL = 1 | 2 | 4 | 8 | 16;
42
43
44std::unique_ptr<FrequencySetInterface> BppOFrequencySetFormat::readFrequencySet(
45 std::shared_ptr<const Alphabet> alphabet,
46 const std::string& freqDescription,
47 const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
48 size_t nData,
49 bool parseArguments)
50{
51 unparsedArguments_.clear();
52 string freqName;
53 map<string, string> args;
54 KeyvalTools::parseProcedure(freqDescription, freqName, args);
55 unique_ptr<FrequencySetInterface> pFS;
56
57 if (args.find("data") != args.end())
58 nData = TextTools::to<size_t>(args["data"]);
59
60 if (nData && mData.find(nData) == mData.end())
61 throw Exception("Unknown data for number " + TextTools::toString(nData));
62
63 if (freqName == "Fixed")
64 {
66 {
68 pFS = make_unique<FixedNucleotideFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet));
69 else
70 throw Exception("Nucleotide alphabet not supported.");
71 }
72 else if (AlphabetTools::isProteicAlphabet(*alphabet))
73 {
75 pFS = make_unique<FixedProteinFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet));
76 else
77 throw Exception("Protein alphabet not supported.");
78 }
79 else if (AlphabetTools::isCodonAlphabet(*alphabet))
80 {
81 if (alphabetCode_ & CODON)
82 {
83 if (!geneticCode_)
84 throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
86 }
87 else
88 {
89 throw Exception("Codon alphabet not supported.");
90 }
91 }
92 else
93 {
94 pFS = make_unique<FixedFrequencySet>(make_shared<const CanonicalStateMap>(alphabet, false));
95 }
96 }
97 else if (freqName == "Full")
98 {
99 unsigned short method = 1;
100 if (args.find("method") != args.end())
101 {
102 if (args["method"] == "local")
103 method = 2;
104 else if (args["method"] == "binary")
105 method = 3;
106 }
107
109 {
111 pFS = make_unique<FullNucleotideFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet));
112 else
113 throw Exception("Nucleotide alphabet not supported.");
114 }
115 else if (AlphabetTools::isProteicAlphabet(*alphabet))
116 {
118 pFS = make_unique<FullProteinFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet), false, method);
119 else
120 throw Exception("Protein alphabet not supported.");
121 }
122 else if (AlphabetTools::isCodonAlphabet(*alphabet))
123 {
124 if (alphabetCode_ & CODON)
125 {
126 if (!geneticCode_)
127 throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
128 pFS = make_unique<FullCodonFrequencySet>(geneticCode_);
129 }
130 else
131 {
132 throw Exception("Codon alphabet not supported.");
133 }
134 }
135 else
136 {
137 // NB: jdutheil 25/09/14 => gap models will not be supported before we add the appropriate option!
138 pFS = make_unique<FullFrequencySet>(make_shared<CanonicalStateMap>(alphabet, false), false, method);
139 }
140 }
141 else if (freqName == "Empirical")
142 {
143 string fname = args["file"];
144 if (TextTools::isEmpty(fname))
145 throw Exception("'file' argument missing for user-defined frequencies.");
146
147 size_t nCol = 1;
148 if (args.find("col") != args.end())
149 nCol = size_t(TextTools::toInt(args["col"]));
150
152 {
154 pFS = make_unique<UserNucleotideFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet), fname, nCol);
155 else
156 throw Exception("Nucleotide alphabet not supported.");
157 }
158 else if (AlphabetTools::isProteicAlphabet(*alphabet))
159 {
161 pFS = make_unique<UserProteinFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet), fname, nCol);
162 else
163 throw Exception("Protein alphabet not supported.");
164 }
165 else if (AlphabetTools::isCodonAlphabet(*alphabet))
166 {
167 if (alphabetCode_ & CODON)
168 {
169 if (!geneticCode_)
170 throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
171 pFS = make_unique<UserCodonFrequencySet>(geneticCode_, fname, nCol);
172 }
173 else
174 {
175 throw Exception("Codon alphabet not supported.");
176 }
177 }
178 else
179 {
180 pFS = make_unique<UserFrequencySet>(make_shared<const CanonicalStateMap>(alphabet, false), fname, nCol);
181 }
182 }
183 else if (freqName == "GC")
184 {
185 if (!AlphabetTools::isNucleicAlphabet(*alphabet))
186 throw Exception("Error, invalid frequencies " + freqName + " with non-nucleic alphabet.");
188 pFS = make_unique<GCFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet));
189 else
190 throw Exception("Nucleotide alphabet not supported.");
191 }
192
193 // INDEPENDENTWORD
194 else if (freqName == "Word")
195 {
196 if (!(alphabetCode_ & WORD))
197 throw Exception("Word alphabet not supported.");
198 if (!AlphabetTools::isWordAlphabet(*alphabet))
199 throw Exception("BppOFrequencySetFormat::readFrequencySet(...).\n\t Bad alphabet type "
200 + alphabet->getAlphabetType() + " for frequencies set " + freqName + ".");
201
202 auto pWA = dynamic_pointer_cast<const WordAlphabet>(alphabet);
203
204 if (!pWA)
205 throw Exception("BppOFrequencySetFormat::read : Word freq name is from WordAlphabet.");
206
207 if (args.find("frequency") != args.end())
208 {
209 string sAFS = args["frequency"];
210
211 unsigned int nbfreq = pWA->getLength();
212 string st = "";
213 for (unsigned i = 0; i < nbfreq; ++i)
214 {
215 st += TextTools::toString(i + 1);
216 }
217
219 auto pFS2 = nestedReader.readFrequencySet(pWA->getNAlphabet(0), sAFS, mData, nData, false);
220 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
221 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
222 {
223 unparsedArguments_[st + "_" + it->first] = it->second;
224 }
225 pFS = make_unique<WordFromUniqueFrequencySet>(pWA, std::move(pFS2));
226 }
227 else
228 {
229 if (args.find("frequency1") == args.end())
230 throw Exception("BppOFrequencySetFormat::read. Missing argument 'frequency' or 'frequency1' for frequencies set 'Word'.");
231 vector<string> v_sAFS;
232 vector<unique_ptr<FrequencySetInterface>> v_AFS;
233 unsigned int nbfreq = 1;
234
235 while (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
236 {
237 v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq++)]);
238 }
239
240 if (v_sAFS.size() != pWA->getLength())
241 throw Exception("BppOFrequencySetFormat::read. Number of frequencies (" + TextTools::toString(v_sAFS.size()) + ") does not match length of the words (" + TextTools::toString(pWA->getLength()) + ")");
242
243 for (size_t i = 0; i < v_sAFS.size(); ++i)
244 {
246 pFS = nestedReader.readFrequencySet(pWA->getNAlphabet(i), v_sAFS[i], mData, nData, false);
247 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
248 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
249 {
250 unparsedArguments_[TextTools::toString(i + 1) + "_" + it->first] = it->second;
251 }
252 v_AFS.push_back(std::move(pFS));
253 }
254
255 pFS = make_unique<WordFromIndependentFrequencySet>(pWA, v_AFS);
256 }
257 }
258 // From Model
259 else if (freqName == "FromModel")
260 {
261 if (args.find("model") == args.end())
262 throw Exception("Missing argument 'model' for frequencies " + freqName + ".");
263
264 BppOTransitionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
265 if (geneticCode_)
266 nestedReader.setGeneticCode(geneticCode_);
267
268 auto model = nestedReader.readTransitionModel(alphabet, args["model"], mData, nData, false);
269 pFS = make_unique<FromModelFrequencySet>(std::move(model));
270 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
271 for (auto& it : unparsedParameterValuesNested)
272 {
273 unparsedArguments_["FromModel." + it.first] = it.second;
274 }
275 }
276
277
278 // INDEPENDENT CODON
279 else if (freqName == "Codon")
280 {
281 if (!(alphabetCode_ & CODON))
282 throw Exception("Codon alphabet not supported.");
283 if (!AlphabetTools::isCodonAlphabet(*alphabet))
284 throw Exception("BppOFrequencySetFormat::read.\n\t Bad alphabet type "
285 + alphabet->getAlphabetType() + " for frequenciesset " + freqName + ".");
286 if (!geneticCode_)
287 throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
288
289 auto pWA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
290
291 if (args.find("frequency") != args.end())
292 {
293 string sAFS = args["frequency"];
294
296 auto pFS2 = nestedReader.readFrequencySet(pWA->getNucleicAlphabet(), sAFS, mData, nData, false);
297 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
298
299 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
300 {
301 unparsedArguments_["123_" + it->first] = it->second;
302 }
303
304 pFS = make_unique<CodonFromUniqueFrequencySet>(geneticCode_, std::move(pFS2), "Codon");
305 }
306 else
307 {
308 if (args.find("frequency1") == args.end())
309 throw Exception("BppOFrequencySetFormat::read. Missing argument 'frequency' or 'frequency1' for frequencies set.");
310 vector<string> v_sAFS;
311 vector<unique_ptr<FrequencySetInterface>> v_AFS;
312 unsigned int nbfreq = 1;
313
314 while (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
315 {
316 v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq++)]);
317 }
318
319 if (v_sAFS.size() != 3)
320 throw Exception("BppOFrequencySetFormat::read. Number of frequencies (" + TextTools::toString(v_sAFS.size()) + ") is not three");
321
322 for (size_t i = 0; i < v_sAFS.size(); ++i)
323 {
325 pFS = nestedReader.readFrequencySet(pWA->getNucleicAlphabet(), v_sAFS[i], mData, nData, false);
326 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
327 for (auto& it : unparsedParameterValuesNested)
328 {
329 unparsedArguments_[TextTools::toString(i + 1) + "_" + it.first] = it.second;
330 }
331 v_AFS.push_back(std::move(pFS));
332 }
333
334 if (!geneticCode_)
335 throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
336 pFS = make_unique<CodonFromIndependentFrequencySet>(geneticCode_, v_AFS, "Codon");
337 }
338 }
339
340 // CODON PER AA Frequencies
341 else if (freqName == "FullPerAA")
342 {
343 if (!(alphabetCode_ & CODON))
344 throw Exception("Codon alphabet not supported.");
345 if (!AlphabetTools::isCodonAlphabet(*alphabet))
346 throw Exception("BppOFrequencySetFormat::read.\n\t Bad alphabet type "
347 + alphabet->getAlphabetType() + " for frequenciesset " + freqName + ".");
348
349 if (!geneticCode_)
350 throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
351
352 auto pPA = dynamic_pointer_cast<const ProteicAlphabet>(geneticCode_->getTargetAlphabet());
353
354 unsigned short method = 1;
355 if (args.find("method") != args.end())
356 {
357 if (args["method"] == "local")
358 method = 2;
359 else if (args["method"] == "binary")
360 method = 3;
361 }
362
363 if (args.find("protein_frequencies") != args.end())
364 {
365 string sPFS = args["protein_frequencies"];
367 auto tmpPtr = nestedReader.readFrequencySet(pPA, sPFS, mData, nData, false);
368 unique_ptr<ProteinFrequencySetInterface> pPFS(dynamic_cast<ProteinFrequencySetInterface*>(tmpPtr.release()));
369 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
370
371 for (auto& it : unparsedParameterValuesNested)
372 {
373 unparsedArguments_["FullPerAA." + it.first] = it.second;
374 }
375 pFS = make_unique<FullPerAACodonFrequencySet>(geneticCode_, std::move(pPFS), method);
376 }
377 else
378 pFS = make_unique<FullPerAACodonFrequencySet>(geneticCode_, method);
379 }
380
381 // codeml frequencies syntax
382
383 else if (AlphabetTools::isCodonAlphabet(*alphabet))
384 {
385 if (!(alphabetCode_ & CODON))
386 throw Exception("Codon alphabet not supported.");
387 if (!geneticCode_)
388 throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
389
390 auto pWA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
391
392 if (args.find("genetic_code") != args.end())
393 {
394 ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
395 throw Exception("BppOFrequencySetFormat::read. Deprecated 'genetic_code' argument.");
396 }
397 short opt = -1;
398 string mgmtStopCodon = "quadratic";
399
400 if (freqName == "F0")
401 {
403 }
404 else if (freqName == "F1X4")
405 {
407
408 if (args.find("mgmtStopCodons") != args.end())
409 mgmtStopCodon = args["mgmtStopCodons"];
410 else if (args.find("mgmtStopCodon") != args.end())
411 mgmtStopCodon = args["mgmtStopCodon"];
412
413 if (verbose_)
414 ApplicationTools::displayResult("StopCodon frequencies distribution ", mgmtStopCodon);
415
416 if (args.find("frequency") != args.end())
417 {
418 string sAFS = args["frequency"];
419
421 auto pFS2 = nestedReader.readFrequencySet(pWA->getNucleicAlphabet(), sAFS, mData, nData, false);
422 if (pFS2->getName() != "Full")
423 throw Exception("BppOFrequencySetFormat::read. The frequency option in F1X4 can only be Full");
424
425 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
426
427 for (auto& it : unparsedParameterValuesNested)
428 {
429 unparsedArguments_["123_" + it.first] = it.second;
430 }
431 }
432
433 if (args.find("123_theta") != args.end())
434 unparsedArguments_["123_Full.theta"] = args["123_theta"];
435 if (args.find("123_theta1") != args.end())
436 unparsedArguments_["123_Full.theta1"] = args["123_theta1"];
437 if (args.find("123_theta2") != args.end())
438 unparsedArguments_["123_Full.theta2"] = args["123_theta2"];
439 if (args.find("theta") != args.end())
440 unparsedArguments_["123_Full.theta"] = args["123_theta"];
441 if (args.find("theta1") != args.end())
442 unparsedArguments_["123_Full.theta1"] = args["123_theta1"];
443 if (args.find("theta2") != args.end())
444 unparsedArguments_["123_Full.theta2"] = args["123_theta2"];
445 }
446 else if (freqName == "F3X4")
447 {
449
450 if (args.find("mgmtStopCodons") != args.end())
451 mgmtStopCodon = args["mgmtStopCodons"];
452 else if (args.find("mgmtStopCodon") != args.end())
453 mgmtStopCodon = args["mgmtStopCodon"];
454
455 if (verbose_)
456 ApplicationTools::displayResult("StopCodon frequencies distribution ", mgmtStopCodon);
457
458 if (args.find("frequency1") != args.end() ||
459 args.find("frequency2") != args.end() ||
460 args.find("frequency3") != args.end())
461 {
462 vector<string> v_sAFS;
463
464 for (unsigned int nbfreq = 1; nbfreq <= 3; ++nbfreq)
465 {
466 if (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
467 v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq)]);
468 else
469 v_sAFS.push_back("");
470 }
471
472 for (size_t i = 0; i < v_sAFS.size(); ++i)
473 {
475
476 auto sAFS = v_sAFS[i];
477 if (sAFS != "")
478 {
479 pFS = nestedReader.readFrequencySet(pWA->getNucleicAlphabet(), sAFS, mData, nData, false);
480 if (pFS->getName() != "Full")
481 throw Exception("BppOFrequencySetFormat::read. The frequency options in F3X4 can only be Full");
482
483 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
484 for (auto& it : unparsedParameterValuesNested)
485 {
486 unparsedArguments_[TextTools::toString(i + 1) + "_" + it.first] = it.second;
487 }
488 }
489 }
490 }
491 for (unsigned int i = 1; i <= 3; ++i)
492 {
493 if (args.find(TextTools::toString(i) + "_theta") != args.end())
494 unparsedArguments_[TextTools::toString(i) + "_Full.theta"] = args[TextTools::toString(i) + "_theta"];
495 if (args.find(TextTools::toString(i) + "_theta1") != args.end())
496 unparsedArguments_[TextTools::toString(i) + "_Full.theta1"] = args[TextTools::toString(i) + "_theta1"];
497 if (args.find(TextTools::toString(i) + "_theta2") != args.end())
498 unparsedArguments_[TextTools::toString(i) + "_Full.theta2"] = args[TextTools::toString(i) + "_theta2"];
499 }
500 }
501 else if (freqName == "F61")
502 {
504 }
505 if (opt != -1)
506 {
507 unsigned short method = 1;
508 if (args.find("method") != args.end())
509 {
510 if (args["method"] == "local")
511 method = 2;
512 else if (args["method"] == "binary")
513 method = 3;
514 }
516 }
517 else
518 throw Exception("Unknown frequency option: " + freqName);
519 }
520
521 // MVAprotein freq set for COaLA model
522 else if (freqName == "MVAprotein")
523 {
524 if (args.find("model") == args.end())
525 throw Exception("Missing argument 'model' for frequencies " + freqName + ".");
526
527 BppOTransitionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
528
529 auto model = nestedReader.readTransitionModel(alphabet, args["model"], mData, nData, false);
530 if (dynamic_cast<Coala*>(model.get()) == 0)
531 throw Exception("MVAprotein frequency set needs a Coala model");
532
533 auto coala = unique_ptr<Coala>(dynamic_cast<Coala*>(model.release()));
534 // map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
535
536 auto mvaFS = make_unique<MvaFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet));
537 // mvaFS->setParamValues(args);
538 mvaFS->initSet(*coala);
539
540 pFS = std::move(mvaFS);
541 }
542 else
543 throw Exception("Unknown frequency option: " + freqName);
544
545 // Update parameter args:
546 vector<string> pnames = pFS->getParameters().getParameterNames();
547
548 string pref = pFS->getNamespace();
549 for (size_t i = 0; i < pnames.size(); i++)
550 {
551 string name = pFS->getParameterNameWithoutNamespace(pnames[i]);
552 if (args.find(name) != args.end())
553 unparsedArguments_[pref + name] = args[name];
554 }
555
556 // Now look if some parameters are aliased:
557 ParameterList pl = pFS->getIndependentParameters();
558 string pname, pval, pname2;
559 for (size_t i = 0; i < pl.size(); i++)
560 {
561 pname = pFS->getParameterNameWithoutNamespace(pl[i].getName());
562
563 if (args.find(pname) == args.end())
564 continue;
565 pval = args[pname];
566
567 if (((pval.rfind("_") != string::npos) && (TextTools::isDecimalInteger(pval.substr(pval.rfind("_") + 1, string::npos)))) ||
568 (pval.find("(") != string::npos))
569 continue;
570 bool found = false;
571 for (size_t j = 0; j < pl.size() && !found; j++)
572 {
573 pname2 = pFS->getParameterNameWithoutNamespace(pl[j].getName());
574
575 if (j == i)
576 continue;
577 if (pval == pname2)
578 {
579 // This is an alias...
580 // NB: this may throw an exception if incorrect! We leave it as is for now :s
581 pFS->aliasParameters(pname2, pname);
582 if (verbose_)
583 ApplicationTools::displayResult("Parameter alias found", pname + "->" + pname2);
584 found = true;
585 }
586 }
587 // if (!TextTools::isDecimalNumber(pval) && !found)
588 // throw Exception("Incorrect parameter syntax: parameter " + pval + " was not found and can't be used as a value for parameter " + pname + ".");
589 }
590
591 // Forward arguments:
592 if (args.find("init") != args.end())
593 unparsedArguments_["init"] = args["init"];
594
595 if (args.find("init.observedPseudoCount") != args.end())
596 unparsedArguments_["init.observedPseudoCount"] = args["init.observedPseudoCount"];
597
598 std::shared_ptr<const AlignmentDataInterface> data(0);
599 if (nData)
600 data = mData.at(nData);
601
602 if (args.find("values") != args.end())
603 {
604 unparsedArguments_["init"] = "values" + args["values"];
605 initialize_(*pFS, data);
606 }
607 else if (parseArguments)
608 initialize_(*pFS, data);
609
610 return pFS;
611}
612
614 const FrequencySetInterface& freqset,
615 OutputStream& out,
616 std::map<std::string, std::string>& globalAliases,
617 std::vector<std::string>& writtenNames) const
618{
619 ParameterList pl = freqset.getParameters();
620 vector<string> vpl = pl.getParameterNames();
621
622 for (auto& pn : vpl)
623 {
624 if (find(writtenNames.begin(), writtenNames.end(), pn) != writtenNames.end())
625 pl.deleteParameter(pn);
626 }
627
628 if (pl.size() == 0)
629 return;
630
631 int p = out.getPrecision();
632 out.setPrecision(12);
633 bool comma(false);
634 string name = freqset.getName();
635 out << name << "(";
636
637 bool oValues = false;
638
639 if ((name == "Fixed") || (name == "F0") || (name == "Full"))
640 {
641 vector<double> vf = freqset.getFrequencies();
642 out << "values=(";
643 for (size_t i = 0; i < vf.size(); ++i)
644 {
645 if (i != 0)
646 out << ", ";
647 out << vf[i];
648 }
649 out << ")";
650
651 vector<string> npl = pl.getParameterNames();
652 writtenNames.insert(writtenNames.end(), npl.begin(), npl.end());
653
654 comma = true;
655
656 oValues = true;
657 }
658
659 try
660 {
661 auto ufs = dynamic_cast<const UserFrequencySet&>(freqset);
662 out << "file=" << ufs.getPath();
663 size_t nCol = ufs.getColumnNumber();
664
665 if (nCol != 1)
666 out << ", col=" << nCol;
667
668 oValues = true;
669 }
670 catch (bad_cast&)
671 {}
672
673 // For Word or Codon FS : length mgmt
674 try
675 {
676 auto pWFI = dynamic_cast<const WordFromIndependentFrequencySet&>(freqset);
677 if (name != "F3X4")
678 {
679 for (size_t i = 0; i < pWFI.getLength(); ++i)
680 {
681 if (i != 0)
682 out << ", ";
683 out << "frequency" << i + 1 << "=";
684 writeFrequencySet(pWFI.frequencySetForLetter(i), out, globalAliases, writtenNames);
685 }
686 comma = true;
687 }
688 }
689 catch (bad_cast&)
690 {}
691
692 try
693 {
694 auto pWFU = dynamic_cast<const WordFromUniqueFrequencySet&>(freqset);
695 if (name != "F1X4")
696 {
697 for (size_t i = 0; i < pWFU.getLength(); ++i)
698 {
699 if (i != 0)
700 out << ", ";
701 out << "frequency=";
702 writeFrequencySet(pWFU.frequencySetForLetter(i), out, globalAliases, writtenNames);
703 }
704 comma = true;
705 }
706 }
707 catch (bad_cast&)
708 {}
709
710 // FromModel
711
712 try
713 {
714 auto pFMFS = dynamic_cast<const FromModelFrequencySet&>(freqset);
715 if (comma)
716 out << ", ";
717 out << "model=";
718
719 BppOSubstitutionModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, 0);
720
721 bIO.write(*(pFMFS.getModel()), out, globalAliases, writtenNames);
722 comma = true;
723 }
724 catch (bad_cast&)
725 {}
726
727
728 // FullPerAA
729 try
730 {
731 auto pFPA = dynamic_cast<const FullPerAACodonFrequencySet&>(freqset);
732 out << "protein_frequencies=";
733
734 if (pFPA.hasProteinFrequencySet())
735 {
736 writeFrequencySet(pFPA.proteinFrequencySet(), out, globalAliases, writtenNames);
737 }
738 else
739 {
740 out << "None";
741 }
742 comma = true;
743
744 unsigned short meth = pFPA.getMethod();
745 if (meth > 1)
746 {
747 if (comma)
748 out << ",";
749 out << "method=" << ((meth == 2) ? "local" : "binary");
750 comma = true;
751 }
752 }
753 catch (bad_cast&)
754 {}
755
756
757 // method
758 try
759 {
760 size_t meth = dynamic_cast<const FullProteinFrequencySet&>(freqset).getMethod();
761 if (meth > 1)
762 {
763 if (comma)
764 out << ",";
765 out << "method=" << ((meth == 2) ? "local" : "binary");
766 comma = true;
767 }
768 }
769 catch (bad_cast&)
770 {}
771
772 try
773 {
774 auto pF = dynamic_cast<const FullCodonFrequencySet&>(freqset);
775 unsigned short meth = pF.getMethod();
776 if (meth > 1)
777 {
778 if (comma)
779 out << ",";
780 out << "method=" << ((meth == 2) ? "local" : "binary");
781 comma = true;
782 }
783 }
784 catch (bad_cast&)
785 {}
786
787 // mgmtStopCodon
788 try
789 {
790 auto pCFU = dynamic_cast<const CodonFromUniqueFrequencySet&>(freqset);
791 if (pCFU.getMgmtStopCodon() != "quadratic")
792 {
793 if (comma)
794 out << ",";
795 out << "mgmtStopCodons=" << pCFU.getMgmtStopCodon();
796 comma = true;
797 }
798 }
799 catch (bad_cast&)
800 {}
801
802 try
803 {
804 auto pCFI = dynamic_cast<const CodonFromIndependentFrequencySet&>(freqset);
805 if (pCFI.getMgmtStopCodon() != "quadratic")
806 {
807 if (comma)
808 out << ",";
809 out << "mgmtStopCodons=" << pCFI.getMgmtStopCodon();
810 comma = true;
811 }
812 }
813 catch (bad_cast&)
814 {}
815
816 // All remaining parameters
817 if (!oValues)
818 {
820 bIO.write(freqset, out, globalAliases, pl.getParameterNames(), writtenNames, true, comma);
821 }
822
823 out << ")";
824 out.setPrecision(p);
825}
826
828 FrequencySetInterface& freqSet,
829 std::shared_ptr<const AlignmentDataInterface> data)
830{
831 if (unparsedArguments_.find("init") != unparsedArguments_.end())
832 {
833 // Initialization using the "init" option
834 string init = unparsedArguments_["init"];
835
836 if (init == "observed")
837 {
838 if (!data)
839 throw Exception("Missing data number for init 'observed'.");
840
841 unsigned int psc = 0;
842 if (unparsedArguments_.find("init.observedPseudoCount") != unparsedArguments_.end())
843 psc = TextTools::to<unsigned int>(unparsedArguments_["init.observedPseudoCount"]);
844
845 map<int, double> freqs;
847
849 }
850 else if (init.substr(0, 6) == "values")
851 {
852 // Initialization using the "values" argument
853 vector<double> frequencies;
854 string rf = init.substr(6);
855
856 StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
857 while (strtok.hasMoreToken())
858 frequencies.push_back(TextTools::toDouble(strtok.nextToken()));
859 freqSet.setFrequencies(frequencies);
860 }
861 else if (init == "balanced")
862 {
863 // Nothing to do here, this is the default instantiation.
864 }
865 else
866 throw Exception("Unknown init argument");
867
868 unparsedArguments_.erase(unparsedArguments_.find("init"));
869 if (unparsedArguments_.find("init.observedPseudoCount") != unparsedArguments_.end())
870 unparsedArguments_.erase(unparsedArguments_.find("init.observedPseudoCount"));
871 }
872
873 // Explicit initialization of each parameter
875
876 for (size_t i = 0; i < pl.size(); ++i)
877 {
878 AutoParameter ap(pl[i]);
879 if (verbose_)
881 pl.setParameter(i, ap);
882 }
883
884 for (size_t i = 0; i < pl.size(); ++i)
885 {
886 const string pName = pl[i].getName();
887
888 try
889 {
890 double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue(), "", true, warningLevel_);
891
892 pl[i].setValue(value);
893
894 if (unparsedArguments_.find(pName) != unparsedArguments_.end())
895 unparsedArguments_.erase(unparsedArguments_.find(pName));
896
897 if (verbose_)
898 ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
899 }
900 catch (Exception& e)
901 {}
902 }
903
904 freqSet.matchParametersValues(pl);
905}
static bool isProteicAlphabet(const Alphabet &alphabet)
static bool isNucleicAlphabet(const Alphabet &alphabet)
static bool isCodonAlphabet(const Alphabet &alphabet)
static bool isWordAlphabet(const Alphabet &alphabet)
static std::shared_ptr< OutputStream > warning
static void displayWarning(const std::string &text)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
virtual void setMessageHandler(std::shared_ptr< OutputStream > mh)
Frequencies set I/O in BppO format.
void writeFrequencySet(const FrequencySetInterface &freqset, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
std::unique_ptr< FrequencySetInterface > readFrequencySet(std::shared_ptr< const Alphabet > alphabet, const std::string &freqDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true) override
Read a frequencies set from a string.
void initialize_(FrequencySetInterface &freqSet, std::shared_ptr< const AlignmentDataInterface > data)
const std::map< std::string, std::string > & getUnparsedArguments() const override
std::shared_ptr< const GeneticCode > geneticCode_
std::map< std::string, std::string > unparsedArguments_
void write(const Parametrizable &parametrizable, OutputStream &out, std::vector< std::string > &writtenNames, bool printComma=false) const override
Substitution model I/O in BppO format.
void write(const BranchModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
const std::map< std::string, std::string > & getUnparsedArguments() const override
Transition model I/O in BppO format.
std::unique_ptr< TransitionModelInterface > readTransitionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true)
The Coala branch-heterogeneous amino-acid substitution model.
Definition: Coala.h:57
static std::unique_ptr< CodonFrequencySetInterface > getFrequencySetForCodons(short option, std::shared_ptr< const GeneticCode > gCode, const std::string &mgmtStopCodon="quadratic", unsigned short method=1)
A helper function that provide frequencies set for codon models according to PAML option.
the Frequencies in codons are the product of Independent Frequencies in letters with the frequencies ...
the Frequencies in codons are the product of the frequencies for a unique FrequencySet in letters,...
FrequencySet for codons, with no parameter.
Parametrize a set of state frequencies.
Definition: FrequencySet.h:29
virtual void setFrequenciesFromAlphabetStatesFrequencies(const std::map< int, double > &frequencies)=0
Set the Frequencies from the one of the map which keys match with a letter of the Alphabet....
virtual std::string getName() const =0
virtual const Vdouble & getFrequencies() const =0
virtual void setFrequencies(const std::vector< double > &frequencies)=0
Set the parameters in order to match a given set of frequencies.
FrequencySet defined from the equilibrium distribution of a given model.
Definition: FrequencySet.h:252
A generic FrequencySet for Full Codon alphabets.
unsigned short getMethod() const
FrequencySet integrating ProteinFrequencySet inside CodonFrequencySet. In this case,...
Protein FrequencySet using 19 independent parameters to model the 20 frequencies.
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
virtual int getPrecision() const=0
virtual OutputStream & setPrecision(int digit)=0
virtual const ParameterList & getIndependentParameters() const=0
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual void deleteParameter(const std::string &name)
virtual void setParameter(size_t index, const Parameter &param)
virtual bool matchParametersValues(const ParameterList &parameters)=0
virtual const ParameterList & getParameters() const=0
Parametrize a set of state frequencies for proteins.
static void getFrequencies(const SequenceContainerInterface &sc, std::map< int, double > &f, double pseudoCount=0)
const std::string & nextToken()
bool hasMoreToken() const
FrequencySet to be read in a file. More specifically, a frequency set is read in a column of a given ...
Definition: FrequencySet.h:386
the Frequencies in words are the product of Independent Frequencies in letters
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isEmpty(const std::string &s)
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.