bpp-phyl3  3.0.0
BppOFrequencySetFormat.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../Model/FrequencySet/CodonFrequencySet.h"
6 #include "../Model/FrequencySet/MvaFrequencySet.h"
7 #include "../Model/FrequencySet/NucleotideFrequencySet.h"
8 #include "../Model/FrequencySet/ProteinFrequencySet.h"
10 
11 // From bpp-core:
12 #include <Bpp/Io/FileTools.h>
14 #include <Bpp/Text/TextTools.h>
16 #include <Bpp/Text/KeyvalTools.h>
19 
20 // From bpp-seq:
24 
25 using namespace bpp;
26 
27 // From the STL:
28 #include <iomanip>
29 #include <memory>
30 
32 
33 using namespace std;
34 
35 unsigned char BppOFrequencySetFormat::DNA = 1;
36 unsigned char BppOFrequencySetFormat::RNA = 2;
37 unsigned char BppOFrequencySetFormat::NUCLEOTIDE = 1 | 2;
38 unsigned char BppOFrequencySetFormat::PROTEIN = 4;
39 unsigned char BppOFrequencySetFormat::CODON = 8;
40 unsigned char BppOFrequencySetFormat::WORD = 16;
41 unsigned char BppOFrequencySetFormat::ALL = 1 | 2 | 4 | 8 | 16;
42 
43 
44 std::unique_ptr<FrequencySetInterface> BppOFrequencySetFormat::readFrequencySet(
45  std::shared_ptr<const Alphabet> alphabet,
46  const std::string& freqDescription,
47  const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
48  size_t nData,
49  bool parseArguments)
50 {
51  unparsedArguments_.clear();
52  string freqName;
53  map<string, string> args;
54  KeyvalTools::parseProcedure(freqDescription, freqName, args);
55  unique_ptr<FrequencySetInterface> pFS;
56 
57  if (freqName == "Fixed")
58  {
59  if (AlphabetTools::isNucleicAlphabet(*alphabet))
60  {
61  if (alphabetCode_ & NUCLEOTIDE)
62  pFS = make_unique<FixedNucleotideFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet));
63  else
64  throw Exception("Nucleotide alphabet not supported.");
65  }
66  else if (AlphabetTools::isProteicAlphabet(*alphabet))
67  {
68  if (alphabetCode_ & PROTEIN)
69  pFS = make_unique<FixedProteinFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet));
70  else
71  throw Exception("Protein alphabet not supported.");
72  }
73  else if (AlphabetTools::isCodonAlphabet(*alphabet))
74  {
75  if (alphabetCode_ & CODON)
76  {
77  if (!geneticCode_)
78  throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
79  pFS.reset(new FixedCodonFrequencySet(geneticCode_));
80  }
81  else
82  {
83  throw Exception("Codon alphabet not supported.");
84  }
85  }
86  else
87  {
88  pFS = make_unique<FixedFrequencySet>(make_shared<const CanonicalStateMap>(alphabet, false));
89  }
90  }
91  else if (freqName == "Full")
92  {
93  unsigned short method = 1;
94  if (args.find("method") != args.end())
95  {
96  if (args["method"] == "local")
97  method = 2;
98  else if (args["method"] == "binary")
99  method = 3;
100  }
101 
102  if (AlphabetTools::isNucleicAlphabet(*alphabet))
103  {
104  if (alphabetCode_ & NUCLEOTIDE)
105  pFS = make_unique<FullNucleotideFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet));
106  else
107  throw Exception("Nucleotide alphabet not supported.");
108  }
109  else if (AlphabetTools::isProteicAlphabet(*alphabet))
110  {
111  if (alphabetCode_ & PROTEIN)
112  pFS = make_unique<FullProteinFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet), false, method);
113  else
114  throw Exception("Protein alphabet not supported.");
115  }
116  else if (AlphabetTools::isCodonAlphabet(*alphabet))
117  {
118  if (alphabetCode_ & CODON)
119  {
120  if (!geneticCode_)
121  throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
122  pFS = make_unique<FullCodonFrequencySet>(geneticCode_);
123  }
124  else
125  {
126  throw Exception("Codon alphabet not supported.");
127  }
128  }
129  else
130  {
131  // NB: jdutheil 25/09/14 => gap models will not be supported before we add the appropriate option!
132  pFS = make_unique<FullFrequencySet>(make_shared<CanonicalStateMap>(alphabet, false), false, method);
133  }
134  }
135  else if (freqName == "Empirical")
136  {
137  string fname = args["file"];
138  if (TextTools::isEmpty(fname))
139  throw Exception("'file' argument missing for user-defined frequencies.");
140 
141  size_t nCol = 1;
142  if (args.find("col") != args.end())
143  nCol = size_t(TextTools::toInt(args["col"]));
144 
145  if (AlphabetTools::isNucleicAlphabet(*alphabet))
146  {
147  if (alphabetCode_ & NUCLEOTIDE)
148  pFS = make_unique<UserNucleotideFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet), fname, nCol);
149  else
150  throw Exception("Nucleotide alphabet not supported.");
151  }
152  else if (AlphabetTools::isProteicAlphabet(*alphabet))
153  {
154  if (alphabetCode_ & PROTEIN)
155  pFS = make_unique<UserProteinFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet), fname, nCol);
156  else
157  throw Exception("Protein alphabet not supported.");
158  }
159  else if (AlphabetTools::isCodonAlphabet(*alphabet))
160  {
161  if (alphabetCode_ & CODON)
162  {
163  if (!geneticCode_)
164  throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
165  pFS = make_unique<UserCodonFrequencySet>(geneticCode_, fname, nCol);
166  }
167  else
168  {
169  throw Exception("Codon alphabet not supported.");
170  }
171  }
172  else
173  {
174  pFS = make_unique<UserFrequencySet>(make_shared<const CanonicalStateMap>(alphabet, false), fname, nCol);
175  }
176  }
177  else if (freqName == "GC")
178  {
179  if (!AlphabetTools::isNucleicAlphabet(*alphabet))
180  throw Exception("Error, invalid frequencies " + freqName + " with non-nucleic alphabet.");
181  if (alphabetCode_ & NUCLEOTIDE)
182  pFS = make_unique<GCFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet));
183  else
184  throw Exception("Nucleotide alphabet not supported.");
185  }
186 
187  // INDEPENDENTWORD
188  else if (freqName == "Word")
189  {
190  if (!(alphabetCode_ & WORD))
191  throw Exception("Word alphabet not supported.");
192  if (!AlphabetTools::isWordAlphabet(*alphabet))
193  throw Exception("BppOFrequencySetFormat::readFrequencySet(...).\n\t Bad alphabet type "
194  + alphabet->getAlphabetType() + " for frequencies set " + freqName + ".");
195 
196  auto pWA = dynamic_pointer_cast<const WordAlphabet>(alphabet);
197 
198  if (!pWA)
199  throw Exception("BppOFrequencySetFormat::read : Word freq name is from WordAlphabet.");
200 
201  if (args.find("frequency") != args.end())
202  {
203  string sAFS = args["frequency"];
204 
205  unsigned int nbfreq = pWA->getLength();
206  string st = "";
207  for (unsigned i = 0; i < nbfreq; ++i)
208  {
209  st += TextTools::toString(i + 1);
210  }
211 
212  BppOFrequencySetFormat nestedReader(alphabetCode_, false, warningLevel_);
213  auto pFS2 = nestedReader.readFrequencySet(pWA->getNAlphabet(0), sAFS, mData, nData, false);
214  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
215  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
216  {
217  unparsedArguments_[st + "_" + it->first] = it->second;
218  }
219  pFS = make_unique<WordFromUniqueFrequencySet>(pWA, std::move(pFS2));
220  }
221  else
222  {
223  if (args.find("frequency1") == args.end())
224  throw Exception("BppOFrequencySetFormat::read. Missing argument 'frequency' or 'frequency1' for frequencies set 'Word'.");
225  vector<string> v_sAFS;
226  vector<unique_ptr<FrequencySetInterface>> v_AFS;
227  unsigned int nbfreq = 1;
228 
229  while (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
230  {
231  v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq++)]);
232  }
233 
234  if (v_sAFS.size() != pWA->getLength())
235  throw Exception("BppOFrequencySetFormat::read. Number of frequencies (" + TextTools::toString(v_sAFS.size()) + ") does not match length of the words (" + TextTools::toString(pWA->getLength()) + ")");
236 
237  for (size_t i = 0; i < v_sAFS.size(); ++i)
238  {
239  BppOFrequencySetFormat nestedReader(alphabetCode_, false, warningLevel_);
240  pFS = nestedReader.readFrequencySet(pWA->getNAlphabet(i), v_sAFS[i], mData, nData, false);
241  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
242  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
243  {
244  unparsedArguments_[TextTools::toString(i + 1) + "_" + it->first] = it->second;
245  }
246  v_AFS.push_back(std::move(pFS));
247  }
248 
249  pFS = make_unique<WordFromIndependentFrequencySet>(pWA, v_AFS);
250  }
251  }
252  // From Model
253  else if (freqName == "FromModel")
254  {
255  if (args.find("model") == args.end())
256  throw Exception("Missing argument 'model' for frequencies " + freqName + ".");
257 
258  BppOTransitionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
259  if (geneticCode_)
260  nestedReader.setGeneticCode(geneticCode_);
261 
262  auto model = nestedReader.readTransitionModel(alphabet, args["model"], mData, nData, false);
263  pFS = make_unique<FromModelFrequencySet>(std::move(model));
264  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
265  for (auto& it : unparsedParameterValuesNested)
266  {
267  unparsedArguments_["FromModel." + it.first] = it.second;
268  }
269  }
270 
271 
272  // INDEPENDENT CODON
273  else if (freqName == "Codon")
274  {
275  if (!(alphabetCode_ & CODON))
276  throw Exception("Codon alphabet not supported.");
277  if (!AlphabetTools::isCodonAlphabet(*alphabet))
278  throw Exception("BppOFrequencySetFormat::read.\n\t Bad alphabet type "
279  + alphabet->getAlphabetType() + " for frequenciesset " + freqName + ".");
280  if (!geneticCode_)
281  throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
282 
283  auto pWA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
284 
285  if (args.find("frequency") != args.end())
286  {
287  string sAFS = args["frequency"];
288 
289  BppOFrequencySetFormat nestedReader(alphabetCode_, false, warningLevel_);
290  auto pFS2 = nestedReader.readFrequencySet(pWA->getNucleicAlphabet(), sAFS, mData, nData, false);
291  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
292 
293  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
294  {
295  unparsedArguments_["123_" + it->first] = it->second;
296  }
297 
298  pFS = make_unique<CodonFromUniqueFrequencySet>(geneticCode_, std::move(pFS2), "Codon");
299  }
300  else
301  {
302  if (args.find("frequency1") == args.end())
303  throw Exception("BppOFrequencySetFormat::read. Missing argument 'frequency' or 'frequency1' for frequencies set.");
304  vector<string> v_sAFS;
305  vector<unique_ptr<FrequencySetInterface>> v_AFS;
306  unsigned int nbfreq = 1;
307 
308  while (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
309  {
310  v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq++)]);
311  }
312 
313  if (v_sAFS.size() != 3)
314  throw Exception("BppOFrequencySetFormat::read. Number of frequencies (" + TextTools::toString(v_sAFS.size()) + ") is not three");
315 
316  for (size_t i = 0; i < v_sAFS.size(); ++i)
317  {
318  BppOFrequencySetFormat nestedReader(alphabetCode_, false, warningLevel_);
319  pFS = nestedReader.readFrequencySet(pWA->getNucleicAlphabet(), v_sAFS[i], mData, nData, false);
320  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
321  for (auto& it : unparsedParameterValuesNested)
322  {
323  unparsedArguments_[TextTools::toString(i + 1) + "_" + it.first] = it.second;
324  }
325  v_AFS.push_back(std::move(pFS));
326  }
327 
328  if (!geneticCode_)
329  throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
330  pFS = make_unique<CodonFromIndependentFrequencySet>(geneticCode_, v_AFS, "Codon");
331  }
332  }
333 
334  // CODON PER AA Frequencies
335  else if (freqName == "FullPerAA")
336  {
337  if (!(alphabetCode_ & CODON))
338  throw Exception("Codon alphabet not supported.");
339  if (!AlphabetTools::isCodonAlphabet(*alphabet))
340  throw Exception("BppOFrequencySetFormat::read.\n\t Bad alphabet type "
341  + alphabet->getAlphabetType() + " for frequenciesset " + freqName + ".");
342 
343  if (!geneticCode_)
344  throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
345 
346  auto pPA = dynamic_pointer_cast<const ProteicAlphabet>(geneticCode_->getTargetAlphabet());
347 
348  unsigned short method = 1;
349  if (args.find("method") != args.end())
350  {
351  if (args["method"] == "local")
352  method = 2;
353  else if (args["method"] == "binary")
354  method = 3;
355  }
356 
357  if (args.find("protein_frequencies") != args.end())
358  {
359  string sPFS = args["protein_frequencies"];
360  BppOFrequencySetFormat nestedReader(alphabetCode_, false, warningLevel_);
361  auto tmpPtr = nestedReader.readFrequencySet(pPA, sPFS, mData, nData, false);
362  unique_ptr<ProteinFrequencySetInterface> pPFS(dynamic_cast<ProteinFrequencySetInterface*>(tmpPtr.release()));
363  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
364 
365  for (auto& it : unparsedParameterValuesNested)
366  {
367  unparsedArguments_["FullPerAA." + it.first] = it.second;
368  }
369  pFS = make_unique<FullPerAACodonFrequencySet>(geneticCode_, std::move(pPFS), method);
370  }
371  else
372  pFS = make_unique<FullPerAACodonFrequencySet>(geneticCode_, method);
373  }
374 
375  // codeml frequencies syntax
376 
377  else if (AlphabetTools::isCodonAlphabet(*alphabet))
378  {
379  if (!(alphabetCode_ & CODON))
380  throw Exception("Codon alphabet not supported.");
381  if (!geneticCode_)
382  throw Exception("BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
383 
384  auto pWA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
385 
386  if (args.find("genetic_code") != args.end())
387  {
388  ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
389  throw Exception("BppOFrequencySetFormat::read. Deprecated 'genetic_code' argument.");
390  }
391  short opt = -1;
392  string mgmtStopCodon = "quadratic";
393 
394  if (freqName == "F0")
395  {
397  }
398  else if (freqName == "F1X4")
399  {
401 
402  if (args.find("mgmtStopCodons") != args.end())
403  mgmtStopCodon = args["mgmtStopCodons"];
404  else if (args.find("mgmtStopCodon") != args.end())
405  mgmtStopCodon = args["mgmtStopCodon"];
406 
407  if (verbose_)
408  ApplicationTools::displayResult("StopCodon frequencies distribution ", mgmtStopCodon);
409 
410  if (args.find("frequency") != args.end())
411  {
412  string sAFS = args["frequency"];
413 
414  BppOFrequencySetFormat nestedReader(alphabetCode_, false, warningLevel_);
415  auto pFS2 = nestedReader.readFrequencySet(pWA->getNucleicAlphabet(), sAFS, mData, nData, false);
416  if (pFS2->getName() != "Full")
417  throw Exception("BppOFrequencySetFormat::read. The frequency option in F1X4 can only be Full");
418 
419  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
420 
421  for (auto& it : unparsedParameterValuesNested)
422  {
423  unparsedArguments_["123_" + it.first] = it.second;
424  }
425  }
426 
427  if (args.find("123_theta") != args.end())
428  unparsedArguments_["123_Full.theta"] = args["123_theta"];
429  if (args.find("123_theta1") != args.end())
430  unparsedArguments_["123_Full.theta1"] = args["123_theta1"];
431  if (args.find("123_theta2") != args.end())
432  unparsedArguments_["123_Full.theta2"] = args["123_theta2"];
433  if (args.find("theta") != args.end())
434  unparsedArguments_["123_Full.theta"] = args["123_theta"];
435  if (args.find("theta1") != args.end())
436  unparsedArguments_["123_Full.theta1"] = args["123_theta1"];
437  if (args.find("theta2") != args.end())
438  unparsedArguments_["123_Full.theta2"] = args["123_theta2"];
439  }
440  else if (freqName == "F3X4")
441  {
443 
444  if (args.find("mgmtStopCodons") != args.end())
445  mgmtStopCodon = args["mgmtStopCodons"];
446  else if (args.find("mgmtStopCodon") != args.end())
447  mgmtStopCodon = args["mgmtStopCodon"];
448 
449  if (verbose_)
450  ApplicationTools::displayResult("StopCodon frequencies distribution ", mgmtStopCodon);
451 
452  if (args.find("frequency1") != args.end() ||
453  args.find("frequency2") != args.end() ||
454  args.find("frequency3") != args.end())
455  {
456  vector<string> v_sAFS;
457 
458  for (unsigned int nbfreq = 1; nbfreq <= 3; ++nbfreq)
459  {
460  if (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
461  v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq)]);
462  else
463  v_sAFS.push_back("");
464  }
465 
466  for (size_t i = 0; i < v_sAFS.size(); ++i)
467  {
468  BppOFrequencySetFormat nestedReader(alphabetCode_, false, warningLevel_);
469 
470  auto sAFS = v_sAFS[i];
471  if (sAFS != "")
472  {
473  pFS = nestedReader.readFrequencySet(pWA->getNucleicAlphabet(), sAFS, mData, nData, false);
474  if (pFS->getName() != "Full")
475  throw Exception("BppOFrequencySetFormat::read. The frequency options in F3X4 can only be Full");
476 
477  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
478  for (auto& it : unparsedParameterValuesNested)
479  {
480  unparsedArguments_[TextTools::toString(i + 1) + "_" + it.first] = it.second;
481  }
482  }
483  }
484  }
485  for (unsigned int i = 1; i <= 3; ++i)
486  {
487  if (args.find(TextTools::toString(i) + "_theta") != args.end())
488  unparsedArguments_[TextTools::toString(i) + "_Full.theta"] = args[TextTools::toString(i) + "_theta"];
489  if (args.find(TextTools::toString(i) + "_theta1") != args.end())
490  unparsedArguments_[TextTools::toString(i) + "_Full.theta1"] = args[TextTools::toString(i) + "_theta1"];
491  if (args.find(TextTools::toString(i) + "_theta2") != args.end())
492  unparsedArguments_[TextTools::toString(i) + "_Full.theta2"] = args[TextTools::toString(i) + "_theta2"];
493  }
494  }
495  else if (freqName == "F61")
496  {
498  }
499  if (opt != -1)
500  {
501  unsigned short method = 1;
502  if (args.find("method") != args.end())
503  {
504  if (args["method"] == "local")
505  method = 2;
506  else if (args["method"] == "binary")
507  method = 3;
508  }
509  pFS = CodonFrequencySetInterface::getFrequencySetForCodons(opt, geneticCode_, mgmtStopCodon, method);
510  }
511  else
512  throw Exception("Unknown frequency option: " + freqName);
513  }
514 
515  // MVAprotein freq set for COaLA model
516  else if (freqName == "MVAprotein")
517  {
518  if (args.find("model") == args.end())
519  throw Exception("Missing argument 'model' for frequencies " + freqName + ".");
520 
521  BppOTransitionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
522 
523  auto model = nestedReader.readTransitionModel(alphabet, args["model"], mData, nData, false);
524  if (dynamic_cast<Coala*>(model.get())==0)
525  throw Exception("MVAprotein frequency set needs a Coala model");
526 
527  auto coala = unique_ptr<Coala>(dynamic_cast<Coala*>(model.release()));
528  //map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
529 
530  auto mvaFS = make_unique<MvaFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet));
531  //mvaFS->setParamValues(args);
532  mvaFS->initSet(*coala);
533 
534  pFS = std::move(mvaFS);
535  }
536  else
537  throw Exception("Unknown frequency option: " + freqName);
538 
539  // Update parameter args:
540  vector<string> pnames = pFS->getParameters().getParameterNames();
541 
542  string pref = pFS->getNamespace();
543  for (size_t i = 0; i < pnames.size(); i++)
544  {
545  string name = pFS->getParameterNameWithoutNamespace(pnames[i]);
546  if (args.find(name) != args.end())
547  unparsedArguments_[pref + name] = args[name];
548  }
549 
550  // Now look if some parameters are aliased:
551  ParameterList pl = pFS->getIndependentParameters();
552  string pname, pval, pname2;
553  for (size_t i = 0; i < pl.size(); i++)
554  {
555  pname = pFS->getParameterNameWithoutNamespace(pl[i].getName());
556 
557  if (args.find(pname) == args.end())
558  continue;
559  pval = args[pname];
560 
561  if (((pval.rfind("_") != string::npos) && (TextTools::isDecimalInteger(pval.substr(pval.rfind("_") + 1, string::npos)))) ||
562  (pval.find("(") != string::npos))
563  continue;
564  bool found = false;
565  for (size_t j = 0; j < pl.size() && !found; j++)
566  {
567  pname2 = pFS->getParameterNameWithoutNamespace(pl[j].getName());
568 
569  if (j == i)
570  continue;
571  if (pval == pname2)
572  {
573  // This is an alias...
574  // NB: this may throw an exception if incorrect! We leave it as is for now :s
575  pFS->aliasParameters(pname2, pname);
576  if (verbose_)
577  ApplicationTools::displayResult("Parameter alias found", pname + "->" + pname2);
578  found = true;
579  }
580  }
581  // if (!TextTools::isDecimalNumber(pval) && !found)
582  // throw Exception("Incorrect parameter syntax: parameter " + pval + " was not found and can't be used as a value for parameter " + pname + ".");
583  }
584 
585  // Forward arguments:
586  if (args.find("init") != args.end())
587  unparsedArguments_["init"] = args["init"];
588 
589  if (args.find("init.observedPseudoCount") != args.end())
590  unparsedArguments_["init.observedPseudoCount"] = args["init.observedPseudoCount"];
591 
592  std::shared_ptr<const AlignmentDataInterface> data(0);
593  if (nData)
594  data = mData.at(nData);
595 
596  if (args.find("values") != args.end())
597  {
598  unparsedArguments_["init"] = "values" + args["values"];
599  initialize_(*pFS, data);
600  }
601  else if (parseArguments)
602  initialize_(*pFS, data);
603 
604  return pFS;
605 }
606 
608  const FrequencySetInterface& freqset,
609  OutputStream& out,
610  std::map<std::string, std::string>& globalAliases,
611  std::vector<std::string>& writtenNames) const
612 {
613  ParameterList pl = freqset.getParameters();
614  vector<string> vpl = pl.getParameterNames();
615 
616  for (auto& pn : vpl)
617  {
618  if (find(writtenNames.begin(), writtenNames.end(), pn) != writtenNames.end())
619  pl.deleteParameter(pn);
620  }
621 
622  if (pl.size() == 0)
623  return;
624 
625  int p = out.getPrecision();
626  out.setPrecision(12);
627  bool comma(false);
628  string name = freqset.getName();
629  out << name << "(";
630 
631  bool oValues = false;
632 
633  if ((name == "Fixed") || (name == "F0") || (name == "Full"))
634  {
635  vector<double> vf = freqset.getFrequencies();
636  out << "values=(";
637  for (size_t i = 0; i < vf.size(); ++i)
638  {
639  if (i != 0)
640  out << ", ";
641  out << vf[i];
642  }
643  out << ")";
644 
645  vector<string> npl = pl.getParameterNames();
646  writtenNames.insert(writtenNames.end(), npl.begin(), npl.end());
647 
648  comma = true;
649 
650  oValues = true;
651  }
652 
653  try
654  {
655  auto ufs = dynamic_cast<const UserFrequencySet&>(freqset);
656  out << "file=" << ufs.getPath();
657  size_t nCol = ufs.getColumnNumber();
658 
659  if (nCol != 1)
660  out << ", col=" << nCol;
661 
662  oValues = true;
663  }
664  catch (bad_cast&)
665  {}
666 
667  // For Word or Codon FS : length mgmt
668  try
669  {
670  auto pWFI = dynamic_cast<const WordFromIndependentFrequencySet&>(freqset);
671  if (name != "F3X4")
672  {
673  for (size_t i = 0; i < pWFI.getLength(); ++i)
674  {
675  if (i != 0)
676  out << ", ";
677  out << "frequency" << i + 1 << "=";
678  writeFrequencySet(pWFI.frequencySetForLetter(i), out, globalAliases, writtenNames);
679  }
680  comma = true;
681  }
682  }
683  catch (bad_cast&)
684  {}
685 
686  try
687  {
688  auto pWFU = dynamic_cast<const WordFromUniqueFrequencySet&>(freqset);
689  if (name != "F1X4")
690  {
691  for (size_t i = 0; i < pWFU.getLength(); ++i)
692  {
693  if (i != 0)
694  out << ", ";
695  out << "frequency=";
696  writeFrequencySet(pWFU.frequencySetForLetter(i), out, globalAliases, writtenNames);
697  }
698  comma = true;
699  }
700  }
701  catch (bad_cast&)
702  {}
703 
704  // FromModel
705 
706  try
707  {
708  auto pFMFS = dynamic_cast<const FromModelFrequencySet&>(freqset);
709  if (comma)
710  out << ", ";
711  out << "model=";
712 
713  BppOSubstitutionModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, 0);
714 
715  bIO.write(*(pFMFS.getModel()), out, globalAliases, writtenNames);
716  comma = true;
717  }
718  catch (bad_cast&)
719  {}
720 
721 
722  // FullPerAA
723  try
724  {
725  auto pFPA = dynamic_cast<const FullPerAACodonFrequencySet&>(freqset);
726  out << "protein_frequencies=";
727 
728  if (pFPA.hasProteinFrequencySet())
729  {
730  writeFrequencySet(pFPA.proteinFrequencySet(), out, globalAliases, writtenNames);
731  }
732  else
733  {
734  out << "None";
735  }
736  comma = true;
737 
738  unsigned short meth = pFPA.getMethod();
739  if (meth > 1)
740  {
741  if (comma)
742  out << ",";
743  out << "method=" << ((meth == 2) ? "local" : "binary");
744  comma = true;
745  }
746  }
747  catch (bad_cast&)
748  {}
749 
750 
751  // method
752  try
753  {
754  size_t meth = dynamic_cast<const FullProteinFrequencySet&>(freqset).getMethod();
755  if (meth > 1)
756  {
757  if (comma)
758  out << ",";
759  out << "method=" << ((meth == 2) ? "local" : "binary");
760  comma = true;
761  }
762  }
763  catch (bad_cast&)
764  {}
765 
766  try
767  {
768  auto pF = dynamic_cast<const FullCodonFrequencySet&>(freqset);
769  unsigned short meth = pF.getMethod();
770  if (meth > 1)
771  {
772  if (comma)
773  out << ",";
774  out << "method=" << ((meth == 2) ? "local" : "binary");
775  comma = true;
776  }
777  }
778  catch (bad_cast&)
779  {}
780 
781  // mgmtStopCodon
782  try
783  {
784  auto pCFU = dynamic_cast<const CodonFromUniqueFrequencySet&>(freqset);
785  if (pCFU.getMgmtStopCodon() != "quadratic")
786  {
787  if (comma)
788  out << ",";
789  out << "mgmtStopCodons=" << pCFU.getMgmtStopCodon();
790  comma = true;
791  }
792  }
793  catch (bad_cast&)
794  {}
795 
796  try
797  {
798  auto pCFI = dynamic_cast<const CodonFromIndependentFrequencySet&>(freqset);
799  if (pCFI.getMgmtStopCodon() != "quadratic")
800  {
801  if (comma)
802  out << ",";
803  out << "mgmtStopCodons=" << pCFI.getMgmtStopCodon();
804  comma = true;
805  }
806  }
807  catch (bad_cast&)
808  {}
809 
810 // All remaining parameters
811  if (!oValues)
812  {
814  bIO.write(freqset, out, globalAliases, pl.getParameterNames(), writtenNames, true, comma);
815  }
816 
817  out << ")";
818  out.setPrecision(p);
819 }
820 
822  FrequencySetInterface& freqSet,
823  std::shared_ptr<const AlignmentDataInterface> data)
824 {
825  if (unparsedArguments_.find("init") != unparsedArguments_.end())
826  {
827  // Initialization using the "init" option
828  string init = unparsedArguments_["init"];
829 
830  if (init == "observed" && data)
831  {
832  unsigned int psc = 0;
833  if (unparsedArguments_.find("init.observedPseudoCount") != unparsedArguments_.end())
834  psc = TextTools::to<unsigned int>(unparsedArguments_["init.observedPseudoCount"]);
835 
836  map<int, double> freqs;
837  SequenceContainerTools::getFrequencies(*data, freqs, psc);
838 
840  }
841  else if (init.substr(0, 6) == "values")
842  {
843  // Initialization using the "values" argument
844  vector<double> frequencies;
845  string rf = init.substr(6);
846 
847  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
848  while (strtok.hasMoreToken())
849  frequencies.push_back(TextTools::toDouble(strtok.nextToken()));
850  freqSet.setFrequencies(frequencies);
851  }
852  else if (init == "balanced")
853  {
854  // Nothing to do here, this is the default instantiation.
855  }
856  else
857  throw Exception("Unknown init argument");
858 
859  unparsedArguments_.erase(unparsedArguments_.find("init"));
860  if (unparsedArguments_.find("init.observedPseudoCount") != unparsedArguments_.end())
861  unparsedArguments_.erase(unparsedArguments_.find("init.observedPseudoCount"));
862  }
863 
864  // Explicit initialization of each parameter
866 
867  for (size_t i = 0; i < pl.size(); ++i)
868  {
869  AutoParameter ap(pl[i]);
870  if (verbose_)
872  pl.setParameter(i, ap);
873  }
874 
875  for (size_t i = 0; i < pl.size(); ++i)
876  {
877  const string pName = pl[i].getName();
878 
879  try
880  {
881  double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue(), "", true, warningLevel_);
882 
883  pl[i].setValue(value);
884 
885  if (unparsedArguments_.find(pName) != unparsedArguments_.end())
886  unparsedArguments_.erase(unparsedArguments_.find(pName));
887 
888  if (verbose_)
889  ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
890  }
891  catch (Exception& e)
892  {}
893  }
894 
895  freqSet.matchParametersValues(pl);
896 }
static bool isProteicAlphabet(const Alphabet &alphabet)
static bool isNucleicAlphabet(const Alphabet &alphabet)
static bool isCodonAlphabet(const Alphabet &alphabet)
static bool isWordAlphabet(const Alphabet &alphabet)
static std::shared_ptr< OutputStream > warning
static void displayWarning(const std::string &text)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
virtual void setMessageHandler(std::shared_ptr< OutputStream > mh)
Frequencies set I/O in BppO format.
void writeFrequencySet(const FrequencySetInterface &freqset, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
void initialize_(FrequencySetInterface &freqSet, std::shared_ptr< const AlignmentDataInterface > data)
std::unique_ptr< FrequencySetInterface > readFrequencySet(std::shared_ptr< const Alphabet > alphabet, const std::string &freqDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, bool parseArguments=true) override
Read a frequencies set from a string.
const std::map< std::string, std::string > & getUnparsedArguments() const override
void write(const Parametrizable &parametrizable, OutputStream &out, std::vector< std::string > &writtenNames, bool printComma=false) const override
Substitution model I/O in BppO format.
void write(const BranchModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
const std::map< std::string, std::string > & getUnparsedArguments() const override
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
Transition model I/O in BppO format.
std::unique_ptr< TransitionModelInterface > readTransitionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, bool parseArguments=true)
The Coala branch-heterogeneous amino-acid substitution model.
Definition: Coala.h:57
static std::unique_ptr< CodonFrequencySetInterface > getFrequencySetForCodons(short option, std::shared_ptr< const GeneticCode > gCode, const std::string &mgmtStopCodon="quadratic", unsigned short method=1)
A helper function that provide frequencies set for codon models according to PAML option.
the Frequencies in codons are the product of Independent Frequencies in letters with the frequencies ...
the Frequencies in codons are the product of the frequencies for a unique FrequencySet in letters,...
FrequencySet useful for homogeneous and stationary models, codon implementation.
Parametrize a set of state frequencies.
Definition: FrequencySet.h:29
virtual const Vdouble & getFrequencies() const =0
virtual void setFrequenciesFromAlphabetStatesFrequencies(const std::map< int, double > &frequencies)=0
Set the Frequencies from the one of the map which keys match with a letter of the Alphabet....
virtual std::string getName() const =0
virtual void setFrequencies(const std::vector< double > &frequencies)=0
Set the parameters in order to match a given set of frequencies.
FrequencySet defined from the equilibrium distribution of a given model.
Definition: FrequencySet.h:252
A generic FrequencySet for Full Codon alphabets.
unsigned short getMethod() const
FrequencySet integrating ProteinFrequencySet inside CodonFrequencySet. In this case,...
Protein FrequencySet using 19 independent parameters to model the 20 frequencies.
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
virtual int getPrecision() const=0
virtual OutputStream & setPrecision(int digit)=0
virtual const ParameterList & getIndependentParameters() const=0
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual void deleteParameter(const std::string &name)
virtual void setParameter(size_t index, const Parameter &param)
virtual bool matchParametersValues(const ParameterList &parameters)=0
virtual const ParameterList & getParameters() const=0
Parametrize a set of state frequencies for proteins.
static void getFrequencies(const SequenceContainerInterface &sc, std::map< int, double > &f, double pseudoCount=0)
const std::string & nextToken()
bool hasMoreToken() const
FrequencySet to be read in a file. More specifically, a frequency set is read in a column of a given ...
Definition: FrequencySet.h:386
the Frequencies in words are the product of Independent Frequencies in letters
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isEmpty(const std::string &s)
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.