bpp-phyl3  3.0.0
BppOSubstitutionModelFormat.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
7 #include <Bpp/Io/FileTools.h>
8 #include <Bpp/Io/OutputStream.h>
11 #include <Bpp/Text/KeyvalTools.h>
13 #include <Bpp/Text/TextTools.h>
14 
15 #include "../App/PhylogeneticsApplicationTools.h"
16 #include "../Model/AbstractBiblioMixedTransitionModel.h"
17 #include "../Model/BinarySubstitutionModel.h"
18 #include "../Model/D1WalkSubstitutionModel.h"
19 #include "../Model/Codon/AbstractCodonAAFitnessSubstitutionModel.h"
20 #include "../Model/Codon/AbstractCodonAARateSubstitutionModel.h"
21 #include "../Model/Codon/AbstractCodonBGCSubstitutionModel.h"
22 #include "../Model/Codon/AbstractCodonClusterAASubstitutionModel.h"
23 #include "../Model/Codon/AbstractCodonCpGSubstitutionModel.h"
24 #include "../Model/Codon/AbstractCodonDistanceSubstitutionModel.h"
25 #include "../Model/Codon/AbstractCodonDistanceSubstitutionModel.h"
26 #include "../Model/Codon/AbstractCodonFitnessSubstitutionModel.h"
27 #include "../Model/Codon/AbstractCodonFrequenciesSubstitutionModel.h"
28 #include "../Model/Codon/AbstractCodonPhaseFrequenciesSubstitutionModel.h"
29 #include "../Model/Codon/CodonAdHocSubstitutionModel.h"
30 #include "../Model/Codon/CodonSameAARateSubstitutionModel.h"
31 #include "../Model/Codon/DFP07.h"
32 #include "../Model/Codon/GY94.h"
33 #include "../Model/Codon/KCM.h"
34 #include "../Model/Codon/KroneckerCodonDistanceFrequenciesSubstitutionModel.h"
35 #include "../Model/Codon/KroneckerCodonDistanceSubstitutionModel.h"
36 #include "../Model/Codon/MG94.h"
37 #include "../Model/Codon/SENCA.h"
38 #include "../Model/Codon/TripletSubstitutionModel.h"
39 #include "../Model/Codon/YN98.h"
40 #include "../Model/Codon/YNGP_M10.h"
41 #include "../Model/Codon/YNGP_M7.h"
42 #include "../Model/Codon/YNGP_M8.h"
43 #include "../Model/Codon/YNGP_M9.h"
44 #include "../Model/EquiprobableSubstitutionModel.h"
45 #include "../Model/FromMixtureSubstitutionModel.h"
46 #include "../Model/G2001.h"
47 #include "../Model/InMixedSubstitutionModel.h"
48 #include "../Model/KroneckerWordSubstitutionModel.h"
49 #include "../Model/MixtureOfATransitionModel.h"
50 #include "../Model/MixtureOfTransitionModels.h"
51 #include "../Model/Nucleotide/F81.h"
52 #include "../Model/Nucleotide/F84.h"
53 #include "../Model/Nucleotide/GTR.h"
54 #include "../Model/Nucleotide/HKY85.h"
55 #include "../Model/Nucleotide/JCnuc.h"
56 #include "../Model/Nucleotide/K80.h"
57 #include "../Model/Nucleotide/L95.h"
58 #include "../Model/Nucleotide/NucleotideSubstitutionModel.h"
59 #include "../Model/Nucleotide/RN95.h"
60 #include "../Model/Nucleotide/RN95s.h"
61 #include "../Model/Nucleotide/SSR.h"
62 #include "../Model/Nucleotide/T92.h"
63 #include "../Model/Nucleotide/TN93.h"
64 #include "../Model/Nucleotide/YpR.h"
65 #include "../Model/Nucleotide/gBGC.h"
66 #include "../Model/OneChangeRegisterTransitionModel.h"
67 #include "../Model/OneChangeTransitionModel.h"
68 #include "../Model/POMO.h"
69 #include "../Model/Protein/Coala.h"
70 #include "../Model/Protein/CoalaCore.h"
71 #include "../Model/Protein/DSO78.h"
72 #include "../Model/Protein/JCprot.h"
73 #include "../Model/Protein/JTT92.h"
74 #include "../Model/Protein/LG08.h"
75 #include "../Model/Protein/LGL08_CAT.h"
76 #include "../Model/Protein/ProteinSubstitutionModel.h"
77 #include "../Model/Protein/UserProteinSubstitutionModel.h"
78 #include "../Model/Protein/WAG01.h"
79 #include "../Model/RE08.h"
80 #include "../Model/RegisterRatesSubstitutionModel.h"
81 #include "../Model/TS98.h"
82 #include "../Model/TwoParameterBinarySubstitutionModel.h"
83 #include "../Model/WordSubstitutionModel.h"
84 #include "BppOFrequencySetFormat.h"
88 
89 // From Numeric
90 
93 
94 
96 
97 
98 using namespace bpp;
99 
100 // From the STL:
101 #include <iomanip>
102 #include <set>
103 #include <memory>
104 
105 using namespace std;
106 
107 unsigned char BppOSubstitutionModelFormat::DNA = 1;
108 unsigned char BppOSubstitutionModelFormat::RNA = 2;
109 unsigned char BppOSubstitutionModelFormat::NUCLEOTIDE = 1 | 2;
110 unsigned char BppOSubstitutionModelFormat::PROTEIN = 4;
111 unsigned char BppOSubstitutionModelFormat::CODON = 8;
112 unsigned char BppOSubstitutionModelFormat::WORD = 16;
113 unsigned char BppOSubstitutionModelFormat::BINARY = 32;
114 unsigned char BppOSubstitutionModelFormat::INTEGER = 64;
115 unsigned char BppOSubstitutionModelFormat::ALL = 1 | 2 | 4 | 8 | 16 | 32 | 64;
116 
117 
118 unique_ptr<SubstitutionModelInterface> BppOSubstitutionModelFormat::readSubstitutionModel(
119  shared_ptr<const Alphabet> alphabet,
120  const string& modelDescription,
121  const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
122  size_t nData,
123  bool parseArguments)
124 {
125  unparsedArguments_.clear();
126  unique_ptr<SubstitutionModelInterface> model;
127  string modelName = "";
128  map<string, string> args;
129  KeyvalTools::parseProcedure(modelDescription, modelName, args);
130 
131 
132  // ///////////////////////////////
133  // LINKED WITH MIXTURES
134  // //////////////////////////////
135 
136  if (modelName == "InMixed")
137  {
138  if (args.find("model") == args.end())
139  throw Exception("'model' argument missing to define the InMixed model.");
140 
141  string nameMod = "";
142  size_t numMod = 0;
143 
144  if (args.find("numMod") == args.end())
145  {
146  if (args.find("nameMod") == args.end())
147  throw Exception("'numMod' and 'nameMod' arguments missing to define the InMixed submodel.");
148  else
149  nameMod = args["nameMod"];
150  }
151  else
152  numMod = (size_t)TextTools::toInt(args["numMod"]);
153 
154  string modelDesc2 = args["model"];
155  BppOTransitionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
156  nestedReader.setGeneticCode(geneticCode_); // This uses the same instance as the o
157 
158  unique_ptr<TransitionModelInterface> nestedModelTmp = nestedReader.readTransitionModel(alphabet, modelDesc2, mData, nData, false);
159  unique_ptr<MixedTransitionModelInterface> nestedModel(dynamic_cast<MixedTransitionModelInterface*>(nestedModelTmp.release()));
160 
161  // Check that nestedModel is fine and has subModel of given name
162  // or number
163 
164  if (!nestedModel)
165  throw Exception("Unknown mixed model " + modelDesc2 + ".");
166 
167  if (nameMod != "")
168  {
169  try
170  {
171  nestedModel->model(nameMod);
172  }
173  catch (NullPointerException&)
174  {
175  throw Exception("BppOSubstitutionModelFormat::read. " + nestedModel->getName() + "argument for model 'InMixed' has no submodel with name " + nameMod + ".");
176  }
177  model = make_unique<InMixedSubstitutionModel>(std::move(nestedModel), nameMod, modelDesc2);
178  }
179  else
180  {
181  if (numMod == 0 || (nestedModel->getNumberOfModels() < numMod))
182  throw Exception("BppOSubstitutionModelFormat::read. " + nestedModel->getName() + "argument for model 'InMixed' has no submodel with number " + TextTools::toString(numMod) + ".");
183  model = make_unique<InMixedSubstitutionModel>(std::move(nestedModel), numMod - 1, modelDesc2);
184  }
185 
186  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
187  for (auto& it :unparsedParameterValuesNested)
188  {
189  unparsedArguments_[it.first] = it.second;
190  }
191 
192  if (verbose_)
193  ApplicationTools::displayResult("Number of submodel", TextTools::toString(numMod));
194  }
195 
196 
197  else if (modelName == "FromRegister")
198  {
199  // We have to parse the nested model first:
200  if (args.find("model") == args.end())
201  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'FromRegister'.");
202 
203  string nestedModelDescription = args["model"];
204  BppOSubstitutionModelFormat nestedReader(ALL, false, allowMixed_, allowGaps_, verbose_, warningLevel_);
205  if (geneticCode_)
206  nestedReader.setGeneticCode(geneticCode_);
207 
208  auto nestedModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
209  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
210 
211  // We look for the register:
212  if (args.find("register") == args.end())
213  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'register' for model 'FromRegister'.");
214 
215  string registerDescription = args["register"];
216  shared_ptr<AlphabetIndex2> weights = nullptr;
217  shared_ptr<AlphabetIndex2> distances = nullptr;
218 
219  auto reg = PhylogeneticsApplicationTools::getSubstitutionRegister(registerDescription, nestedModel->getStateMap(), geneticCode_, weights, distances, verbose_);
220 
221  // is it normalized (default : false)
222  bool isNorm = false;
223 
224  if (args.find("isNormalized") != args.end() && args["isNormalized"] == "true")
225  isNorm = true;
226 
227  model = make_unique<RegisterRatesSubstitutionModel>(std::move(nestedModel), *reg, isNorm);
228 
229  if (TextTools::isEmpty(registerDescription))
230  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'register' for model 'FromRegister'.");
231 
232  // Then we update the parameter set:
233  for (auto& it : unparsedParameterValuesNested)
234  {
235  unparsedArguments_["FromRegister." + it.first] = it.second;
236  }
237  }
238 
239  else if (modelName == "POMO")
240  {
241  auto allelic = dynamic_pointer_cast<const AllelicAlphabet>(alphabet);
242  if (!allelic)
243  throw Exception("BppOSubstitutionModelFormat;;read. POMO model with no allelic alphabet.");
244 
245  // We have to parse the nested model first:
246  if (args.find("model") == args.end())
247  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'POMO'.");
248 
249  map<string, string> unparsedParameterValuesNested;
250  // fitness
251  unique_ptr<FrequencySetInterface> nestedFreq(nullptr);
252 
253  if (args.find("fitness") != args.end())
254  {
255  string nestedFreqDescription = args["fitness"];
256  BppOFrequencySetFormat nestedFreqReader(ALL, false, warningLevel_);
257 
258  nestedFreq = nestedFreqReader.readFrequencySet(allelic->getStateAlphabet(), nestedFreqDescription, mData, nData, false);
259  unparsedParameterValuesNested = nestedFreqReader.getUnparsedArguments();
260 
261  for (auto& it : unparsedParameterValuesNested)
262  {
263  unparsedParameterValuesNested["fit_" + it.first] = it.second;
264  }
265  }
266 
267  // model
268 
269  string nestedModelDescription = args["model"];
270  BppOSubstitutionModelFormat nestedReader(ALL, false, allowMixed_, allowGaps_, verbose_, warningLevel_);
271  nestedReader.setGeneticCode(geneticCode_);
272 
273  auto nestedModel = nestedReader.readSubstitutionModel(allelic->getStateAlphabet(), nestedModelDescription, mData, nData, false);
274  unparsedParameterValuesNested.insert(nestedReader.getUnparsedArguments().begin(),
275  nestedReader.getUnparsedArguments().end());
276 
277 
278  model = make_unique<POMO>(allelic, std::move(nestedModel), std::move(nestedFreq));
279 
280  // Then we update the parameter set:
281  for (auto& it : unparsedParameterValuesNested)
282  {
283  unparsedArguments_["POMO." + it.first] = it.second;
284  }
285  }
286 
287 
288  // /////////////////////////////////
289  // / WORDS and CODONS
290  // ///////////////////////////////
291 
292  else if ((modelName == "Word") || (modelName.substr(0, 4) == "Kron") || (modelName == "Triplet") || (modelName.substr(0, 5) == "Codon") || (modelName == "SENCA") )
293  model = readWord_(alphabet, modelDescription, mData, nData);
294 
295 
296  // //////////////////
297  // PREDEFINED CODON MODELS
298 
299  else if (((modelName == "MG94") || (modelName == "YN98") || (modelName == "YNGP_M0") ||
300  (modelName == "GY94") || (modelName.substr(0, 3) == "KCM") || (modelName == "SameAARate"))
301  && (alphabetCode_ & CODON))
302  {
303  if (!(alphabetCode_ & CODON))
304  throw Exception("BppOSubstitutionModelFormat::read. Codon alphabet not supported.");
305  if (!geneticCode_)
306  throw Exception("BppOSubstitutionModelFormat::readSubstitionModel(). No genetic code specified! Consider using 'setGeneticCode'.");
307 
308  if (!AlphabetTools::isCodonAlphabet(*alphabet))
309  throw Exception("Alphabet should be Codon Alphabet.");
310 
311  auto pCA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
312 
313  if (args.find("genetic_code") != args.end())
314  {
315  ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
316  throw Exception("BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
317  }
318 
319  if (geneticCode_->getSourceAlphabet()->getAlphabetType() != pCA->getAlphabetType())
320  throw Exception("Mismatch between genetic code and codon alphabet");
321 
322  unique_ptr<CodonFrequencySetInterface> codonFreqs(nullptr);
323 
324  if (args.find("frequencies") != args.end())
325  {
326  string freqOpt = ApplicationTools::getStringParameter("frequencies", args, "F0", "", true, warningLevel_);
327  BppOFrequencySetFormat freqReader(BppOFrequencySetFormat::ALL, false, warningLevel_);
328  freqReader.setGeneticCode(geneticCode_); // This uses the same instance as the one that will be used by the model.
329 
330  auto tmpFreqs = freqReader.readFrequencySet(pCA, freqOpt, mData, nData);
331  codonFreqs = unique_ptr<CodonFrequencySetInterface>(dynamic_cast<CodonFrequencySetInterface*>(tmpFreqs.release()));
332  for (const auto& it : freqReader.getUnparsedArguments())
333  {
334  unparsedArguments_[modelName + "." + it.first] = it.second;
335  }
336  }
337  else
338  // codonFreqs compulsory for all models but SameAARate
339  if (modelName != "SameAARate")
340  throw Exception("Missing 'frequencies' for model " + modelName);
341 
342  if (modelName == "MG94")
343  model = make_unique<MG94>(geneticCode_, std::move(codonFreqs));
344  else if (modelName == "GY94")
345  model = make_unique<GY94>(geneticCode_, std::move(codonFreqs));
346  else if ((modelName == "YN98") || (modelName == "YNGP_M0"))
347  model = make_unique<YN98>(geneticCode_, std::move(codonFreqs));
348  else if (modelName == "KCM7")
349  model = make_unique<KCM>(geneticCode_, true);
350  else if (modelName == "KCM19")
351  model = make_unique<KCM>(geneticCode_, false);
352  else if (modelName == "SameAARate")
353  {
354  if (args.find("protmodel") == args.end())
355  throw Exception("Missing 'protmodel in model " + modelName + ".");
356 
357  BppOSubstitutionModelFormat nestedProtReader(PROTEIN, false, allowMixed_, allowGaps_, verbose_, warningLevel_);
358  auto tmpProtModel = nestedProtReader.readSubstitutionModel(geneticCode_->getTargetAlphabet(), args["protmodel"], mData, nData, false);
359  auto nestedProtModel = unique_ptr<ProteinSubstitutionModelInterface>(dynamic_cast<ProteinSubstitutionModelInterface*>(tmpProtModel.release()));
360 
361  auto unparsedParameterValuesNested = nestedProtReader.getUnparsedArguments();
362  unparsedArguments_.insert(unparsedParameterValuesNested.begin(), unparsedParameterValuesNested.end());
363 
364  if (args.find("codonmodel") == args.end())
365  throw Exception("Missing 'codonmodel in model " + modelName + ".");
366 
367  BppOSubstitutionModelFormat nestedCodonReader(CODON, false, allowMixed_, allowGaps_, verbose_, warningLevel_);
368  nestedCodonReader.setGeneticCode(geneticCode_); // This uses the same instance as the o
369 
370  auto tmpSubstitutionModel = nestedCodonReader.readSubstitutionModel(alphabet, args["codonmodel"], mData, nData, false);
371  auto nestedCodonModel = unique_ptr<CodonSubstitutionModelInterface>(dynamic_cast<CodonSubstitutionModelInterface*>(tmpSubstitutionModel.release()));
372 
373  unparsedParameterValuesNested = nestedCodonReader.getUnparsedArguments();
374  for (const auto& it:unparsedParameterValuesNested)
375  {
376  unparsedArguments_["SameAARate." + it.first] = it.second;
377  }
378 
379  model = make_unique<CodonSameAARateSubstitutionModel>(std::move(nestedProtModel), std::move(nestedCodonModel), std::move(codonFreqs), geneticCode_);
380  }
381  else
382  throw Exception("Unknown Codon model: " + modelName);
383  }
384 
385 
386  // //////////////////////////////////
387  // YpR
388  // //////////////////////////////////
389 
390  else if (modelName == "YpR_Sym")
391  {
392  if (!(alphabetCode_ & NUCLEOTIDE))
393  throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
394  if (!AlphabetTools::isRNYAlphabet(*alphabet))
395  throw Exception("Mismatch alphabet: " + alphabet->getAlphabetType() + " for model: " + modelName);
396  auto prny = dynamic_pointer_cast<const RNY>(alphabet);
397 
398  string nestedModelDescription = args["model"];
399  if (TextTools::isEmpty(nestedModelDescription))
400  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_sym'.");
401  if (verbose_)
402  ApplicationTools::displayResult("Symmetric YpR model", modelName);
403  BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, false, false, false, verbose_, warningLevel_);
404  auto tmpModel = nestedReader.readSubstitutionModel(prny->getLetterAlphabet(), nestedModelDescription, mData, nData, false);
405  unique_ptr<NucleotideSubstitutionModelInterface> nestedModel(dynamic_cast<NucleotideSubstitutionModelInterface*>(tmpModel.release()));
406  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
407  for (auto& it : unparsedParameterValuesNested)
408  {
409  unparsedArguments_["YpR_Sym." + it.first] = it.second;
410  }
411 
412  model = make_unique<YpR_Sym>(prny, std::move(nestedModel));
413  }
414  else if (modelName == "YpR_Gen")
415  {
416  if (!(alphabetCode_ & NUCLEOTIDE))
417  throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
418  if (!AlphabetTools::isRNYAlphabet(*alphabet))
419  throw Exception("Mismatch alphabet: " + alphabet->getAlphabetType() + " for model: " + modelName);
420  auto prny = dynamic_pointer_cast<const RNY>(alphabet);
421 
422  string nestedModelDescription = args["model"];
423  if (TextTools::isEmpty(nestedModelDescription))
424  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_gen'.");
425  if (verbose_)
426  ApplicationTools::displayResult("General YpR model", modelName);
427  BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, false, false, false, verbose_, warningLevel_);
428  auto tmpModel = nestedReader.readSubstitutionModel(prny->getLetterAlphabet(), nestedModelDescription, mData, nData, false);
429  unique_ptr<NucleotideSubstitutionModelInterface> nestedModel(dynamic_cast<NucleotideSubstitutionModelInterface*>(tmpModel.release()));
430  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
431 
432  for (auto& it : unparsedParameterValuesNested)
433  {
434  unparsedArguments_["YpR_Gen." + it.first] = it.second;
435  }
436 
437  model = make_unique<YpR_Gen>(prny, std::move(nestedModel));
438  }
439 
440 
441  // /////////////////////////////////
442  // / RE08
443  // ///////////////////////////////
444 
445  else if (modelName == "RE08")
446  {
447  if (!allowGaps_)
448  throw Exception("BppOSubstitutionModelFormat::read. No Gap model allowed here.");
449 
450  // We have to parse the nested model first:
451  string nestedModelDescription = args["model"];
452  if (TextTools::isEmpty(nestedModelDescription))
453  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'RE08'.");
454  if (verbose_)
455  ApplicationTools::displayResult("Gap model", modelName);
456  BppOSubstitutionModelFormat nestedReader(ALL, allowCovarions_, false, false, verbose_, warningLevel_);
457  if (geneticCode_)
458  nestedReader.setGeneticCode(geneticCode_);
459  auto tmpModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
460 
461  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
462 
463  // Now we create the RE08 substitution model:
464  if (AlphabetTools::isNucleicAlphabet(*alphabet))
465  {
466  if (!(alphabetCode_ & NUCLEOTIDE))
467  throw Exception("BppOSubstitutionModelFormat::read. Nucleic alphabet not supported.");
468  unique_ptr<NucleotideReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<NucleotideReversibleSubstitutionModelInterface*>(tmpModel.release()));
469  model = make_unique<RE08Nucleotide>(std::move(nestedModel));
470  if (!model)
471  throw Exception("BppOSubstitutionModelFormat::readSubstitionModel(). Invalid submodel, must be 'reversible' and 'nucleotide'.");
472  }
473  else if (AlphabetTools::isProteicAlphabet(*alphabet))
474  {
475  if (!(alphabetCode_ & PROTEIN))
476  throw Exception("BppOSubstitutionModelFormat::read. Protein alphabet not supported.");
477  unique_ptr<ProteinReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<ProteinReversibleSubstitutionModelInterface*>(tmpModel.release()));
478  model = make_unique<RE08Protein>(std::move(nestedModel));
479  if (!model)
480  throw Exception("BppOSubstitutionModelFormat::readSubstitionModel(). Invalid submodel, must be 'reversible' and 'protein'.");
481  }
482  else if (AlphabetTools::isCodonAlphabet(*alphabet))
483  {
484  if (!(alphabetCode_ & CODON))
485  throw Exception("BppOSubstitutionModelFormat::read. Codon alphabet not supported.");
486  unique_ptr<CodonReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<CodonReversibleSubstitutionModelInterface*>(tmpModel.release()));
487  model = make_unique<RE08Codon>(std::move(nestedModel));
488  if (!model)
489  throw Exception("BppOSubstitutionModelFormat::readSubstitionModel(). Invalid submodel, must be 'reversible' and 'codon'.");
490  }
491  else
492  {
493  unique_ptr<ReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<ReversibleSubstitutionModelInterface*>(tmpModel.release()));
494  model = make_unique<RE08>(std::move(nestedModel));
495  }
496 
497  // Then we update the parameter set:
498  for (auto& it : unparsedParameterValuesNested)
499  {
500  unparsedArguments_["RE08.model_" + it.first] = it.second;
501  }
502  }
503 
504  // /////////////////////////////////
505  // / TS98
506  // ///////////////////////////////
507 
508  else if (modelName == "TS98")
509  {
510  if (!allowCovarions_)
511  throw Exception("BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
512 
513  // We have to parse the nested model first:
514  string nestedModelDescription = args["model"];
515  if (TextTools::isEmpty(nestedModelDescription))
516  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'TS98'.");
517  if (verbose_)
518  ApplicationTools::displayResult("Covarion model", modelName);
519  BppOSubstitutionModelFormat nestedReader(ALL, false, allowMixed_, allowGaps_, false, warningLevel_);
520  if (geneticCode_)
521  nestedReader.setGeneticCode(geneticCode_);
522  auto tmpModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
523  unique_ptr<ReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<ReversibleSubstitutionModelInterface*>(tmpModel.release()));
524  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
525 
526  // Now we create the TS98 substitution model:
527  model = make_unique<TS98>(std::move(nestedModel));
528 
529  // Then we update the parameter set:
530  for (auto& it : unparsedParameterValuesNested)
531  {
532  unparsedArguments_["TS98.model_" + it.first] = it.second;
533  }
534  }
535 
536  // /////////////////////////////////
537  // / G01
538  // ///////////////////////////////
539 
540  else if (modelName == "G01")
541  {
542  if (!allowCovarions_)
543  throw Exception("BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
544 
545  // We have to parse the nested model first:
546  string nestedModelDescription = args["model"];
547  if (TextTools::isEmpty(nestedModelDescription))
548  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'G01'.");
549  string nestedRateDistDescription = args["rdist"];
550  if (TextTools::isEmpty(nestedRateDistDescription))
551  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'rdist' for model 'G01'.");
552  if (verbose_)
553  ApplicationTools::displayResult("Covarion model", modelName);
554  BppOSubstitutionModelFormat nestedReader(ALL, false, allowMixed_, allowGaps_, verbose_, warningLevel_);
555  if (geneticCode_)
556  nestedReader.setGeneticCode(geneticCode_);
557  auto tmpModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
558  unique_ptr<ReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<ReversibleSubstitutionModelInterface*>(tmpModel.release()));
559  map<string, string> unparsedParameterValuesNestedModel(nestedReader.getUnparsedArguments());
560 
561  BppORateDistributionFormat rateReader(false);
562  auto nestedRDist = rateReader.readDiscreteDistribution(nestedRateDistDescription, false);
563  map<string, string> unparsedParameterValuesNestedDist(rateReader.getUnparsedArguments());
564 
565  // Now we create the G01 substitution model:
566  model = make_unique<G2001>(std::move(nestedModel), std::move(nestedRDist));
567 
568  // Then we update the parameter set:
569  for (auto& it : unparsedParameterValuesNestedModel)
570  {
571  unparsedArguments_["G01.model_" + it.first] = it.second;
572  }
573  for (auto& it : unparsedParameterValuesNestedDist)
574  {
575  unparsedArguments_["G01.rdist_" + it.first] = it.second;
576  }
577  }
578 
586 
587  else
588  {
589  // This is a 'simple' model...
590 
593  if (modelName == "Equi")
594  model.reset(new EquiprobableSubstitutionModel(alphabet));
596  // nucleotidic model
597  else if (AlphabetTools::isNucleicAlphabet(*alphabet))
598  {
599  if (!(alphabetCode_ & NUCLEOTIDE))
600  throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
601  auto alpha = dynamic_pointer_cast<const NucleicAlphabet>(alphabet);
602 
603  // //////////////////////////////////
604  // gBGC
605  // //////////////////////////////////
606  if (modelName.find("+gBGC") != string::npos)
607  {
608  string subModName = modelName.substr(0, modelName.find("+gBGC"));
609 
610  if (verbose_)
611  ApplicationTools::displayResult("Biased gene conversion for", subModName);
612 
613  string nestedModelDescription = subModName;
614 
615  if (modelDescription.find_first_of("(") != string::npos)
616  {
617  string::size_type begin = modelDescription.find_first_of("(");
618  string::size_type end = modelDescription.find_last_of(")");
619 
620  nestedModelDescription += modelDescription.substr(begin, end - begin + 1);
621  }
622 
623  BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, true, true, false, verbose_, warningLevel_);
624  auto tmpModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
625  unique_ptr<NucleotideSubstitutionModelInterface> nestedModel(dynamic_cast<NucleotideSubstitutionModelInterface*>(tmpModel.release()));
626  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
627 
628  // Now we create the gBGC substitution model:
629  model = make_unique<gBGC>(alpha, std::move(nestedModel));
630 
631  // Then we update the parameter set:
632  for (auto& it : unparsedParameterValuesNested)
633  {
634  unparsedArguments_["gBGC." + it.first] = it.second;
635  }
636  }
637 
638 
639  // /////////////////////////////////
640  // / GTR
641  // ///////////////////////////////
642 
643  else if (modelName == "GTR")
644  {
645  model = make_unique<GTR>(alpha);
646  }
647 
648 
649  // /////////////////////////////////
650  // / SSR
651  // ///////////////////////////////
652 
653  else if (modelName == "SSR")
654  {
655  model = make_unique<SSR>(alpha);
656  }
657 
658  // /////////////////////////////////
659  // / L95
660  // ///////////////////////////////
661 
662  else if (modelName == "L95")
663  {
664  model = make_unique<L95>(alpha);
665  }
666 
667  // /////////////////////////////////
668  // / RN95
669  // ///////////////////////////////
670 
671  else if (modelName == "RN95")
672  {
673  model = make_unique<RN95>(alpha);
674  }
675 
676  // /////////////////////////////////
677  // / RN95s
678  // ///////////////////////////////
679 
680  else if (modelName == "RN95s")
681  {
682  model = make_unique<RN95s>(alpha);
683  }
684 
685  // /////////////////////////////////
686  // / TN93
687  // //////////////////////////////
688 
689  else if (modelName == "TN93")
690  {
691  model = make_unique<TN93>(alpha);
692  }
693 
694  // /////////////////////////////////
695  // / HKY85
696  // ///////////////////////////////
697 
698  else if (modelName == "HKY85")
699  {
700  model = make_unique<HKY85>(alpha);
701  }
702 
703  // /////////////////////////////////
704  // / F81
705  // ///////////////////////////////
706 
707  else if (modelName == "F81")
708  {
709  model = make_unique<F81>(alpha);
710  }
711  // /////////////////////////////////
712  // / F84
713  // ///////////////////////////////
714 
715  else if (modelName == "F84")
716  {
717  model = make_unique<F84>(alpha);
718  }
719 
720  // /////////////////////////////////
721  // / T92
722  // ///////////////////////////////
723 
724  else if (modelName == "T92")
725  {
726  model = make_unique<T92>(alpha);
727  }
728 
729  // /////////////////////////////////
730  // / K80
731  // ///////////////////////////////
732 
733  else if (modelName == "K80")
734  {
735  model = make_unique<K80>(alpha);
736  }
737 
738 
739  // /////////////////////////////////
740  // / JC69
741  // ///////////////////////////////
742 
743  else if (modelName == "JC69")
744  {
745  model = make_unique<JCnuc>(alpha);
746  }
747  else
748  throw Exception("Model '" + modelName + "' unknown, or does not fit nucleic alphabet.");
749  }
750  else if (AlphabetTools::isProteicAlphabet(*alphabet))
751  {
754 
755  if (!(alphabetCode_ & PROTEIN))
756  throw Exception("BppOSubstitutionModelFormat::read. Protein alphabet not supported.");
757  auto alpha = dynamic_pointer_cast<const ProteicAlphabet>(alphabet);
758 
759  if (modelName.find("+F") != string::npos)
760  {
761  string freqOpt = ApplicationTools::getStringParameter("frequencies", args, "Full", "", true, warningLevel_);
762  BppOFrequencySetFormat freqReader(BppOFrequencySetFormat::ALL, false, warningLevel_);
763  auto tmpFreq = freqReader.readFrequencySet(alpha, freqOpt, mData, nData, true);
764  unique_ptr<ProteinFrequencySetInterface> protFreq(dynamic_cast<ProteinFrequencySetInterface*>(tmpFreq.release()));
765 
766  map<string, string> unparsedParameterValuesNested(freqReader.getUnparsedArguments());
767 
768  for (auto& it : unparsedParameterValuesNested)
769  {
770  unparsedArguments_[modelName + "." + it.first] = it.second;
771  }
772 
773  if (modelName == "JC69+F")
774  model = make_unique<JCprot>(alpha, std::move(protFreq));
775  else if (modelName == "DSO78+F")
776  model = make_unique<DSO78>(alpha, std::move(protFreq));
777  else if (modelName == "JTT92+F")
778  model = make_unique<JTT92>(alpha, std::move(protFreq));
779  else if (modelName == "LG08+F")
780  model = make_unique<LG08>(alpha, std::move(protFreq));
781  else if (modelName == "WAG01+F")
782  model = make_unique<WAG01>(alpha, std::move(protFreq));
783  else if (modelName == "Empirical+F")
784  {
785  string prefix = args["name"];
786  if (TextTools::isEmpty(prefix))
787  throw Exception("'name' argument missing for user-defined substitution model.");
788  string fname = args["file"];
789  if (TextTools::isEmpty(fname))
790  throw Exception("'file' argument missing for user-defined substitution model.");
791  model = make_unique<UserProteinSubstitutionModel>(alpha, args["file"], std::move(protFreq), prefix + "+F.");
792  }
793  }
794  else if (modelName == "JC69")
795  model = make_unique<JCprot>(alpha);
796  else if (modelName == "DSO78")
797  model = make_unique<DSO78>(alpha);
798  else if (modelName == "JTT92")
799  model = make_unique<JTT92>(alpha);
800  else if (modelName == "LG08")
801  model = make_unique<LG08>(alpha);
802  else if (modelName == "WAG01")
803  model = make_unique<WAG01>(alpha);
804  // submodels of mixture models
805  else if (modelName.substr(0, 9) == "LGL08_CAT")
806  {
807  string subModelName = modelName.substr(10);
808 
809  size_t posp = modelDescription.find("(");
810 
811  string modelDesc2 = modelName.substr(0, 9) + modelDescription.substr(posp);
812 
813  BppOTransitionModelFormat nestedReader(PROTEIN, true, true, false, verbose_, warningLevel_);
814 
815  auto tmpModel = nestedReader.readTransitionModel(alphabet, modelDesc2, mData, nData, false);
816  auto nestedModel = unique_ptr<MixedTransitionModelInterface>(dynamic_cast<MixedTransitionModelInterface*>(tmpModel.release()));
817 
818  // Check that nestedModel is fine and has subModel of given name
819  if (!nestedModel)
820  throw Exception("Unknown model " + modelName + ".");
821 
822  try
823  {
824  nestedModel->model(subModelName);
825  }
826  catch (NullPointerException&)
827  {
828  throw Exception("BppOSubstitutionModelFormat::read. " + nestedModel->getName() + "argument for model 'FromMixture' has no submodel with name " + subModelName + ".");
829  }
830 
831  // Now we create the FromMixture substitution model:
832  model = make_unique<FromMixtureSubstitutionModel>(*nestedModel, subModelName, modelDesc2);
833  }
834  else if (modelName == "Empirical")
835  {
836  string prefix = args["name"];
837  if (TextTools::isEmpty(prefix))
838  throw Exception("'name' argument missing for user-defined substitution model.");
839  model = make_unique<UserProteinSubstitutionModel>(alpha, args["file"], prefix);
840  }
841  else if (modelName == "Coala")
842  {
843  string exchangeability = args["exch"];
844  if (TextTools::isEmpty(exchangeability))
845  throw Exception("BppOSubstitutionModelFormat::read. missing argument 'exch' for model 'Coala'.");
846  string prefix = args["name"];
847  if (exchangeability == "Empirical" && TextTools::isEmpty(prefix))
848  throw Exception("'name' argument missing to specify the exchangeabilities of the user-defined empirical model.");
849  BppOSubstitutionModelFormat nestedReader(PROTEIN, false, allowMixed_, allowGaps_, verbose_, warningLevel_);
850  auto tmpModel = nestedReader.readSubstitutionModel(alphabet, exchangeability, mData, nData, false);
851  unique_ptr<ProteinSubstitutionModelInterface> nestedModel(dynamic_cast<ProteinSubstitutionModelInterface*>(tmpModel.release()));
852  string nbrOfParametersPerBranch = args["nbrAxes"];
853  if (TextTools::isEmpty(nbrOfParametersPerBranch))
854  throw Exception("'nbrAxes' argument missing to define the number of axis of the Correspondence Analysis.");
855  // Now we create the Coala model
856  model = make_unique<Coala>(alpha, *nestedModel, TextTools::to<unsigned int>(nbrOfParametersPerBranch));
857  if (nData)
858  model->setFreqFromData(*mData.at(nData));
859  }
860  else
861  throw Exception("Model '" + modelName + "' is unknown, or does not fit proteic alphabet.");
862  }
863  else if (AlphabetTools::isBinaryAlphabet(*alphabet))
864  {
865  if (!(alphabetCode_ & BINARY))
866  throw Exception("BppOSubstitutionModelFormat::read. Binary alphabet not supported.");
867 
868  auto balpha = dynamic_pointer_cast<const BinaryAlphabet>(alphabet);
869 
870  if (modelName == "Binary")
871  model = make_unique<BinarySubstitutionModel>(balpha);
872  else if (modelName == "TwoParameterBinary")
873  model = make_unique<TwoParameterBinarySubstitutionModel>(balpha);
874  else
875  throw Exception("Model '" + modelName + "' unknown, or does not fit binary alphabet.");
876  }
877  else if (AlphabetTools::isIntegerAlphabet(*alphabet))
878  {
879  if (!(alphabetCode_ & INTEGER))
880  throw Exception("BppOSubstitutionModelFormat::read. Integer alphabet not supported.");
881 
882  auto balpha = dynamic_pointer_cast<const IntegerAlphabet>(alphabet);
883 
884  if (modelName == "D1Walk")
885  model.reset(new D1WalkSubstitutionModel(balpha));
886  else
887  throw Exception("Model '" + modelName + "' unknown, or does not fit integer alphabet.");
888  }
889  else
890  throw Exception("Model '" + modelName + "' unknown, or does not fit " + alphabet->getAlphabetType() + " alphabet.");
891  }
892 
893  if (verbose_)
894  ApplicationTools::displayResult("Substitution model", modelName);
895 
896  updateParameters_(*model, args);
897 
898 
899  if (parseArguments)
900  {
901  if (nData)
902  initialize_(*model, mData.at(nData));
903  else
904  initialize_(*model, 0);
905  }
906 
907  return model;
908 }
909 
910 
912  BranchModelInterface& model,
913  map<std::string, std::string>& args)
914 {
915  // Update parameter args:
916  vector<string> pnames = model.getParameters().getParameterNames();
917 
918  string pref = model.getNamespace();
919 
920  for (auto pname : pnames)
921  {
922  string name = model.getParameterNameWithoutNamespace(pname);
923  if (args.find(name) != args.end())
924  unparsedArguments_[pref + name] = args[name];
925  }
926 
927  // Now look if some parameters are aliased:
929  string pname, pval, pname2;
930  for (size_t i = 0; i < pl.size(); ++i)
931  {
932  pname = model.getParameterNameWithoutNamespace(pl[i].getName());
933 
934  if (args.find(pname) == args.end())
935  continue;
936  pval = args[pname];
937 
938  if (((pval.rfind("_") != string::npos) && (TextTools::isDecimalInteger(pval.substr(pval.rfind("_") + 1, string::npos)))) ||
939  (pval.find("(") != string::npos))
940  continue;
941 
942  bool found = false;
943  for (size_t j = 0; j < pl.size() && !found; ++j)
944  {
945  pname2 = model.getParameterNameWithoutNamespace(pl[j].getName());
946 
947  // if (j == i || args.find(pname2) == args.end()) continue; Julien 03/03/2010: This extra condition prevents complicated (nested) models to work properly...
948  if (j == i)
949  continue;
950  if (pval == pname2)
951  {
952  // This is an alias...
953  // NB: this may throw an exception if incorrect! We leave it as is for now :s
954  model.aliasParameters(pname2, pname);
955  if (verbose_)
956  ApplicationTools::displayResult("Parameter alias found", pname + "->" + pname2);
957  found = true;
958  }
959  }
960  // if (!TextTools::isDecimalNumber(pval) && !found)
961  // throw Exception("Incorrect parameter syntax: parameter " + pval + " was not found and can't be used as a value for parameter " + pname + ".");
962  }
963 
964  // 2 following tests be removed in a later version
965  if (args.find("useObservedFreqs") != args.end())
966  throw Exception("useObservedFreqs argument is obsolete. Please use 'initFreqs=observed' instead.");
967  if (args.find("useObservedFreqs.pseudoCount") != args.end())
968  throw Exception("useObservedFreqs.pseudoCount argument is obsolete. Please use 'initFreqs.observedPseudoCount' instead.");
969 
970  if (args.find("initFreqs") != args.end())
971  // Specific case since already used
972  if (pref!="Coala.")
973  unparsedArguments_[pref + "initFreqs"] = args["initFreqs"];
974  if (args.find("initFreqs.observedPseudoCount") != args.end())
975  unparsedArguments_[pref + "initFreqs.observedPseudoCount"] = args["initFreqs.observedPseudoCount"];
976 
977  if (args.find("rate") != args.end())
978  {
979  model.addRateParameter();
980  unparsedArguments_[pref + "rate"] = args["rate"];
981  }
982 }
983 
984 
985 unique_ptr<SubstitutionModelInterface> BppOSubstitutionModelFormat::readWord_(
986  std::shared_ptr<const Alphabet> alphabet,
987  const std::string& modelDescription,
988  const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
989  size_t nData)
990 {
991  unique_ptr<SubstitutionModelInterface> model;
992  string modelName = "";
993  map<string, string> args;
994  KeyvalTools::parseProcedure(modelDescription, modelName, args);
995 
996  vector<string> v_nestedModelDescription;
997  vector< unique_ptr<SubstitutionModelInterface>> v_pSM;
998  shared_ptr<const CoreWordAlphabet> pWA;
999 
1000  string s, nestedModelDescription;
1001  unsigned int nbmodels;
1002 
1003  if (((modelName == "Word" || modelName == "Kron") && !AlphabetTools::isWordAlphabet(*alphabet)) ||
1004  ((!(modelName == "Word" || modelName == "Kron")) && !AlphabetTools::isCodonAlphabet(*alphabet)))
1005  throw Exception("Bad alphabet type "
1006  + alphabet->getAlphabetType() + " for model " + modelName + ".");
1007 
1008  pWA = dynamic_pointer_cast<const CoreWordAlphabet>(alphabet);
1009 
1010 
1013 
1014  if (args.find("model") != args.end())
1015  {
1016  v_nestedModelDescription.push_back(args["model"]);
1017  nbmodels = (modelName == "Word" || modelName == "Kron") ? pWA->getLength() : 3;
1018  }
1019  else
1020  {
1021  if (args.find("model1") == args.end())
1022  throw Exception("Missing argument 'model' or 'model1' for model " + modelName + ".");
1023 
1024  nbmodels = 0;
1025 
1026  while (args.find("model" + TextTools::toString(nbmodels + 1)) != args.end())
1027  v_nestedModelDescription.push_back(args["model" + TextTools::toString(++nbmodels)]);
1028  }
1029 
1030  if (nbmodels < 2)
1031  throw Exception("Missing nested models for model " + modelName + ".");
1032 
1033  if (pWA->getLength() != nbmodels)
1034  throw Exception("Bad alphabet type "
1035  + alphabet->getAlphabetType() + " for model " + modelName + ".");
1036 
1037  if (v_nestedModelDescription.size() != nbmodels)
1038  {
1039  BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, false, true, false, false, warningLevel_);
1040  model = nestedReader.readSubstitutionModel(pWA->getNAlphabet(0), v_nestedModelDescription[0], mData, nData, false);
1041  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
1042  string pref = "";
1043  for (unsigned int i = 0; i < nbmodels; ++i)
1044  {
1045  pref += TextTools::toString(i + 1);
1046  }
1047 
1048  for (auto& it : unparsedParameterValuesNested)
1049  {
1050  unparsedArguments_[modelName + "." + pref + "_" + it.first] = it.second;
1051  }
1052 
1053  v_pSM.push_back(std::move(model));
1054  }
1055  else
1056  {
1057  for (unsigned i = 0; i < v_nestedModelDescription.size(); ++i)
1058  {
1059  BppOSubstitutionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
1060  model = nestedReader.readSubstitutionModel(pWA->getNAlphabet(i), v_nestedModelDescription[i], mData, nData, false);
1061  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
1062  for (auto& it : unparsedParameterValuesNested)
1063  {
1064  unparsedArguments_[modelName + "." + TextTools::toString(i + 1) + "_" + it.first] = it.second;
1065  }
1066 
1067  v_pSM.push_back(std::move(model));
1068  }
1069  }
1070 
1071  // //////////////////////////////
1072  // In case of a Kron... model, mgmt of the positions
1073 
1074  // check the vector of simultaneous changing positions
1075 
1076  vector<set<size_t>> vposKron;
1077  if (modelName.substr(0, 4) == "Kron")
1078  {
1079  if (args.find("positions") != args.end())
1080  {
1081  StringTokenizer nst(args["positions"], "+");
1082 
1083  while (nst.hasMoreToken())
1084  {
1085  string spos = nst.nextToken();
1086  StringTokenizer nst2(spos, "*");
1087 
1088  set<size_t> ss;
1089 
1090  while (nst2.hasMoreToken())
1091  {
1092  ss.insert((size_t)TextTools::toInt(nst2.nextToken()));
1093  }
1094 
1095  vposKron.push_back(ss);
1096  }
1097  }
1098  }
1099 
1100  // /////////////////////////////////
1101  // / WORD
1102  // ///////////////////////////////
1103 
1104  if (modelName == "Word")
1105  {
1106  if (v_nestedModelDescription.size() != nbmodels)
1107  {
1108  model = make_unique<WordSubstitutionModel>(std::move(v_pSM[0]), nbmodels);
1109  }
1110  else
1111  {
1112  ModelList ml(v_pSM);
1113  model = make_unique<WordSubstitutionModel>(ml);
1114  }
1115  }
1116 
1117  // /////////////////////////////////
1118  // / KRON
1119  // ///////////////////////////////
1120 
1121  else if (modelName == "Kron")
1122  {
1123  if (vposKron.size() == 0)
1124  {
1125  if (v_nestedModelDescription.size() != nbmodels)
1126  {
1127  model = make_unique<KroneckerWordSubstitutionModel>(std::move(v_pSM[0]), nbmodels);
1128  }
1129  else
1130  {
1131  ModelList ml(v_pSM);
1132  model = make_unique<KroneckerWordSubstitutionModel>(ml);
1133  }
1134  }
1135  else
1136  {
1137  if (v_nestedModelDescription.size() != nbmodels)
1138  {
1139  model = make_unique<KroneckerWordSubstitutionModel>(std::move(v_pSM[0]), nbmodels, vposKron);
1140  }
1141  else
1142  {
1143  ModelList ml(v_pSM);
1144  model = make_unique<KroneckerWordSubstitutionModel>(ml, vposKron);
1145  }
1146  }
1147  }
1148 
1149  // /////////////////////////////////
1150  // / CODON
1151  // ///////////////////////////////
1152 
1153  else
1154  {
1155  auto pCA = dynamic_pointer_cast<const CodonAlphabet>(pWA);
1156  if (pCA == 0)
1157  throw Exception("Non codon Alphabet for model" + modelName + ".");
1158 
1159  unique_ptr<AlphabetIndex2> pai2;
1160  unique_ptr<CodonFrequencySetInterface> pFS;
1161 
1162  if ((!dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].get())) ||
1163  ((v_nestedModelDescription.size() == 3) &&
1164  (!dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].get()) || !dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].get()))))
1165  throw Exception("Non simple NucleotideSubstitutionModel embedded in " + modelName + " model.");
1166 
1167  if (args.find("genetic_code") != args.end())
1168  {
1169  ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
1170  throw Exception("BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
1171  }
1172 
1173  if (!geneticCode_)
1174  throw Exception("BppOSubstitutionModelFormat::readWord_(). No genetic code specified! Consider using 'setGeneticCode'.");
1175 
1176 
1179 
1180  if ((modelName.find("Dist") != string::npos) || (modelName == "SENCA"))
1181  pai2 = (args.find("aadistance") == args.end()) ? nullptr : SequenceApplicationTools::getAlphabetIndex2(AlphabetTools::PROTEIN_ALPHABET, args["aadistance"]);
1182 
1185 
1186  if (modelName.find("Freq") != string::npos)
1187  {
1188  if (args.find("frequencies") == args.end())
1189  throw Exception("Missing equilibrium frequencies.");
1190 
1191  BppOFrequencySetFormat bIOFreq(alphabetCode_, verbose_, warningLevel_);
1192  bIOFreq.setGeneticCode(geneticCode_); // This uses the same instance as the one that will be used by the model
1193  auto tmp = bIOFreq.readFrequencySet(pCA, args["frequencies"], mData, nData);
1194  pFS = unique_ptr<CodonFrequencySetInterface>(dynamic_cast<CodonFrequencySetInterface*>(tmp.release()));
1195  if (!pFS)
1196  throw IOException("BppOSubstitutionModelFormat::readWord_(). Incorrect codon frequencies.");
1197  map<string, string> unparsedParameterValuesNested(bIOFreq.getUnparsedArguments());
1198 
1199  for (auto& it : unparsedParameterValuesNested)
1200  {
1201  unparsedArguments_[modelName + "." + it.first] = it.second;
1202  }
1203  }
1204 
1205  // //////////////
1206  // // Triplet
1207 
1208  if (modelName == "Triplet")
1209  model = (v_nestedModelDescription.size() != 3)
1210  ? make_unique<TripletSubstitutionModel>(
1211  pCA,
1212  unique_ptr<NucleotideSubstitutionModelInterface>(
1213  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1214  )
1215  )
1216  : make_unique<TripletSubstitutionModel>(
1217  pCA,
1218  unique_ptr<NucleotideSubstitutionModelInterface>(
1219  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1220  ),
1221  unique_ptr<NucleotideSubstitutionModelInterface>(
1222  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1223  ),
1224  unique_ptr<NucleotideSubstitutionModelInterface>(
1225  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1226  )
1227  );
1228 
1229  else if (modelName.find("Codon") != string::npos)
1230  {
1231  vector< unique_ptr<CoreCodonSubstitutionModelInterface>> vCSM;
1232  string name = "Codon";
1233  map<string, string> unparsedParameterValuesNested;
1234 
1235  if (modelName.find("Dist") != string::npos)
1236  {
1237  name += "Dist";
1238 
1239  vCSM.push_back(make_unique<AbstractCodonDistanceSubstitutionModel>(std::move(pai2), geneticCode_, ""));
1240  }
1241  else if (modelName.find("BGC") != string::npos)
1242  {
1243  name += "BGC";
1244 
1245  vCSM.push_back(make_unique<AbstractCodonBGCSubstitutionModel>(geneticCode_, ""));
1246  }
1247  else if (modelName.find("Prot") != string::npos)
1248  {
1249  name += "Prot";
1250 
1251  if (args.find("protmodel") == args.end())
1252  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'protmodel' for codon model argument 'Prot'.");
1253 
1254  nestedModelDescription = args["protmodel"];
1255  BppOSubstitutionModelFormat nestedReader(PROTEIN, false, false, allowGaps_, verbose_, warningLevel_);
1256 
1257  shared_ptr<SubstitutionModelInterface> tmpModel = nestedReader.readSubstitutionModel(geneticCode_->getTargetAlphabet(), nestedModelDescription, mData, nData, false);
1258  auto nestedModel = dynamic_pointer_cast<ProteinSubstitutionModelInterface>(tmpModel);
1259 
1260  unparsedParameterValuesNested.insert(nestedReader.getUnparsedArguments().begin(), nestedReader.getUnparsedArguments().end());
1261 
1262  vCSM.push_back(make_unique<AbstractCodonAARateSubstitutionModel>(nestedModel, geneticCode_, ""));
1263  }
1264  if (modelName.find("AAClust") != string::npos)
1265  {
1266  name += "AAClust";
1267 
1268  // Initialization using the "assign" argument
1269  vector<uint> partition;
1270  if (args.find("partition") != args.end())
1271  {
1272  string rf = args["partition"];
1273 
1274  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
1275  while (strtok.hasMoreToken())
1276  partition.push_back(TextTools::to<uint>(strtok.nextToken()));
1277  }
1278 
1279  unique_ptr<AbstractCodonClusterAASubstitutionModel> aca = partition.size() != 0 ?
1280  make_unique<AbstractCodonClusterAASubstitutionModel>(geneticCode_, "", partition) :
1281  make_unique<AbstractCodonClusterAASubstitutionModel>(geneticCode_, "");
1282 
1283  vCSM.push_back(std::move(aca));
1284  }
1285 
1287  if (vCSM.size() == 0)
1288  name += "Rate";
1289 
1290  if (modelName.find("CpG") != string::npos)
1291  {
1292  name += "CpG";
1293  vCSM.push_back(make_unique<AbstractCodonCpGSubstitutionModel>(pCA, ""));
1294  }
1295 
1296  if (modelName.find("AAFit") != string::npos)
1297  {
1298  name += "AAFit";
1299 
1300  if (args.find("fitness") == args.end())
1301  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'fitness' for codon model argument 'AAFit'.");
1302 
1303  string nestedFreqDescription = args["fitness"];
1304  BppOFrequencySetFormat nestedReader(PROTEIN, verbose_, warningLevel_);
1305 
1306  auto nestedFreq = nestedReader.readFrequencySet(geneticCode_->getTargetAlphabet(), nestedFreqDescription, mData, nData, false);
1307 
1308  for (auto it : nestedReader.getUnparsedArguments())
1309  {
1310  unparsedParameterValuesNested["fit_" + it.first] = it.second;
1311  }
1312 
1313  auto aca = make_unique<AbstractCodonAAFitnessSubstitutionModel>(std::move(nestedFreq), geneticCode_, "");
1314 
1315  if (args.find("Ns") != args.end())
1316  aca->addNsParameter();
1317 
1318  vCSM.push_back(std::move(aca));
1319  }
1320  else if (modelName.find("Fit") != string::npos)
1321  {
1322  if (args.find("fitness") == args.end())
1323  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'fitness' for codon model argument 'Fit'.");
1324  string nestedFreqDescription = args["fitness"];
1325 
1326  BppOFrequencySetFormat nestedReader(alphabetCode_, verbose_, warningLevel_);
1327  nestedReader.setGeneticCode(geneticCode_);
1328 
1329  auto nestedFreq = nestedReader.readFrequencySet(alphabet, nestedFreqDescription, mData, nData, false);
1330 
1331  for (auto it : nestedReader.getUnparsedArguments())
1332  {
1333  unparsedParameterValuesNested["fit_" + it.first] = it.second;
1334  }
1335 
1336  vCSM.push_back(make_unique<AbstractCodonFitnessSubstitutionModel>(std::move(nestedFreq), geneticCode_, ""));
1337  }
1338 
1339  if (modelName.find("PhasFreq") != string::npos)
1340  {
1341  name += "PhasFreq";
1342  vCSM.push_back(make_unique<AbstractCodonPhaseFrequenciesSubstitutionModel>(std::move(pFS), ""));
1343  }
1344  else if (modelName.find("Freq") != string::npos)
1345  {
1346  name += "Freq";
1347  vCSM.push_back(make_unique<AbstractCodonFrequenciesSubstitutionModel>(std::move(pFS), ""));
1348  }
1349 
1350  // Then we update the parameter set:
1351  for (auto it : unparsedParameterValuesNested)
1352  {
1353  unparsedArguments_[name + "." + it.first] = it.second;
1354  }
1355 
1356  model = (v_nestedModelDescription.size() != 3)
1357  ? make_unique<CodonAdHocSubstitutionModel>(
1358  geneticCode_,
1359  unique_ptr<NucleotideSubstitutionModelInterface>(
1360  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1361  ),
1362  vCSM,
1363  name)
1364  : make_unique<CodonAdHocSubstitutionModel>(
1365  geneticCode_,
1366  unique_ptr<NucleotideSubstitutionModelInterface>(
1367  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1368  ),
1369  unique_ptr<NucleotideSubstitutionModelInterface>(
1370  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1371  ),
1372  unique_ptr<NucleotideSubstitutionModelInterface>(
1373  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1374  ),
1375  vCSM,
1376  name);
1377  }
1378  else if (modelName == "KronDistFreq")
1379  {
1380  if (v_nestedModelDescription.size() != 3)
1381  {
1382  if (vposKron.size() == 0)
1383  model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(geneticCode_,
1384  unique_ptr<NucleotideSubstitutionModelInterface>(
1385  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())),
1386  std::move(pFS), std::move(pai2));
1387  else
1388  model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(geneticCode_,
1389  unique_ptr<NucleotideSubstitutionModelInterface>(
1390  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())),
1391  vposKron, std::move(pFS), std::move(pai2));
1392  }
1393  else
1394  {
1395  if (vposKron.size() != 0)
1396  model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(
1397  geneticCode_,
1398  unique_ptr<NucleotideSubstitutionModelInterface>(
1399  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1400  ),
1401  unique_ptr<NucleotideSubstitutionModelInterface>(
1402  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1403  ),
1404  unique_ptr<NucleotideSubstitutionModelInterface>(
1405  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1406  ),
1407  std::move(pFS),
1408  std::move(pai2));
1409  else
1410  model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(
1411  geneticCode_,
1412  unique_ptr<NucleotideSubstitutionModelInterface>(
1413  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1414  ),
1415  unique_ptr<NucleotideSubstitutionModelInterface>(
1416  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1417  ),
1418  unique_ptr<NucleotideSubstitutionModelInterface>(
1419  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1420  ),
1421  vposKron,
1422  std::move(pFS),
1423  std::move(pai2));
1424  }
1425  }
1426  else if (modelName == "KronDist")
1427  {
1428  if (v_nestedModelDescription.size() != 3)
1429  {
1430  if (vposKron.size() == 0)
1431  model = make_unique<KroneckerCodonDistanceSubstitutionModel>(geneticCode_,
1432  unique_ptr<NucleotideSubstitutionModelInterface>(dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())),
1433  std::move(pai2));
1434  else
1435  model = make_unique<KroneckerCodonDistanceSubstitutionModel>(geneticCode_,
1436  unique_ptr<NucleotideSubstitutionModelInterface>(dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())),
1437  vposKron, std::move(pai2));
1438  }
1439  else
1440  {
1441  if (vposKron.size() != 0)
1442  model = make_unique<KroneckerCodonDistanceSubstitutionModel>(
1443  geneticCode_,
1444  unique_ptr<NucleotideSubstitutionModelInterface>(
1445  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1446  ),
1447  unique_ptr<NucleotideSubstitutionModelInterface>(
1448  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1449  ),
1450  unique_ptr<NucleotideSubstitutionModelInterface>(
1451  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1452  ),
1453  std::move(pai2));
1454  else
1455  model = make_unique<KroneckerCodonDistanceSubstitutionModel>(
1456  geneticCode_,
1457  unique_ptr<NucleotideSubstitutionModelInterface>(
1458  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1459  ),
1460  unique_ptr<NucleotideSubstitutionModelInterface>(
1461  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1462  ),
1463  unique_ptr<NucleotideSubstitutionModelInterface>(
1464  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1465  ),
1466  vposKron,
1467  std::move(pai2));
1468  }
1469  }
1470 
1471  else if (modelName == "SENCA")
1472  {
1473  if (args.find("fitness") == args.end())
1474  throw Exception("Missing fitness in model " + modelName + ".");
1475 
1476  BppOFrequencySetFormat bIOFreq(alphabetCode_, verbose_, warningLevel_);
1477  bIOFreq.setGeneticCode(geneticCode_);
1478 
1479  auto pFit(bIOFreq.readFrequencySet(pCA, args["fitness"], mData, nData, false));
1480  map<string, string> unparsedParameterValuesNested(bIOFreq.getUnparsedArguments());
1481 
1482  for (auto& it : unparsedParameterValuesNested)
1483  {
1484  unparsedArguments_[modelName + ".fit_" + it.first] = it.second;
1485  }
1486 
1487  if (v_nestedModelDescription.size() != 3)
1488  {
1489  model = make_unique<SENCA>(geneticCode_,
1490  unique_ptr<NucleotideSubstitutionModelInterface>(
1491  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1492  ),
1493  std::move(pFit), std::move(pai2));
1494  }
1495  else
1496  model = make_unique<SENCA>(
1497  geneticCode_,
1498  unique_ptr<NucleotideSubstitutionModelInterface>(
1499  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1500  ),
1501  unique_ptr<NucleotideSubstitutionModelInterface>(
1502  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1503  ),
1504  unique_ptr<NucleotideSubstitutionModelInterface>(
1505  dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1506  ),
1507  std::move(pFit),
1508  std::move(pai2));
1509  }
1510  }
1511 
1512  return model;
1513 }
1514 
1516  OutputStream& out,
1517  std::map<std::string, std::string>& globalAliases,
1518  std::vector<std::string>& writtenNames) const
1519 {
1520  bool comma = false;
1521 
1522  // Mixed Model that are defined as "Mixture" and "Mixed"
1523 
1524  if ((dynamic_cast<const MixedTransitionModelInterface*>(&model) != nullptr) && (dynamic_cast<const AbstractBiblioMixedTransitionModel*>(&model) == nullptr))
1525  {
1526  writeMixed_(dynamic_cast<const MixedTransitionModelInterface&>(model), out, globalAliases, writtenNames);
1527  return;
1528  }
1529 
1530  auto name = model.getName();
1531  auto parend = (name.rfind(")") == name.size() - 1);
1532 
1533  out << (parend ? name.substr(0, name.size() - 1) : name + "(");
1534 
1535  // Is it a protein user defined model?
1536  try
1537  {
1538  const UserProteinSubstitutionModel& userModel = dynamic_cast<const UserProteinSubstitutionModel&>(model);
1539  out << "file=" << userModel.getPath();
1540  string ns = userModel.getNamespace();
1541 
1542  if (TextTools::hasSubstring(ns, "+F.") )
1543  ns = ns.substr(0, ns.size() - 3);
1544  else
1545  ns = ns.substr(0, ns.size() - 1);
1546 
1547  out << ", name=" << ns;
1548  comma = true;
1549  }
1550  catch (bad_cast&)
1551  {}
1552 
1553  // Is it a markov-modulated model?
1554  try
1555  {
1556  const MarkovModulatedSubstitutionModel& mmModel = dynamic_cast<const MarkovModulatedSubstitutionModel&>(model);
1557  out << "model=";
1558  write(mmModel.nestedModel(), out, globalAliases, writtenNames);
1559 
1560  try
1561  {
1562  const G2001& gModel = dynamic_cast<const G2001&>(model);
1563  // Also print distribution here:
1564  out << ", rdist=";
1565  const DiscreteDistributionInterface& nestedDist = gModel.rateDistribution();
1567  bIO.writeDiscreteDistribution(nestedDist, out, globalAliases, writtenNames);
1568  }
1569  catch (bad_cast&)
1570  {}
1571  comma = true;
1572  }
1573  catch (bad_cast&)
1574  {}
1575 
1576  // Is it a model with gaps?
1577  try
1578  {
1579  const RE08& reModel = dynamic_cast<const RE08&>(model);
1580  out << "model=";
1581  write(reModel.nestedModel(), out, globalAliases, writtenNames);
1582  comma = true;
1583  }
1584  catch (bad_cast&)
1585  {}
1586 
1587  // Is it a YpR model?
1588  try
1589  {
1590  const YpR& yprModel = dynamic_cast<const YpR&>(model);
1591  out << "model=";
1592  write(yprModel.nestedModel(), out, globalAliases, writtenNames);
1593  comma = true;
1594  }
1595  catch (bad_cast&)
1596  {}
1597 
1598  // Is it a OneChange model?
1599  try
1600  {
1601  const OneChangeTransitionModel& oneChangeTransitionModel = dynamic_cast<const OneChangeTransitionModel&>(model);
1602  out << "model=";
1603  write(oneChangeTransitionModel.model(), out, globalAliases, writtenNames);
1604  comma = true;
1605  }
1606  catch (bad_cast&)
1607  {
1608  // Is it a model with register?
1609  try
1610  {
1611  const OneChangeRegisterTransitionModel& oneChangeRegisterTransitionModel = dynamic_cast<const OneChangeRegisterTransitionModel&>(model);
1612  out << "model=";
1613  write(oneChangeRegisterTransitionModel.model(), out, globalAliases, writtenNames);
1614  comma = true;
1615  out << ", register=";
1616  out << oneChangeRegisterTransitionModel.getRegisterName();
1617  out << ", numReg=" << VectorTools::paste(oneChangeRegisterTransitionModel.getRegisterNumbers(), "+");
1618  }
1619  catch (bad_cast&)
1620  {}
1621 
1622  try
1623  {
1624  const RegisterRatesSubstitutionModel& registerRatesSubstitutionModel = dynamic_cast<const RegisterRatesSubstitutionModel&>(model);
1625  out << "model=";
1626  write(registerRatesSubstitutionModel.model(), out, globalAliases, writtenNames);
1627  comma = true;
1628  out << ", register=";
1629  out << registerRatesSubstitutionModel.getRegisterName();
1630  }
1631  catch (bad_cast&)
1632  {}
1633  }
1634 
1635  // Is it a gBGC model?
1636  try
1637  {
1638  const gBGC& gbgcModel = dynamic_cast<const gBGC&>(model);
1639  StdStr sout;
1640  write(gbgcModel.nestedModel(), sout, globalAliases, writtenNames);
1641 
1642  string ss = sout.str();
1643  auto begin = ss.find_first_of("(");
1644  auto end = ss.find_last_of(")");
1645 
1646  out << ss.substr(begin + 1, end - begin - 1);
1647 
1648  comma = true;
1649  }
1650  catch (bad_cast&)
1651  {}
1652 
1653  // Is it a word model?
1654  try
1655  {
1656  const AbstractWordSubstitutionModel& wM = dynamic_cast<const AbstractWordSubstitutionModel&>(model);
1657  size_t nmod = wM.getNumberOfModels();
1658  const TransitionModelInterface& mod0 = wM.nModel(0);
1659  if (nmod == 1)
1660  {
1661  out << "model=";
1662  write(mod0, out, globalAliases, writtenNames);
1663  }
1664  else
1665  {
1666  const TransitionModelInterface& mod1 = wM.nModel(1);
1667  if (&mod1 == &mod0)
1668  {
1669  out << "model=";
1670  write(mod0, out, globalAliases, writtenNames);
1671  }
1672  else
1673  {
1674  out << "model1=";
1675  write(mod0, out, globalAliases, writtenNames);
1676  for (unsigned int i = 1; i < nmod; ++i)
1677  {
1678  out << ", model" + TextTools::toString(i + 1) + "=";
1679  write(wM.nModel(i), out, globalAliases, writtenNames);
1680  }
1681  }
1682  }
1683  comma = true;
1684  }
1685  catch (bad_cast&)
1686  {}
1687 
1688  // Is it a COaLA model ?
1689  try
1690  {
1691  const Coala& coalaModel = dynamic_cast<const Coala&>(model);
1692  out << "exch=" << coalaModel.getExch() << ", nbrAxes=" << coalaModel.getNbrOfAxes();
1693  comma = true;
1694  }
1695  catch (bad_cast&)
1696  {}
1697 
1698  // Is it a InMixed model?
1699  try
1700  {
1701  const InMixedSubstitutionModel& inModel = dynamic_cast<const InMixedSubstitutionModel&>(model);
1702  out << "model=";
1703  write(inModel.mixedModel(), out, globalAliases, writtenNames);
1704  out << ", numMod=" << TextTools::toString(inModel.getSubModelNumber());
1705  comma = true;
1706  }
1707  catch (bad_cast&)
1708  {}
1709 
1710  // Is it a POMO model?
1711  const POMO* pomo = dynamic_cast<const POMO*>(&model);
1712  if (pomo)
1713  {
1714  out << "model=";
1715  write(pomo->mutationModel(), out, globalAliases, writtenNames);
1716 
1717  if (pomo->hasFitness())
1718  {
1719  out << ", fitness=";
1720 
1721  BppOFrequencySetFormat bIOFreq(alphabetCode_, false, warningLevel_);
1722  bIOFreq.writeFrequencySet(pomo->fitness(), out, globalAliases, writtenNames);
1723  }
1724  comma = true;
1725  }
1726 
1727  // Is it a model with FrequencySet?
1728 
1729  try
1730  {
1731  auto& pfs = model.frequencySet();
1732 
1733  StdStr freqdesc;
1734  BppOFrequencySetFormat bIOFreq(alphabetCode_, false, warningLevel_);
1735  bIOFreq.writeFrequencySet(pfs, freqdesc, globalAliases, writtenNames);
1736  string fd = freqdesc.str();
1737  if (fd.size() != 0)
1738  {
1739  if (comma)
1740  out << ", ";
1741  out << "frequencies=" + fd;
1742  }
1743  comma = true;
1744  }
1745  catch (exception&)
1746  {}
1747 
1748  // Is it a codon model with Protein Model or partition in it?
1749  try
1750  {
1751  auto& casm = dynamic_cast<const CodonAdHocSubstitutionModel&>(model);
1752  for (size_t i = 0; i < casm.getNumberOfModels(); ++i)
1753  {
1754  try
1755  {
1756  auto& acr = dynamic_cast<const AbstractCodonAARateSubstitutionModel&>(casm.layerModel(i));
1757  if (comma)
1758  out << ", ";
1759  out << "protmodel=";
1760 
1761  write(acr.aaModel(), out, globalAliases, writtenNames);
1762  comma = true;
1763  }
1764  catch (bad_cast&)
1765  {}
1766 
1767  try
1768  {
1769  auto& acf = dynamic_cast<const AbstractCodonAAFitnessSubstitutionModel&>(casm.layerModel(i));
1770  if (comma)
1771  out << ", ";
1772  out << "fitness=";
1773 
1774  BppOFrequencySetFormat bIOFreq(PROTEIN, false, warningLevel_);
1775  bIOFreq.writeFrequencySet(acf.aaFitness(), out, globalAliases, writtenNames);
1776  comma = true;
1777  }
1778  catch (bad_cast&)
1779  {}
1780 
1781  try
1782  {
1783  auto& acc = dynamic_cast<const AbstractCodonClusterAASubstitutionModel&>(casm.layerModel(i));
1784  if (comma)
1785  out << ", ";
1786  out << "partition=(";
1787  const vector<uint>& vass = acc.getAssign();
1788 
1789  for (size_t j = 0; j < vass.size(); ++j)
1790  {
1791  if (j != 0)
1792  out << ", ";
1793  out << vass[j];
1794  }
1795  out << ")";
1796  comma = true;
1797  }
1798  catch (bad_cast&)
1799  {}
1800  }
1801  }
1802  catch (bad_cast&)
1803  {}
1804 
1805  // Specific case of SENCA
1806 
1807  try
1808  {
1809  auto& pCF = dynamic_cast<const SENCA&>(model);
1810  if (comma)
1811  out << ", ";
1812  out << "fitness=";
1813 
1814  BppOFrequencySetFormat bIOFreq(alphabetCode_, false, warningLevel_);
1815  bIOFreq.writeFrequencySet(pCF.fitness(), out, globalAliases, writtenNames);
1816  comma = true;
1817  }
1818  catch (bad_cast&)
1819  {}
1820 
1821  // and bibliomixed models
1822 
1823  try
1824  {
1825  auto& pM7 = dynamic_cast<const YNGP_M7&>(model);
1826  if (comma)
1827  out << ", ";
1828  out << "n=" << pM7.getNumberOfModels();
1829  comma = true;
1830  }
1831  catch (bad_cast&)
1832  {}
1833 
1834  try
1835  {
1836  auto& pM8 = dynamic_cast<const YNGP_M8&>(model);
1837  if (comma)
1838  out << ", ";
1839  out << "n=" << pM8.getNumberOfModels() - 1;
1840  comma = true;
1841  }
1842  catch (bad_cast&)
1843  {}
1844 
1845  try
1846  {
1847  auto& pM9 = dynamic_cast<const YNGP_M9&>(model);
1848  if (comma)
1849  out << ", ";
1850  out << "nbeta=" << pM9.getNBeta() << ", ngamma=" << pM9.getNGamma();
1851 
1852  comma = true;
1853  }
1854  catch (bad_cast&)
1855  {}
1856 
1857  try
1858  {
1859  auto& pM10 = dynamic_cast<const YNGP_M10&>(model);
1860  if (comma)
1861  out << ", ";
1862  out << "nbeta=" << pM10.getNBeta() << ", ngamma=" << pM10.getNGamma();
1863 
1864  comma = true;
1865  }
1866  catch (bad_cast&)
1867  {}
1868 
1869  try
1870  {
1871  auto& pLGL = dynamic_cast<const LGL08_CAT&>(model);
1872  if (comma)
1873  out << ", ";
1874  out << "nbCat=" << pLGL.getNumberOfCategories();
1875 
1876  comma = true;
1877  }
1878  catch (bad_cast&)
1879  {}
1880 
1881  try
1882  {
1883  auto& pDFP = dynamic_cast<const DFP07&>(model);
1884  if (comma)
1885  out << ", ";
1886 
1887  out << "protmodel=";
1888  write(pDFP.proteinModel(), out, globalAliases, writtenNames);
1889 
1890  comma = true;
1891  }
1892  catch (bad_cast&)
1893  {}
1894 
1895  try
1896  {
1897  auto& pSameAA = dynamic_cast<const CodonSameAARateSubstitutionModel&>(model);
1898  if (comma)
1899  out << ", ";
1900 
1901  out << "codonmodel=";
1902  write(pSameAA.codonModel(), out, globalAliases, writtenNames);
1903 
1904  out << ", protmodel=";
1905  write(pSameAA.proteinModel(), out, globalAliases, writtenNames);
1906 
1907  comma = true;
1908  }
1909  catch (bad_cast&)
1910  {}
1911 
1912 
1913  // case of Biblio models, update writtenNames
1914 
1915  try
1916  {
1917  auto& absm = dynamic_cast<const AbstractBiblioTransitionModel&>(model);
1918  size_t wNs = writtenNames.size();
1919 
1920  for (size_t i = 0; i < wNs; ++i)
1921  {
1922  try
1923  {
1924  writtenNames.push_back(absm.getNamespace() + absm.getParNameFromPmodel(writtenNames[i]));
1925  }
1926  catch (Exception&)
1927  {}
1928  }
1929  }
1930  catch (bad_cast&)
1931  {}
1932 
1934  bIO.write(model, out, globalAliases, model.getIndependentParameters().getParameterNames(), writtenNames, true, comma);
1935  out << ")";
1936 }
1937 
1938 
1940  OutputStream& out,
1941  std::map<std::string, std::string>& globalAliases,
1942  std::vector<std::string>& writtenNames) const
1943 {
1944  try
1945  {
1946  auto& pMS = dynamic_cast<const MixtureOfTransitionModels&>(model);
1947 
1948  out << "Mixture(";
1949  for (unsigned int i = 0; i < pMS.getNumberOfModels(); ++i)
1950  {
1951  if (i != 0)
1952  out << ", ";
1953  out << "model" + TextTools::toString(i + 1) + "=";
1954  write(pMS.nModel(i), out, globalAliases, writtenNames);
1955  }
1956  }
1957  catch (bad_cast&)
1958  {
1959  try
1960  {
1961  auto& pMS = dynamic_cast<const MixtureOfATransitionModel&>(model);
1962  out << "MixedModel(model=";
1963  auto& eM = pMS.model(0);
1964 
1965  ParameterList pl = eM.getIndependentParameters();
1966  vector<string> vpl = pl.getParameterNames();
1967 
1968  for (auto& pn : vpl)
1969  {
1970  if (find(writtenNames.begin(), writtenNames.end(), pn) == writtenNames.end())
1971  {
1972  if (pMS.hasDistribution(pn))
1973  {
1974  auto& pDD = pMS.distribution(pn);
1975  if (!dynamic_cast<const ConstantDistribution*>(&pDD))
1976  {
1978  StdStr sout;
1979  bIO.writeDiscreteDistribution(pDD, sout, globalAliases, writtenNames);
1980  globalAliases[pn] = sout.str();
1981  }
1982  }
1983  }
1984  }
1985 
1986  write(eM, out, globalAliases, writtenNames);
1987 
1988  if (pMS.from() != -1)
1989  out << ", from=" << model.getAlphabet()->intToChar(pMS.from()) << ", to=" << model.getAlphabet()->intToChar(pMS.to());
1990  }
1991  catch (bad_cast&)
1992  {}
1993  }
1994 
1995  const BppOParametrizableFormat bIO;
1996  bIO.write(model, out, globalAliases, model.getIndependentParameters().getParameterNames(), writtenNames, true, true);
1997  out << ")";
1998 }
1999 
2001  BranchModelInterface& model,
2002  std::shared_ptr<const AlignmentDataInterface> data)
2003 {
2004  string initFreqs = ApplicationTools::getStringParameter(model.getNamespace() + "initFreqs", unparsedArguments_, "", "", true, warningLevel_);
2005  if (verbose_)
2006  ApplicationTools::displayResult("External model frequencies init", (initFreqs == "") ? "None" : initFreqs);
2007 
2008  if (initFreqs != "")
2009  {
2010  try
2011  {
2012  auto& tmodel = dynamic_cast<TransitionModelInterface&>(model);
2013  if (initFreqs == "observed")
2014  {
2015  if (!data)
2016  throw Exception("BppOSubstitutionModelFormat::initialize_(). Missing data for observed frequencies");
2017  unsigned int psi = ApplicationTools::getParameter<unsigned int>(model.getNamespace() + "initFreqs.observedPseudoCount", unparsedArguments_, 0, "", true, warningLevel_);
2018  tmodel.setFreqFromData(*data, psi);
2019  }
2020  else if (initFreqs.substr(0, 6) == "values")
2021  {
2022  // Initialization using the "values" argument
2023  map<int, double> frequencies;
2024 
2025  string rf = initFreqs.substr(6);
2026  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
2027  int i = 0;
2028  while (strtok.hasMoreToken())
2029  frequencies[i++] = TextTools::toDouble(strtok.nextToken());
2030  tmodel.setFreq(frequencies);
2031  }
2032  else
2033  throw Exception("Unknown initFreqs argument");
2034  }
2035  catch (bad_cast&)
2036  {
2037  ApplicationTools::displayMessage("Frequencies initialization not possible for model " + model.getName());
2038  }
2039  unparsedArguments_.erase(unparsedArguments_.find(model.getNamespace() + "initFreqs"));
2040  if (unparsedArguments_.find(model.getNamespace() + "initFreqs.observedPseudoCount") != unparsedArguments_.end())
2041  unparsedArguments_.erase(unparsedArguments_.find(model.getNamespace() + "initFreqs.observedPseudoCount"));
2042  }
2043 
2045  for (size_t i = 0; i < pl.size(); ++i)
2046  {
2047  AutoParameter ap(pl[i]);
2049  pl.setParameter(i, ap);
2050  }
2051 
2052  size_t posp;
2053  for (size_t i = 0; i < pl.size(); i++)
2054  {
2055  const string pName = pl[i].getName();
2056 
2057  posp = pName.rfind(".");
2058  bool test1 = (initFreqs == "");
2059  bool test2 = (pName.size() < posp + 6) || (pName.substr(posp + 1, 5) != "theta");
2060  bool test3 = (unparsedArguments_.find(pName) != unparsedArguments_.end());
2061  try
2062  {
2063  if (test1 || test2 || test3)
2064  {
2065  if (!test1 && !test2 && test3)
2066  ApplicationTools::displayWarning("Warning, initFreqs argument is set and a value is set for parameter " + pName);
2067 
2068  double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue(), "", true, warningLevel_);
2069  pl[i].setValue(value);
2070 
2071  if (unparsedArguments_.find(pName) != unparsedArguments_.end())
2072  unparsedArguments_.erase(unparsedArguments_.find(pName));
2073  }
2074  if (verbose_)
2075  ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
2076  }
2077 
2078  catch (Exception& e)
2079  {}
2080  }
2081 
2082  model.matchParametersValues(pl);
2083 }
Abstract class for mixture models based on the bibliography.
Partial implementation of the SubstitutionModel interface for models that are set for matching the bi...
Abstract class for modelling of ratios of substitution rates between codons, whatever they are synony...
Abstract class for modelling of non-synonymous and synonymous substitution rates in codon models,...
Abstract class for modelling of non-synonymous and synonymous substitution rates in codon models,...
std::string getNamespace() const override
Abstract Basal class for words of substitution models.
const SubstitutionModelInterface & nModel(size_t i) const
returns the ith model, or throw an exception if i is not a valid number.
const BranchModelInterface & model() const override
static bool isBinaryAlphabet(const Alphabet &alphabet)
static std::shared_ptr< const ProteicAlphabet > PROTEIN_ALPHABET
static bool isRNYAlphabet(const Alphabet &alphabet)
static bool isProteicAlphabet(const Alphabet &alphabet)
static bool isNucleicAlphabet(const Alphabet &alphabet)
static bool isCodonAlphabet(const Alphabet &alphabet)
static bool isIntegerAlphabet(const Alphabet &alphabet)
static bool isWordAlphabet(const Alphabet &alphabet)
static void displayMessage(const std::string &text)
static std::shared_ptr< OutputStream > warning
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
virtual void setMessageHandler(std::shared_ptr< OutputStream > mh)
void writeDiscreteDistribution(const DiscreteDistributionInterface &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
const std::map< std::string, std::string > & getUnparsedArguments() const
Frequencies set I/O in BppO format.
void writeFrequencySet(const FrequencySetInterface &freqset, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
std::unique_ptr< FrequencySetInterface > readFrequencySet(std::shared_ptr< const Alphabet > alphabet, const std::string &freqDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, bool parseArguments=true) override
Read a frequencies set from a string.
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
const std::map< std::string, std::string > & getUnparsedArguments() const override
void write(const Parametrizable &parametrizable, OutputStream &out, std::vector< std::string > &writtenNames, bool printComma=false) const override
Rate Distribution I/O in BppO format.
std::unique_ptr< DiscreteDistributionInterface > readDiscreteDistribution(const std::string &distDescription, bool parseArguments)
Substitution model I/O in BppO format.
void writeMixed_(const MixedTransitionModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
std::unique_ptr< SubstitutionModelInterface > readWord_(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData)
void write(const BranchModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
void updateParameters_(BranchModelInterface &model, std::map< std::string, std::string > &args)
Finish parsing of parameters, taking care of aliases.
std::unique_ptr< SubstitutionModelInterface > readSubstitutionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, bool parseArguments=true) override
Read a substitution model from a string.
const std::map< std::string, std::string > & getUnparsedArguments() const override
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
void initialize_(BranchModelInterface &model, std::shared_ptr< const AlignmentDataInterface > data)
Set parameter initial values of a given model according to options.
Transition model I/O in BppO format.
std::unique_ptr< TransitionModelInterface > readTransitionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, bool parseArguments=true)
Interface for all Branch models.
virtual const FrequencySetInterface & frequencySet() const =0
virtual std::string getName() const =0
Get the name of the model.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual void addRateParameter()=0
size_t getNbrOfAxes() const
Definition: CoalaCore.h:49
The Coala branch-heterogeneous amino-acid substitution model.
Definition: Coala.h:57
std::string getExch() const
Definition: Coala.h:77
Class for substitution models of codons with several layers of codon models.
Parametrize a set of state frequencies for codons.
Interface for reversible codon models.
Class for modelling of non-synonymous rates in codon models, such that the substitution rates between...
The D1Walk substitution model for Integer alphabet [0;N-1]. In this model, substitutions are possible...
Class for non-synonymous substitution models on codons with parameterized equilibrium frequencies and...
Definition: DFP07.h:51
The EquiprobableSubstitutionModel substitution model for any kind of alphabet. Jukes-Cantor models ar...
Galtier's 2001 covarion model.
Definition: G2001.h:30
const DiscreteDistributionInterface & rateDistribution() const
Definition: G2001.h:98
SubModel taken from a MixedTransitionModel, kept in the context of the MixedTransitionModel (see From...
const MixedTransitionModelInterface & mixedModel() const
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
The Le et al (2008) CAT substitution model for proteins.
Definition: LGL08_CAT.h:40
Partial implementation of the Markov-modulated class of substitution models.
const ReversibleSubstitutionModelInterface & nestedModel() const
Interface for Transition models, defined as a mixture of "simple" transition models.
Transition models defined as a mixture of nested substitution models.
Transition models defined as a mixture of several substitution models.
A list of models, for building a WordSubstitutionModel.
Specialisation interface for rversible nucleotide substitution model.
Specialisation interface for nucleotide substitution model.
From a model, compute transition probabilities given there is at least a change of a category (ie a n...
const std::vector< size_t > & getRegisterNumbers() const
From a model, compute transition probabilities given there is at least a change in the branch.
Definition: POMO.h:87
const FrequencySetInterface & fitness() const
Definition: POMO.h:156
const SubstitutionModelInterface & mutationModel() const
Definition: POMO.h:161
bool hasFitness() const
Definition: POMO.h:151
virtual const ParameterList & getIndependentParameters() const=0
virtual void aliasParameters(const std::string &p1, const std::string &p2)=0
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual void setParameter(size_t index, const Parameter &param)
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const=0
virtual bool matchParametersValues(const ParameterList &parameters)=0
virtual std::string getNamespace() const=0
virtual const ParameterList & getParameters() const=0
static std::unique_ptr< SubstitutionRegisterInterface > getSubstitutionRegister(const std::string &regTypeDesc, std::shared_ptr< const StateMapInterface > stateMap, std::shared_ptr< const GeneticCode > genCode, std::shared_ptr< AlphabetIndex2 > &weights, std::shared_ptr< AlphabetIndex2 > &distances, bool verbose=true)
Get a Register instance.
Parametrize a set of state frequencies for proteins.
Specialized interface for protein reversible substitution model.
Specialized interface for protein substitution model.
The Rivas-Eddy substitution model with gap characters.
Definition: RE08.h:66
const ReversibleSubstitutionModelInterface & nestedModel() const
Definition: RE08.h:164
From a model, substitution rates are set into categories following a given register....
Interface for reversible substitution models.
Class for non-synonymous and synonymous substitution models on codons with parameterized equilibrium ...
Definition: SENCA.h:52
static std::unique_ptr< AlphabetIndex2 > getAlphabetIndex2(std::shared_ptr< const Alphabet > alphabet, const std::string &description, const std::string &message="Alphabet distance:", bool verbose=true)
std::string str() const
const std::string & nextToken()
bool hasMoreToken() const
Interface for all transition models.
Build an empirical protein substitution model from a file.
static std::string paste(const std::vector< T > &v, const std::string &delim=" ")
The Yang et al (2000) M10 substitution model for codons.
Definition: YNGP_M10.h:37
The Yang et al (2000) M7 substitution model for codons.
Definition: YNGP_M7.h:33
The Yang et al (2000) M8 substitution model for codons.
Definition: YNGP_M8.h:40
The Yang et al (2000) M9 substitution model for codons.
Definition: YNGP_M9.h:36
YpR model.
Definition: YpR.h:88
const NucleotideSubstitutionModelInterface & nestedModel() const
Definition: YpR.h:133
gBGC model.
Definition: gBGC.h:48
const SubstitutionModelInterface & nestedModel() const
Definition: gBGC.h:85
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool hasSubstring(const std::string &s, const std::string &pattern)
bool isEmpty(const std::string &s)
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.