bpp-phyl3 3.0.0
BppOSubstitutionModelFormat.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
7#include <Bpp/Io/FileTools.h>
13#include <Bpp/Text/TextTools.h>
14
15#include "../App/PhylogeneticsApplicationTools.h"
16#include "../Model/AbstractBiblioMixedTransitionModel.h"
17#include "../Model/BinarySubstitutionModel.h"
18#include "../Model/D1WalkSubstitutionModel.h"
19#include "../Model/Codon/AbstractCodonAAFitnessSubstitutionModel.h"
20#include "../Model/Codon/AbstractCodonAARateSubstitutionModel.h"
21#include "../Model/Codon/AbstractCodonBGCSubstitutionModel.h"
22#include "../Model/Codon/AbstractCodonClusterAASubstitutionModel.h"
23#include "../Model/Codon/AbstractCodonCpGSubstitutionModel.h"
24#include "../Model/Codon/AbstractCodonDistanceSubstitutionModel.h"
25#include "../Model/Codon/AbstractCodonDistanceSubstitutionModel.h"
26#include "../Model/Codon/AbstractCodonFitnessSubstitutionModel.h"
27#include "../Model/Codon/AbstractCodonFrequenciesSubstitutionModel.h"
28#include "../Model/Codon/AbstractCodonPhaseFrequenciesSubstitutionModel.h"
29#include "../Model/Codon/CodonAdHocSubstitutionModel.h"
30#include "../Model/Codon/CodonSameAARateSubstitutionModel.h"
31#include "../Model/Codon/DFP07.h"
32#include "../Model/Codon/GY94.h"
33#include "../Model/Codon/KCM.h"
34#include "../Model/Codon/KroneckerCodonDistanceFrequenciesSubstitutionModel.h"
35#include "../Model/Codon/KroneckerCodonDistanceSubstitutionModel.h"
36#include "../Model/Codon/MG94.h"
37#include "../Model/Codon/SENCA.h"
38#include "../Model/Codon/TripletSubstitutionModel.h"
39#include "../Model/Codon/YN98.h"
40#include "../Model/Codon/YNGP_M10.h"
41#include "../Model/Codon/YNGP_M7.h"
42#include "../Model/Codon/YNGP_M8.h"
43#include "../Model/Codon/YNGP_M9.h"
44#include "../Model/EquiprobableSubstitutionModel.h"
45#include "../Model/FromMixtureSubstitutionModel.h"
46#include "../Model/G2001.h"
47#include "../Model/InMixedSubstitutionModel.h"
48#include "../Model/KroneckerWordSubstitutionModel.h"
49#include "../Model/MixtureOfATransitionModel.h"
50#include "../Model/MixtureOfTransitionModels.h"
51#include "../Model/Nucleotide/F81.h"
52#include "../Model/Nucleotide/F84.h"
53#include "../Model/Nucleotide/GTR.h"
54#include "../Model/Nucleotide/HKY85.h"
55#include "../Model/Nucleotide/JCnuc.h"
56#include "../Model/Nucleotide/K80.h"
57#include "../Model/Nucleotide/L95.h"
58#include "../Model/Nucleotide/NucleotideSubstitutionModel.h"
59#include "../Model/Nucleotide/RN95.h"
60#include "../Model/Nucleotide/RN95s.h"
61#include "../Model/Nucleotide/SSR.h"
62#include "../Model/Nucleotide/T92.h"
63#include "../Model/Nucleotide/TN93.h"
64#include "../Model/Nucleotide/YpR.h"
65#include "../Model/Nucleotide/gBGC.h"
66#include "../Model/OneChangeRegisterTransitionModel.h"
67#include "../Model/OneChangeTransitionModel.h"
68#include "../Model/POMO.h"
69#include "../Model/Protein/Coala.h"
70#include "../Model/Protein/CoalaCore.h"
71#include "../Model/Protein/DSO78.h"
72#include "../Model/Protein/JCprot.h"
73#include "../Model/Protein/JTT92.h"
74#include "../Model/Protein/LG08.h"
75#include "../Model/Protein/LGL08_CAT.h"
76#include "../Model/Protein/ProteinSubstitutionModel.h"
77#include "../Model/Protein/UserProteinSubstitutionModel.h"
78#include "../Model/Protein/WAG01.h"
79#include "../Model/RE08.h"
80#include "../Model/RegisterRatesSubstitutionModel.h"
81#include "../Model/TS98.h"
82#include "../Model/TwoParameterBinarySubstitutionModel.h"
83#include "../Model/WordSubstitutionModel.h"
88
89// From Numeric
90
93
94
96
97
98using namespace bpp;
99
100// From the STL:
101#include <iomanip>
102#include <set>
103#include <memory>
104
105using namespace std;
106
107unsigned char BppOSubstitutionModelFormat::DNA = 1;
108unsigned char BppOSubstitutionModelFormat::RNA = 2;
109unsigned char BppOSubstitutionModelFormat::NUCLEOTIDE = 1 | 2;
110unsigned char BppOSubstitutionModelFormat::PROTEIN = 4;
111unsigned char BppOSubstitutionModelFormat::CODON = 8;
112unsigned char BppOSubstitutionModelFormat::WORD = 16;
113unsigned char BppOSubstitutionModelFormat::BINARY = 32;
114unsigned char BppOSubstitutionModelFormat::INTEGER = 64;
115unsigned char BppOSubstitutionModelFormat::ALL = 1 | 2 | 4 | 8 | 16 | 32 | 64;
116
117
118unique_ptr<SubstitutionModelInterface> BppOSubstitutionModelFormat::readSubstitutionModel(
119 shared_ptr<const Alphabet> alphabet,
120 const string& modelDescription,
121 const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
122 size_t nData,
123 bool parseArguments)
124{
125 unparsedArguments_.clear();
126 unique_ptr<SubstitutionModelInterface> model;
127 string modelName = "";
128 map<string, string> args;
129 KeyvalTools::parseProcedure(modelDescription, modelName, args);
130
131
132 // ///////////////////////////////
133 // LINKED WITH MIXTURES
134 // //////////////////////////////
135
136 if (modelName == "InMixed")
137 {
138 if (args.find("model") == args.end())
139 throw Exception("'model' argument missing to define the InMixed model.");
140
141 string nameMod = "";
142 size_t numMod = 0;
143
144 if (args.find("numMod") == args.end())
145 {
146 if (args.find("nameMod") == args.end())
147 throw Exception("'numMod' and 'nameMod' arguments missing to define the InMixed submodel.");
148 else
149 nameMod = args["nameMod"];
150 }
151 else
152 numMod = (size_t)TextTools::toInt(args["numMod"]);
153
154 string modelDesc2 = args["model"];
155 BppOTransitionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
156 nestedReader.setGeneticCode(geneticCode_); // This uses the same instance as the o
157
158 unique_ptr<TransitionModelInterface> nestedModelTmp = nestedReader.readTransitionModel(alphabet, modelDesc2, mData, nData, false);
159 unique_ptr<MixedTransitionModelInterface> nestedModel(dynamic_cast<MixedTransitionModelInterface*>(nestedModelTmp.release()));
160
161 // Check that nestedModel is fine and has subModel of given name
162 // or number
163
164 if (!nestedModel)
165 throw Exception("Unknown mixed model " + modelDesc2 + ".");
166
167 if (nameMod != "")
168 {
169 try
170 {
171 nestedModel->model(nameMod);
172 }
173 catch (NullPointerException&)
174 {
175 throw Exception("BppOSubstitutionModelFormat::read. " + nestedModel->getName() + "argument for model 'InMixed' has no submodel with name " + nameMod + ".");
176 }
177 model = make_unique<InMixedSubstitutionModel>(std::move(nestedModel), nameMod, modelDesc2);
178 }
179 else
180 {
181 if (numMod == 0 || (nestedModel->getNumberOfModels() < numMod))
182 throw Exception("BppOSubstitutionModelFormat::read. " + nestedModel->getName() + "argument for model 'InMixed' has no submodel with number " + TextTools::toString(numMod) + ".");
183 model = make_unique<InMixedSubstitutionModel>(std::move(nestedModel), numMod - 1, modelDesc2);
184 }
185
186 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
187 for (auto& it :unparsedParameterValuesNested)
188 {
189 unparsedArguments_[it.first] = it.second;
190 }
191
192 if (verbose_)
193 ApplicationTools::displayResult("Number of submodel", TextTools::toString(numMod));
194 }
195
196
197 else if (modelName == "FromRegister")
198 {
199 // We have to parse the nested model first:
200 if (args.find("model") == args.end())
201 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'FromRegister'.");
202
203 string nestedModelDescription = args["model"];
205 if (geneticCode_)
206 nestedReader.setGeneticCode(geneticCode_);
207
208 auto nestedModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
209 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
210
211 // We look for the register:
212 if (args.find("register") == args.end())
213 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'register' for model 'FromRegister'.");
214
215 string registerDescription = args["register"];
216 shared_ptr<AlphabetIndex2> weights = nullptr;
217 shared_ptr<AlphabetIndex2> distances = nullptr;
218
219 auto reg = PhylogeneticsApplicationTools::getSubstitutionRegister(registerDescription, nestedModel->getStateMap(), geneticCode_, weights, distances, verbose_);
220
221 // is it normalized (default : false)
222 bool isNorm = false;
223
224 if (args.find("isNormalized") != args.end() && args["isNormalized"] == "true")
225 isNorm = true;
226
227 model = make_unique<RegisterRatesSubstitutionModel>(std::move(nestedModel), *reg, isNorm);
228
229 if (TextTools::isEmpty(registerDescription))
230 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'register' for model 'FromRegister'.");
231
232 // Then we update the parameter set:
233 for (auto& it : unparsedParameterValuesNested)
234 {
235 unparsedArguments_["FromRegister." + it.first] = it.second;
236 }
237 }
238
239 else if (modelName == "POMO")
240 {
241 auto allelic = dynamic_pointer_cast<const AllelicAlphabet>(alphabet);
242 if (!allelic)
243 throw Exception("BppOSubstitutionModelFormat;;read. POMO model with no allelic alphabet.");
244
245 // We have to parse the nested model first:
246 if (args.find("model") == args.end())
247 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'POMO'.");
248
249 map<string, string> unparsedParameterValuesNested;
250 // fitness
251 unique_ptr<FrequencySetInterface> nestedFreq(nullptr);
252
253 if (args.find("fitness") != args.end())
254 {
255 string nestedFreqDescription = args["fitness"];
256 BppOFrequencySetFormat nestedFreqReader(ALL, false, warningLevel_);
257
258 nestedFreq = nestedFreqReader.readFrequencySet(allelic->getStateAlphabet(), nestedFreqDescription, mData, nData, false);
259 unparsedParameterValuesNested = nestedFreqReader.getUnparsedArguments();
260
261 for (auto& it : unparsedParameterValuesNested)
262 {
263 unparsedParameterValuesNested["fit_" + it.first] = it.second;
264 }
265 }
266
267 // model
268
269 string nestedModelDescription = args["model"];
271 nestedReader.setGeneticCode(geneticCode_);
272
273 auto nestedModel = nestedReader.readSubstitutionModel(allelic->getStateAlphabet(), nestedModelDescription, mData, nData, false);
274 unparsedParameterValuesNested.insert(nestedReader.getUnparsedArguments().begin(),
275 nestedReader.getUnparsedArguments().end());
276
277
278 model = make_unique<POMO>(allelic, std::move(nestedModel), std::move(nestedFreq));
279
280 // Then we update the parameter set:
281 for (auto& it : unparsedParameterValuesNested)
282 {
283 unparsedArguments_["POMO." + it.first] = it.second;
284 }
285 }
286
287
288 // /////////////////////////////////
289 // / WORDS and CODONS
290 // ///////////////////////////////
291
292 else if ((modelName == "Word") || (modelName.substr(0, 4) == "Kron") || (modelName == "Triplet") || (modelName.substr(0, 5) == "Codon") || (modelName == "SENCA") )
293 model = readWord_(alphabet, modelDescription, mData, nData);
294
295
296 // //////////////////
297 // PREDEFINED CODON MODELS
298
299 else if (((modelName == "MG94") || (modelName == "YN98") || (modelName == "YNGP_M0") ||
300 (modelName == "GY94") || (modelName.substr(0, 3) == "KCM") || (modelName == "SameAARate"))
301 && (alphabetCode_ & CODON))
302 {
303 if (!(alphabetCode_ & CODON))
304 throw Exception("BppOSubstitutionModelFormat::read. Codon alphabet not supported.");
305 if (!geneticCode_)
306 throw Exception("BppOSubstitutionModelFormat::readSubstitionModel(). No genetic code specified! Consider using 'setGeneticCode'.");
307
308 if (!AlphabetTools::isCodonAlphabet(*alphabet))
309 throw Exception("Alphabet should be Codon Alphabet.");
310
311 auto pCA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
312
313 if (args.find("genetic_code") != args.end())
314 {
315 ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
316 throw Exception("BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
317 }
318
319 if (geneticCode_->getSourceAlphabet()->getAlphabetType() != pCA->getAlphabetType())
320 throw Exception("Mismatch between genetic code and codon alphabet");
321
322 unique_ptr<CodonFrequencySetInterface> codonFreqs(nullptr);
323
324 if (args.find("frequencies") != args.end())
325 {
326 string freqOpt = ApplicationTools::getStringParameter("frequencies", args, "F0", "", true, warningLevel_);
328 freqReader.setGeneticCode(geneticCode_); // This uses the same instance as the one that will be used by the model.
329
330 auto tmpFreqs = freqReader.readFrequencySet(pCA, freqOpt, mData, nData);
331 codonFreqs = unique_ptr<CodonFrequencySetInterface>(dynamic_cast<CodonFrequencySetInterface*>(tmpFreqs.release()));
332 for (const auto& it : freqReader.getUnparsedArguments())
333 {
334 unparsedArguments_[modelName + "." + it.first] = it.second;
335 }
336 }
337 else
338 // codonFreqs compulsory for all models but SameAARate
339 if (modelName != "SameAARate")
340 throw Exception("Missing 'frequencies' for model " + modelName);
341
342 if (modelName == "MG94")
343 model = make_unique<MG94>(geneticCode_, std::move(codonFreqs));
344 else if (modelName == "GY94")
345 model = make_unique<GY94>(geneticCode_, std::move(codonFreqs));
346 else if ((modelName == "YN98") || (modelName == "YNGP_M0"))
347 model = make_unique<YN98>(geneticCode_, std::move(codonFreqs));
348 else if (modelName == "KCM7")
349 model = make_unique<KCM>(geneticCode_, true);
350 else if (modelName == "KCM19")
351 model = make_unique<KCM>(geneticCode_, false);
352 else if (modelName == "SameAARate")
353 {
354 if (args.find("protmodel") == args.end())
355 throw Exception("Missing 'protmodel in model " + modelName + ".");
356
358 auto tmpProtModel = nestedProtReader.readSubstitutionModel(geneticCode_->getTargetAlphabet(), args["protmodel"], mData, nData, false);
359 auto nestedProtModel = unique_ptr<ProteinSubstitutionModelInterface>(dynamic_cast<ProteinSubstitutionModelInterface*>(tmpProtModel.release()));
360
361 auto unparsedParameterValuesNested = nestedProtReader.getUnparsedArguments();
362 unparsedArguments_.insert(unparsedParameterValuesNested.begin(), unparsedParameterValuesNested.end());
363
364 if (args.find("codonmodel") == args.end())
365 throw Exception("Missing 'codonmodel in model " + modelName + ".");
366
368 nestedCodonReader.setGeneticCode(geneticCode_); // This uses the same instance as the o
369
370 auto tmpSubstitutionModel = nestedCodonReader.readSubstitutionModel(alphabet, args["codonmodel"], mData, nData, false);
371 auto nestedCodonModel = unique_ptr<CodonSubstitutionModelInterface>(dynamic_cast<CodonSubstitutionModelInterface*>(tmpSubstitutionModel.release()));
372
373 unparsedParameterValuesNested = nestedCodonReader.getUnparsedArguments();
374 for (const auto& it:unparsedParameterValuesNested)
375 {
376 unparsedArguments_["SameAARate." + it.first] = it.second;
377 }
378
379 model = make_unique<CodonSameAARateSubstitutionModel>(std::move(nestedProtModel), std::move(nestedCodonModel), std::move(codonFreqs), geneticCode_);
380 }
381 else
382 throw Exception("Unknown Codon model: " + modelName);
383 }
384
385
386 // //////////////////////////////////
387 // YpR
388 // //////////////////////////////////
389
390 else if (modelName == "YpR_Sym")
391 {
392 if (!(alphabetCode_ & NUCLEOTIDE))
393 throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
394 if (!AlphabetTools::isRNYAlphabet(*alphabet))
395 throw Exception("Mismatch alphabet: " + alphabet->getAlphabetType() + " for model: " + modelName);
396 auto prny = dynamic_pointer_cast<const RNY>(alphabet);
397
398 string nestedModelDescription = args["model"];
399 if (TextTools::isEmpty(nestedModelDescription))
400 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_sym'.");
401 if (verbose_)
402 ApplicationTools::displayResult("Symmetric YpR model", modelName);
403 BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, false, false, false, verbose_, warningLevel_);
404 auto tmpModel = nestedReader.readSubstitutionModel(prny->getLetterAlphabet(), nestedModelDescription, mData, nData, false);
405 unique_ptr<NucleotideSubstitutionModelInterface> nestedModel(dynamic_cast<NucleotideSubstitutionModelInterface*>(tmpModel.release()));
406 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
407 for (auto& it : unparsedParameterValuesNested)
408 {
409 unparsedArguments_["YpR_Sym." + it.first] = it.second;
410 }
411
412 model = make_unique<YpR_Sym>(prny, std::move(nestedModel));
413 }
414 else if (modelName == "YpR_Gen")
415 {
416 if (!(alphabetCode_ & NUCLEOTIDE))
417 throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
418 if (!AlphabetTools::isRNYAlphabet(*alphabet))
419 throw Exception("Mismatch alphabet: " + alphabet->getAlphabetType() + " for model: " + modelName);
420 auto prny = dynamic_pointer_cast<const RNY>(alphabet);
421
422 string nestedModelDescription = args["model"];
423 if (TextTools::isEmpty(nestedModelDescription))
424 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_gen'.");
425 if (verbose_)
426 ApplicationTools::displayResult("General YpR model", modelName);
427 BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, false, false, false, verbose_, warningLevel_);
428 auto tmpModel = nestedReader.readSubstitutionModel(prny->getLetterAlphabet(), nestedModelDescription, mData, nData, false);
429 unique_ptr<NucleotideSubstitutionModelInterface> nestedModel(dynamic_cast<NucleotideSubstitutionModelInterface*>(tmpModel.release()));
430 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
431
432 for (auto& it : unparsedParameterValuesNested)
433 {
434 unparsedArguments_["YpR_Gen." + it.first] = it.second;
435 }
436
437 model = make_unique<YpR_Gen>(prny, std::move(nestedModel));
438 }
439
440
441 // /////////////////////////////////
442 // / RE08
443 // ///////////////////////////////
444
445 else if (modelName == "RE08")
446 {
447 if (!allowGaps_)
448 throw Exception("BppOSubstitutionModelFormat::read. No Gap model allowed here.");
449
450 // We have to parse the nested model first:
451 string nestedModelDescription = args["model"];
452 if (TextTools::isEmpty(nestedModelDescription))
453 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'RE08'.");
454 if (verbose_)
455 ApplicationTools::displayResult("Gap model", modelName);
457 if (geneticCode_)
458 nestedReader.setGeneticCode(geneticCode_);
459 auto tmpModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
460
461 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
462
463 // Now we create the RE08 substitution model:
465 {
466 if (!(alphabetCode_ & NUCLEOTIDE))
467 throw Exception("BppOSubstitutionModelFormat::read. Nucleic alphabet not supported.");
468 unique_ptr<NucleotideReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<NucleotideReversibleSubstitutionModelInterface*>(tmpModel.release()));
469 model = make_unique<RE08Nucleotide>(std::move(nestedModel));
470 if (!model)
471 throw Exception("BppOSubstitutionModelFormat::readSubstitionModel(). Invalid submodel, must be 'reversible' and 'nucleotide'.");
472 }
473 else if (AlphabetTools::isProteicAlphabet(*alphabet))
474 {
475 if (!(alphabetCode_ & PROTEIN))
476 throw Exception("BppOSubstitutionModelFormat::read. Protein alphabet not supported.");
477 unique_ptr<ProteinReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<ProteinReversibleSubstitutionModelInterface*>(tmpModel.release()));
478 model = make_unique<RE08Protein>(std::move(nestedModel));
479 if (!model)
480 throw Exception("BppOSubstitutionModelFormat::readSubstitionModel(). Invalid submodel, must be 'reversible' and 'protein'.");
481 }
482 else if (AlphabetTools::isCodonAlphabet(*alphabet))
483 {
484 if (!(alphabetCode_ & CODON))
485 throw Exception("BppOSubstitutionModelFormat::read. Codon alphabet not supported.");
486 unique_ptr<CodonReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<CodonReversibleSubstitutionModelInterface*>(tmpModel.release()));
487 model = make_unique<RE08Codon>(std::move(nestedModel));
488 if (!model)
489 throw Exception("BppOSubstitutionModelFormat::readSubstitionModel(). Invalid submodel, must be 'reversible' and 'codon'.");
490 }
491 else
492 {
493 unique_ptr<ReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<ReversibleSubstitutionModelInterface*>(tmpModel.release()));
494 model = make_unique<RE08>(std::move(nestedModel));
495 }
496
497 // Then we update the parameter set:
498 for (auto& it : unparsedParameterValuesNested)
499 {
500 unparsedArguments_["RE08.model_" + it.first] = it.second;
501 }
502 }
503
504 // /////////////////////////////////
505 // / TS98
506 // ///////////////////////////////
507
508 else if (modelName == "TS98")
509 {
510 if (!allowCovarions_)
511 throw Exception("BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
512
513 // We have to parse the nested model first:
514 string nestedModelDescription = args["model"];
515 if (TextTools::isEmpty(nestedModelDescription))
516 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'TS98'.");
517 if (verbose_)
518 ApplicationTools::displayResult("Covarion model", modelName);
520 if (geneticCode_)
521 nestedReader.setGeneticCode(geneticCode_);
522 auto tmpModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
523 unique_ptr<ReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<ReversibleSubstitutionModelInterface*>(tmpModel.release()));
524 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
525
526 // Now we create the TS98 substitution model:
527 model = make_unique<TS98>(std::move(nestedModel));
528
529 // Then we update the parameter set:
530 for (auto& it : unparsedParameterValuesNested)
531 {
532 unparsedArguments_["TS98.model_" + it.first] = it.second;
533 }
534 }
535
536 // /////////////////////////////////
537 // / G01
538 // ///////////////////////////////
539
540 else if (modelName == "G01")
541 {
542 if (!allowCovarions_)
543 throw Exception("BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
544
545 // We have to parse the nested model first:
546 string nestedModelDescription = args["model"];
547 if (TextTools::isEmpty(nestedModelDescription))
548 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'G01'.");
549 string nestedRateDistDescription = args["rdist"];
550 if (TextTools::isEmpty(nestedRateDistDescription))
551 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'rdist' for model 'G01'.");
552 if (verbose_)
553 ApplicationTools::displayResult("Covarion model", modelName);
555 if (geneticCode_)
556 nestedReader.setGeneticCode(geneticCode_);
557 auto tmpModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
558 unique_ptr<ReversibleSubstitutionModelInterface> nestedModel(dynamic_cast<ReversibleSubstitutionModelInterface*>(tmpModel.release()));
559 if (!nestedModel.get())
560 throw Exception("G01 model restricted to Reversible models. Ask developpers to fix it.");
561
562 map<string, string> unparsedParameterValuesNestedModel(nestedReader.getUnparsedArguments());
563
564 BppORateDistributionFormat rateReader(false);
565 auto nestedRDist = rateReader.readDiscreteDistribution(nestedRateDistDescription, false);
566 map<string, string> unparsedParameterValuesNestedDist(rateReader.getUnparsedArguments());
567
568 // Now we create the G01 substitution model:
569 model = make_unique<G2001>(std::move(nestedModel), std::move(nestedRDist));
570
571 // Then we update the parameter set:
572 for (auto& it : unparsedParameterValuesNestedModel)
573 {
574 unparsedArguments_["G01.model_" + it.first] = it.second;
575 }
576 for (auto& it : unparsedParameterValuesNestedDist)
577 {
578 unparsedArguments_["G01.rdist_" + it.first] = it.second;
579 }
580 }
581
589
590 else
591 {
592 // This is a 'simple' model...
593
596 if (modelName == "Equi")
597 model.reset(new EquiprobableSubstitutionModel(alphabet));
599 // nucleotidic model
600 else if (AlphabetTools::isNucleicAlphabet(*alphabet))
601 {
602 if (!(alphabetCode_ & NUCLEOTIDE))
603 throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
604 auto alpha = dynamic_pointer_cast<const NucleicAlphabet>(alphabet);
605
606 // //////////////////////////////////
607 // gBGC
608 // //////////////////////////////////
609 if (modelName.find("+gBGC") != string::npos)
610 {
611 string subModName = modelName.substr(0, modelName.find("+gBGC"));
612
613 if (verbose_)
614 ApplicationTools::displayResult("Biased gene conversion for", subModName);
615
616 string nestedModelDescription = subModName;
617
618 if (modelDescription.find_first_of("(") != string::npos)
619 {
620 string::size_type begin = modelDescription.find_first_of("(");
621 string::size_type end = modelDescription.find_last_of(")");
622
623 nestedModelDescription += modelDescription.substr(begin, end - begin + 1);
624 }
625
626 BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, true, true, false, verbose_, warningLevel_);
627 auto tmpModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
628 unique_ptr<NucleotideSubstitutionModelInterface> nestedModel(dynamic_cast<NucleotideSubstitutionModelInterface*>(tmpModel.release()));
629 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
630
631 // Now we create the gBGC substitution model:
632 model = make_unique<gBGC>(alpha, std::move(nestedModel));
633
634 // Then we update the parameter set:
635 for (auto& it : unparsedParameterValuesNested)
636 {
637 unparsedArguments_["gBGC." + it.first] = it.second;
638 }
639 }
640
641
642 // /////////////////////////////////
643 // / GTR
644 // ///////////////////////////////
645
646 else if (modelName == "GTR")
647 {
648 model = make_unique<GTR>(alpha);
649 }
650
651
652 // /////////////////////////////////
653 // / SSR
654 // ///////////////////////////////
655
656 else if (modelName == "SSR")
657 {
658 model = make_unique<SSR>(alpha);
659 }
660
661 // /////////////////////////////////
662 // / L95
663 // ///////////////////////////////
664
665 else if (modelName == "L95")
666 {
667 model = make_unique<L95>(alpha);
668 }
669
670 // /////////////////////////////////
671 // / RN95
672 // ///////////////////////////////
673
674 else if (modelName == "RN95")
675 {
676 model = make_unique<RN95>(alpha);
677 }
678
679 // /////////////////////////////////
680 // / RN95s
681 // ///////////////////////////////
682
683 else if (modelName == "RN95s")
684 {
685 model = make_unique<RN95s>(alpha);
686 }
687
688 // /////////////////////////////////
689 // / TN93
690 // //////////////////////////////
691
692 else if (modelName == "TN93")
693 {
694 model = make_unique<TN93>(alpha);
695 }
696
697 // /////////////////////////////////
698 // / HKY85
699 // ///////////////////////////////
700
701 else if (modelName == "HKY85")
702 {
703 model = make_unique<HKY85>(alpha);
704 }
705
706 // /////////////////////////////////
707 // / F81
708 // ///////////////////////////////
709
710 else if (modelName == "F81")
711 {
712 model = make_unique<F81>(alpha);
713 }
714 // /////////////////////////////////
715 // / F84
716 // ///////////////////////////////
717
718 else if (modelName == "F84")
719 {
720 model = make_unique<F84>(alpha);
721 }
722
723 // /////////////////////////////////
724 // / T92
725 // ///////////////////////////////
726
727 else if (modelName == "T92")
728 {
729 model = make_unique<T92>(alpha);
730 }
731
732 // /////////////////////////////////
733 // / K80
734 // ///////////////////////////////
735
736 else if (modelName == "K80")
737 {
738 model = make_unique<K80>(alpha);
739 }
740
741
742 // /////////////////////////////////
743 // / JC69
744 // ///////////////////////////////
745
746 else if (modelName == "JC69")
747 {
748 model = make_unique<JCnuc>(alpha);
749 }
750 else
751 throw Exception("Model '" + modelName + "' unknown, or does not fit nucleic alphabet.");
752 }
753 else if (AlphabetTools::isProteicAlphabet(*alphabet))
754 {
757
758 if (!(alphabetCode_ & PROTEIN))
759 throw Exception("BppOSubstitutionModelFormat::read. Protein alphabet not supported.");
760 auto alpha = dynamic_pointer_cast<const ProteicAlphabet>(alphabet);
761
762 if (modelName.find("+F") != string::npos)
763 {
764 string freqOpt = ApplicationTools::getStringParameter("frequencies", args, "Full", "", true, warningLevel_);
766 auto tmpFreq = freqReader.readFrequencySet(alpha, freqOpt, mData, nData, true);
767 unique_ptr<ProteinFrequencySetInterface> protFreq(dynamic_cast<ProteinFrequencySetInterface*>(tmpFreq.release()));
768
769 map<string, string> unparsedParameterValuesNested(freqReader.getUnparsedArguments());
770
771 for (auto& it : unparsedParameterValuesNested)
772 {
773 unparsedArguments_[modelName + "." + it.first] = it.second;
774 }
775
776 if (modelName == "JC69+F")
777 model = make_unique<JCprot>(alpha, std::move(protFreq));
778 else if (modelName == "DSO78+F")
779 model = make_unique<DSO78>(alpha, std::move(protFreq));
780 else if (modelName == "JTT92+F")
781 model = make_unique<JTT92>(alpha, std::move(protFreq));
782 else if (modelName == "LG08+F")
783 model = make_unique<LG08>(alpha, std::move(protFreq));
784 else if (modelName == "WAG01+F")
785 model = make_unique<WAG01>(alpha, std::move(protFreq));
786 else if (modelName == "Empirical+F")
787 {
788 string prefix = args["name"];
789 if (TextTools::isEmpty(prefix))
790 throw Exception("'name' argument missing for user-defined substitution model.");
791 string fname = args["file"];
792 if (TextTools::isEmpty(fname))
793 throw Exception("'file' argument missing for user-defined substitution model.");
794 model = make_unique<UserProteinSubstitutionModel>(alpha, args["file"], std::move(protFreq), prefix + "+F.");
795 }
796 }
797 else if (modelName == "JC69")
798 model = make_unique<JCprot>(alpha);
799 else if (modelName == "DSO78")
800 model = make_unique<DSO78>(alpha);
801 else if (modelName == "JTT92")
802 model = make_unique<JTT92>(alpha);
803 else if (modelName == "LG08")
804 model = make_unique<LG08>(alpha);
805 else if (modelName == "WAG01")
806 model = make_unique<WAG01>(alpha);
807 // submodels of mixture models
808 else if (modelName.substr(0, 9) == "LGL08_CAT")
809 {
810 string subModelName = modelName.substr(10);
811
812 size_t posp = modelDescription.find("(");
813
814 string modelDesc2 = modelName.substr(0, 9) + modelDescription.substr(posp);
815
816 BppOTransitionModelFormat nestedReader(PROTEIN, true, true, false, verbose_, warningLevel_);
817
818 auto tmpModel = nestedReader.readTransitionModel(alphabet, modelDesc2, mData, nData, false);
819 auto nestedModel = unique_ptr<MixedTransitionModelInterface>(dynamic_cast<MixedTransitionModelInterface*>(tmpModel.release()));
820
821 // Check that nestedModel is fine and has subModel of given name
822 if (!nestedModel)
823 throw Exception("Unknown model " + modelName + ".");
824
825 try
826 {
827 nestedModel->model(subModelName);
828 }
829 catch (NullPointerException&)
830 {
831 throw Exception("BppOSubstitutionModelFormat::read. " + nestedModel->getName() + "argument for model 'FromMixture' has no submodel with name " + subModelName + ".");
832 }
833
834 // Now we create the FromMixture substitution model:
835 model = make_unique<FromMixtureSubstitutionModel>(*nestedModel, subModelName, modelDesc2);
836 }
837 else if (modelName == "Empirical")
838 {
839 string prefix = args["name"];
840 if (TextTools::isEmpty(prefix))
841 throw Exception("'name' argument missing for user-defined substitution model.");
842 model = make_unique<UserProteinSubstitutionModel>(alpha, args["file"], prefix);
843 }
844 else if (modelName == "Coala")
845 {
846 string exchangeability = args["exch"];
847 if (TextTools::isEmpty(exchangeability))
848 throw Exception("BppOSubstitutionModelFormat::read. missing argument 'exch' for model 'Coala'.");
849 string prefix = args["name"];
850 if (exchangeability == "Empirical" && TextTools::isEmpty(prefix))
851 throw Exception("'name' argument missing to specify the exchangeabilities of the user-defined empirical model.");
853 auto tmpModel = nestedReader.readSubstitutionModel(alphabet, exchangeability, mData, nData, false);
854 unique_ptr<ProteinSubstitutionModelInterface> nestedModel(dynamic_cast<ProteinSubstitutionModelInterface*>(tmpModel.release()));
855 string nbrOfParametersPerBranch = args["nbrAxes"];
856 if (TextTools::isEmpty(nbrOfParametersPerBranch))
857 throw Exception("'nbrAxes' argument missing to define the number of axis of the Correspondence Analysis.");
858 // Now we create the Coala model
859 model = make_unique<Coala>(alpha, *nestedModel, TextTools::to<unsigned int>(nbrOfParametersPerBranch));
860 if (nData)
861 model->setFreqFromData(*mData.at(nData));
862 }
863 else
864 throw Exception("Model '" + modelName + "' is unknown, or does not fit proteic alphabet.");
865 }
866 else if (AlphabetTools::isBinaryAlphabet(*alphabet))
867 {
868 if (!(alphabetCode_ & BINARY))
869 throw Exception("BppOSubstitutionModelFormat::read. Binary alphabet not supported.");
870
871 auto balpha = dynamic_pointer_cast<const BinaryAlphabet>(alphabet);
872
873 if (modelName == "Binary")
874 model = make_unique<BinarySubstitutionModel>(balpha);
875 else if (modelName == "TwoParameterBinary")
876 model = make_unique<TwoParameterBinarySubstitutionModel>(balpha);
877 else
878 throw Exception("Model '" + modelName + "' unknown, or does not fit binary alphabet.");
879 }
880 else if (AlphabetTools::isIntegerAlphabet(*alphabet))
881 {
882 if (!(alphabetCode_ & INTEGER))
883 throw Exception("BppOSubstitutionModelFormat::read. Integer alphabet not supported.");
884
885 auto balpha = dynamic_pointer_cast<const IntegerAlphabet>(alphabet);
886
887 if (modelName == "D1Walk")
888 model.reset(new D1WalkSubstitutionModel(balpha));
889 else
890 throw Exception("Model '" + modelName + "' unknown, or does not fit integer alphabet.");
891 }
892 else
893 throw Exception("Model '" + modelName + "' unknown, or does not fit " + alphabet->getAlphabetType() + " alphabet.");
894 }
895
896 if (verbose_)
897 ApplicationTools::displayResult("Substitution model", modelName);
898
899 updateParameters_(*model, args);
900
901
902 if (parseArguments)
903 {
904 if (nData)
905 {
906 if (mData.find(nData) == mData.end())
907 throw Exception("Unknown data for number " + TextTools::toString(nData));
908
909 initialize_(*model, mData.at(nData));
910 }
911 else
912 initialize_(*model, 0);
913 }
914
915 return model;
916}
917
918
921 map<std::string, std::string>& args)
922{
923 // Update parameter args:
924 vector<string> pnames = model.getParameters().getParameterNames();
925
926 string pref = model.getNamespace();
927
928 for (auto pname : pnames)
929 {
930 string name = model.getParameterNameWithoutNamespace(pname);
931 if (args.find(name) != args.end())
932 unparsedArguments_[pref + name] = args[name];
933 }
934
935 // Now look if some parameters are aliased:
937 string pname, pval, pname2;
938 for (size_t i = 0; i < pl.size(); ++i)
939 {
940 pname = model.getParameterNameWithoutNamespace(pl[i].getName());
941
942 if (args.find(pname) == args.end())
943 continue;
944 pval = args[pname];
945
946 if (((pval.rfind("_") != string::npos) && (TextTools::isDecimalInteger(pval.substr(pval.rfind("_") + 1, string::npos)))) ||
947 (pval.find("(") != string::npos))
948 continue;
949
950 bool found = false;
951 for (size_t j = 0; j < pl.size() && !found; ++j)
952 {
953 pname2 = model.getParameterNameWithoutNamespace(pl[j].getName());
954
955 // if (j == i || args.find(pname2) == args.end()) continue; Julien 03/03/2010: This extra condition prevents complicated (nested) models to work properly...
956 if (j == i)
957 continue;
958 if (pval == pname2)
959 {
960 // This is an alias...
961 // NB: this may throw an exception if incorrect! We leave it as is for now :s
962 model.aliasParameters(pname2, pname);
963 if (verbose_)
964 ApplicationTools::displayResult("Parameter alias found", pname + "->" + pname2);
965 found = true;
966 }
967 }
968 // if (!TextTools::isDecimalNumber(pval) && !found)
969 // throw Exception("Incorrect parameter syntax: parameter " + pval + " was not found and can't be used as a value for parameter " + pname + ".");
970 }
971
972 // 2 following tests be removed in a later version
973 if (args.find("useObservedFreqs") != args.end())
974 throw Exception("useObservedFreqs argument is obsolete. Please use 'initFreqs=observed' instead.");
975 if (args.find("useObservedFreqs.pseudoCount") != args.end())
976 throw Exception("useObservedFreqs.pseudoCount argument is obsolete. Please use 'initFreqs.observedPseudoCount' instead.");
977
978 if (args.find("initFreqs") != args.end())
979 // Specific case since already used
980 if (pref != "Coala.")
981 unparsedArguments_[pref + "initFreqs"] = args["initFreqs"];
982 if (args.find("initFreqs.observedPseudoCount") != args.end())
983 unparsedArguments_[pref + "initFreqs.observedPseudoCount"] = args["initFreqs.observedPseudoCount"];
984
985 if (args.find("rate") != args.end())
986 {
987 model.addRateParameter();
988 unparsedArguments_[pref + "rate"] = args["rate"];
989 }
990}
991
992
993unique_ptr<SubstitutionModelInterface> BppOSubstitutionModelFormat::readWord_(
994 std::shared_ptr<const Alphabet> alphabet,
995 const std::string& modelDescription,
996 const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
997 size_t nData)
998{
999 unique_ptr<SubstitutionModelInterface> model;
1000 string modelName = "";
1001 map<string, string> args;
1002 KeyvalTools::parseProcedure(modelDescription, modelName, args);
1003
1004 vector<string> v_nestedModelDescription;
1005 vector< unique_ptr<SubstitutionModelInterface>> v_pSM;
1006 shared_ptr<const CoreWordAlphabet> pWA;
1007
1008 string s, nestedModelDescription;
1009 unsigned int nbmodels;
1010
1011 if (((modelName == "Word" || modelName == "Kron") && !AlphabetTools::isWordAlphabet(*alphabet)) ||
1012 ((!(modelName == "Word" || modelName == "Kron")) && !AlphabetTools::isCodonAlphabet(*alphabet)))
1013 throw Exception("Bad alphabet type "
1014 + alphabet->getAlphabetType() + " for model " + modelName + ".");
1015
1016 pWA = dynamic_pointer_cast<const CoreWordAlphabet>(alphabet);
1017
1018
1021
1022 if (args.find("model") != args.end())
1023 {
1024 v_nestedModelDescription.push_back(args["model"]);
1025 nbmodels = (modelName == "Word" || modelName == "Kron") ? pWA->getLength() : 3;
1026 }
1027 else
1028 {
1029 if (args.find("model1") == args.end())
1030 throw Exception("Missing argument 'model' or 'model1' for model " + modelName + ".");
1031
1032 nbmodels = 0;
1033
1034 while (args.find("model" + TextTools::toString(nbmodels + 1)) != args.end())
1035 v_nestedModelDescription.push_back(args["model" + TextTools::toString(++nbmodels)]);
1036 }
1037
1038 if (nbmodels < 2)
1039 throw Exception("Missing nested models for model " + modelName + ".");
1040
1041 if (pWA->getLength() != nbmodels)
1042 throw Exception("Bad alphabet type "
1043 + alphabet->getAlphabetType() + " for model " + modelName + ".");
1044
1045 if (v_nestedModelDescription.size() != nbmodels)
1046 {
1047 BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, false, true, false, false, warningLevel_);
1048 model = nestedReader.readSubstitutionModel(pWA->getNAlphabet(0), v_nestedModelDescription[0], mData, nData, false);
1049 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
1050 string pref = "";
1051 for (unsigned int i = 0; i < nbmodels; ++i)
1052 {
1053 pref += TextTools::toString(i + 1);
1054 }
1055
1056 for (auto& it : unparsedParameterValuesNested)
1057 {
1058 unparsedArguments_[modelName + "." + pref + "_" + it.first] = it.second;
1059 }
1060
1061 v_pSM.push_back(std::move(model));
1062 }
1063 else
1064 {
1065 for (unsigned i = 0; i < v_nestedModelDescription.size(); ++i)
1066 {
1067 BppOSubstitutionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
1068 model = nestedReader.readSubstitutionModel(pWA->getNAlphabet(i), v_nestedModelDescription[i], mData, nData, false);
1069 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
1070 for (auto& it : unparsedParameterValuesNested)
1071 {
1072 unparsedArguments_[modelName + "." + TextTools::toString(i + 1) + "_" + it.first] = it.second;
1073 }
1074
1075 v_pSM.push_back(std::move(model));
1076 }
1077 }
1078
1079 // //////////////////////////////
1080 // In case of a Kron... model, mgmt of the positions
1081
1082 // check the vector of simultaneous changing positions
1083
1084 vector<set<size_t>> vposKron;
1085 if (modelName.substr(0, 4) == "Kron")
1086 {
1087 if (args.find("positions") != args.end())
1088 {
1089 StringTokenizer nst(args["positions"], "+");
1090
1091 while (nst.hasMoreToken())
1092 {
1093 string spos = nst.nextToken();
1094 StringTokenizer nst2(spos, "*");
1095
1096 set<size_t> ss;
1097
1098 while (nst2.hasMoreToken())
1099 {
1100 ss.insert((size_t)TextTools::toInt(nst2.nextToken()));
1101 }
1102
1103 vposKron.push_back(ss);
1104 }
1105 }
1106 }
1107
1108 // /////////////////////////////////
1109 // / WORD
1110 // ///////////////////////////////
1111
1112 if (modelName == "Word")
1113 {
1114 if (v_nestedModelDescription.size() != nbmodels)
1115 {
1116 model = make_unique<WordSubstitutionModel>(std::move(v_pSM[0]), nbmodels);
1117 }
1118 else
1119 {
1120 ModelList ml(v_pSM);
1121 model = make_unique<WordSubstitutionModel>(ml);
1122 }
1123 }
1124
1125 // /////////////////////////////////
1126 // / KRON
1127 // ///////////////////////////////
1128
1129 else if (modelName == "Kron")
1130 {
1131 if (vposKron.size() == 0)
1132 {
1133 if (v_nestedModelDescription.size() != nbmodels)
1134 {
1135 model = make_unique<KroneckerWordSubstitutionModel>(std::move(v_pSM[0]), nbmodels);
1136 }
1137 else
1138 {
1139 ModelList ml(v_pSM);
1140 model = make_unique<KroneckerWordSubstitutionModel>(ml);
1141 }
1142 }
1143 else
1144 {
1145 if (v_nestedModelDescription.size() != nbmodels)
1146 {
1147 model = make_unique<KroneckerWordSubstitutionModel>(std::move(v_pSM[0]), nbmodels, vposKron);
1148 }
1149 else
1150 {
1151 ModelList ml(v_pSM);
1152 model = make_unique<KroneckerWordSubstitutionModel>(ml, vposKron);
1153 }
1154 }
1155 }
1156
1157 // /////////////////////////////////
1158 // / CODON
1159 // ///////////////////////////////
1160
1161 else
1162 {
1163 auto pCA = dynamic_pointer_cast<const CodonAlphabet>(pWA);
1164 if (pCA == 0)
1165 throw Exception("Non codon Alphabet for model" + modelName + ".");
1166
1167 unique_ptr<AlphabetIndex2> pai2;
1168 unique_ptr<CodonFrequencySetInterface> pFS;
1169
1170 if ((!dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].get())) ||
1171 ((v_nestedModelDescription.size() == 3) &&
1172 (!dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].get()) || !dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].get()))))
1173 throw Exception("Non simple NucleotideSubstitutionModel embedded in " + modelName + " model.");
1174
1175 if (args.find("genetic_code") != args.end())
1176 {
1177 ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
1178 throw Exception("BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
1179 }
1180
1181 if (!geneticCode_)
1182 throw Exception("BppOSubstitutionModelFormat::readWord_(). No genetic code specified! Consider using 'setGeneticCode'.");
1183
1184
1187
1188 if ((modelName.find("Dist") != string::npos) || (modelName == "SENCA"))
1189 pai2 = (args.find("aadistance") == args.end()) ? nullptr : SequenceApplicationTools::getAlphabetIndex2(AlphabetTools::PROTEIN_ALPHABET, args["aadistance"]);
1190
1193
1194 if (modelName.find("Freq") != string::npos)
1195 {
1196 if (args.find("frequencies") == args.end())
1197 throw Exception("Missing equilibrium frequencies.");
1198
1200 bIOFreq.setGeneticCode(geneticCode_); // This uses the same instance as the one that will be used by the model
1201 auto tmp = bIOFreq.readFrequencySet(pCA, args["frequencies"], mData, nData);
1202 pFS = unique_ptr<CodonFrequencySetInterface>(dynamic_cast<CodonFrequencySetInterface*>(tmp.release()));
1203 if (!pFS)
1204 throw IOException("BppOSubstitutionModelFormat::readWord_(). Incorrect codon frequencies.");
1205 map<string, string> unparsedParameterValuesNested(bIOFreq.getUnparsedArguments());
1206
1207 for (auto& it : unparsedParameterValuesNested)
1208 {
1209 unparsedArguments_[modelName + "." + it.first] = it.second;
1210 }
1211 }
1212
1213 // //////////////
1214 // // Triplet
1215
1216 if (modelName == "Triplet")
1217 model = (v_nestedModelDescription.size() != 3)
1218 ? make_unique<TripletSubstitutionModel>(
1219 pCA,
1220 unique_ptr<NucleotideSubstitutionModelInterface>(
1221 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1222 )
1223 )
1224 : make_unique<TripletSubstitutionModel>(
1225 pCA,
1226 unique_ptr<NucleotideSubstitutionModelInterface>(
1227 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1228 ),
1229 unique_ptr<NucleotideSubstitutionModelInterface>(
1230 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1231 ),
1232 unique_ptr<NucleotideSubstitutionModelInterface>(
1233 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1234 )
1235 );
1236
1237 else if (modelName.find("Codon") != string::npos)
1238 {
1239 vector< unique_ptr<CoreCodonSubstitutionModelInterface>> vCSM;
1240 string name = "Codon";
1241 map<string, string> unparsedParameterValuesNested;
1242
1243 if (modelName.find("Dist") != string::npos)
1244 {
1245 name += "Dist";
1246
1247 vCSM.push_back(make_unique<AbstractCodonDistanceSubstitutionModel>(std::move(pai2), geneticCode_, ""));
1248 }
1249 else if (modelName.find("BGC") != string::npos)
1250 {
1251 name += "BGC";
1252
1253 vCSM.push_back(make_unique<AbstractCodonBGCSubstitutionModel>(geneticCode_, ""));
1254 }
1255 else if (modelName.find("Prot") != string::npos)
1256 {
1257 name += "Prot";
1258
1259 if (args.find("protmodel") == args.end())
1260 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'protmodel' for codon model argument 'Prot'.");
1261
1262 nestedModelDescription = args["protmodel"];
1264
1265 shared_ptr<SubstitutionModelInterface> tmpModel = nestedReader.readSubstitutionModel(geneticCode_->getTargetAlphabet(), nestedModelDescription, mData, nData, false);
1266 auto nestedModel = dynamic_pointer_cast<ProteinSubstitutionModelInterface>(tmpModel);
1267
1268 unparsedParameterValuesNested.insert(nestedReader.getUnparsedArguments().begin(), nestedReader.getUnparsedArguments().end());
1269
1270 vCSM.push_back(make_unique<AbstractCodonAARateSubstitutionModel>(nestedModel, geneticCode_, ""));
1271 }
1272 if (modelName.find("AAClust") != string::npos)
1273 {
1274 name += "AAClust";
1275
1276 // Initialization using the "assign" argument
1277 vector<uint> partition;
1278 if (args.find("partition") != args.end())
1279 {
1280 string rf = args["partition"];
1281
1282 StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
1283 while (strtok.hasMoreToken())
1284 partition.push_back(TextTools::to<uint>(strtok.nextToken()));
1285 }
1286
1287 unique_ptr<AbstractCodonClusterAASubstitutionModel> aca = partition.size() != 0 ?
1288 make_unique<AbstractCodonClusterAASubstitutionModel>(geneticCode_, "", partition) :
1289 make_unique<AbstractCodonClusterAASubstitutionModel>(geneticCode_, "");
1290
1291 vCSM.push_back(std::move(aca));
1292 }
1293
1295 if (vCSM.size() == 0)
1296 name += "Rate";
1297
1298 if (modelName.find("CpG") != string::npos)
1299 {
1300 name += "CpG";
1301 vCSM.push_back(make_unique<AbstractCodonCpGSubstitutionModel>(pCA, ""));
1302 }
1303
1304 if (modelName.find("AAFit") != string::npos)
1305 {
1306 name += "AAFit";
1307
1308 if (args.find("fitness") == args.end())
1309 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'fitness' for codon model argument 'AAFit'.");
1310
1311 string nestedFreqDescription = args["fitness"];
1313
1314 auto nestedFreq = nestedReader.readFrequencySet(geneticCode_->getTargetAlphabet(), nestedFreqDescription, mData, nData, false);
1315
1316 for (auto it : nestedReader.getUnparsedArguments())
1317 {
1318 unparsedParameterValuesNested["fit_" + it.first] = it.second;
1319 }
1320
1321 auto aca = make_unique<AbstractCodonAAFitnessSubstitutionModel>(std::move(nestedFreq), geneticCode_, "");
1322
1323 if (args.find("Ns") != args.end())
1324 aca->addNsParameter();
1325
1326 vCSM.push_back(std::move(aca));
1327 }
1328 else if (modelName.find("Fit") != string::npos)
1329 {
1330 if (args.find("fitness") == args.end())
1331 throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'fitness' for codon model argument 'Fit'.");
1332 string nestedFreqDescription = args["fitness"];
1333
1335 nestedReader.setGeneticCode(geneticCode_);
1336
1337 auto nestedFreq = nestedReader.readFrequencySet(alphabet, nestedFreqDescription, mData, nData, false);
1338
1339 for (auto it : nestedReader.getUnparsedArguments())
1340 {
1341 unparsedParameterValuesNested["fit_" + it.first] = it.second;
1342 }
1343
1344 vCSM.push_back(make_unique<AbstractCodonFitnessSubstitutionModel>(std::move(nestedFreq), geneticCode_, ""));
1345 }
1346
1347 if (modelName.find("PhasFreq") != string::npos)
1348 {
1349 name += "PhasFreq";
1350 vCSM.push_back(make_unique<AbstractCodonPhaseFrequenciesSubstitutionModel>(std::move(pFS), ""));
1351 }
1352 else if (modelName.find("Freq") != string::npos)
1353 {
1354 name += "Freq";
1355 vCSM.push_back(make_unique<AbstractCodonFrequenciesSubstitutionModel>(std::move(pFS), ""));
1356 }
1357
1358 // Then we update the parameter set:
1359 for (auto it : unparsedParameterValuesNested)
1360 {
1361 unparsedArguments_[name + "." + it.first] = it.second;
1362 }
1363
1364 model = (v_nestedModelDescription.size() != 3)
1365 ? make_unique<CodonAdHocSubstitutionModel>(
1367 unique_ptr<NucleotideSubstitutionModelInterface>(
1368 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1369 ),
1370 vCSM,
1371 name)
1372 : make_unique<CodonAdHocSubstitutionModel>(
1374 unique_ptr<NucleotideSubstitutionModelInterface>(
1375 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1376 ),
1377 unique_ptr<NucleotideSubstitutionModelInterface>(
1378 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1379 ),
1380 unique_ptr<NucleotideSubstitutionModelInterface>(
1381 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1382 ),
1383 vCSM,
1384 name);
1385 }
1386 else if (modelName == "KronDistFreq")
1387 {
1388 if (v_nestedModelDescription.size() != 3)
1389 {
1390 if (vposKron.size() == 0)
1391 model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(geneticCode_,
1392 unique_ptr<NucleotideSubstitutionModelInterface>(
1393 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())),
1394 std::move(pFS), std::move(pai2));
1395 else
1396 model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(geneticCode_,
1397 unique_ptr<NucleotideSubstitutionModelInterface>(
1398 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())),
1399 vposKron, std::move(pFS), std::move(pai2));
1400 }
1401 else
1402 {
1403 if (vposKron.size() != 0)
1404 model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(
1406 unique_ptr<NucleotideSubstitutionModelInterface>(
1407 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1408 ),
1409 unique_ptr<NucleotideSubstitutionModelInterface>(
1410 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1411 ),
1412 unique_ptr<NucleotideSubstitutionModelInterface>(
1413 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1414 ),
1415 std::move(pFS),
1416 std::move(pai2));
1417 else
1418 model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(
1420 unique_ptr<NucleotideSubstitutionModelInterface>(
1421 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1422 ),
1423 unique_ptr<NucleotideSubstitutionModelInterface>(
1424 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1425 ),
1426 unique_ptr<NucleotideSubstitutionModelInterface>(
1427 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1428 ),
1429 vposKron,
1430 std::move(pFS),
1431 std::move(pai2));
1432 }
1433 }
1434 else if (modelName == "KronDist")
1435 {
1436 if (v_nestedModelDescription.size() != 3)
1437 {
1438 if (vposKron.size() == 0)
1439 model = make_unique<KroneckerCodonDistanceSubstitutionModel>(geneticCode_,
1440 unique_ptr<NucleotideSubstitutionModelInterface>(dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())),
1441 std::move(pai2));
1442 else
1443 model = make_unique<KroneckerCodonDistanceSubstitutionModel>(geneticCode_,
1444 unique_ptr<NucleotideSubstitutionModelInterface>(dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())),
1445 vposKron, std::move(pai2));
1446 }
1447 else
1448 {
1449 if (vposKron.size() != 0)
1450 model = make_unique<KroneckerCodonDistanceSubstitutionModel>(
1452 unique_ptr<NucleotideSubstitutionModelInterface>(
1453 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1454 ),
1455 unique_ptr<NucleotideSubstitutionModelInterface>(
1456 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1457 ),
1458 unique_ptr<NucleotideSubstitutionModelInterface>(
1459 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1460 ),
1461 std::move(pai2));
1462 else
1463 model = make_unique<KroneckerCodonDistanceSubstitutionModel>(
1465 unique_ptr<NucleotideSubstitutionModelInterface>(
1466 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1467 ),
1468 unique_ptr<NucleotideSubstitutionModelInterface>(
1469 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1470 ),
1471 unique_ptr<NucleotideSubstitutionModelInterface>(
1472 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1473 ),
1474 vposKron,
1475 std::move(pai2));
1476 }
1477 }
1478
1479 else if (modelName == "SENCA")
1480 {
1481 if (args.find("fitness") == args.end())
1482 throw Exception("Missing fitness in model " + modelName + ".");
1483
1486
1487 auto pFit(bIOFreq.readFrequencySet(pCA, args["fitness"], mData, nData, false));
1488 map<string, string> unparsedParameterValuesNested(bIOFreq.getUnparsedArguments());
1489
1490 for (auto& it : unparsedParameterValuesNested)
1491 {
1492 unparsedArguments_[modelName + ".fit_" + it.first] = it.second;
1493 }
1494
1495 if (v_nestedModelDescription.size() != 3)
1496 {
1497 model = make_unique<SENCA>(geneticCode_,
1498 unique_ptr<NucleotideSubstitutionModelInterface>(
1499 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1500 ),
1501 std::move(pFit), std::move(pai2));
1502 }
1503 else
1504 model = make_unique<SENCA>(
1506 unique_ptr<NucleotideSubstitutionModelInterface>(
1507 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[0].release())
1508 ),
1509 unique_ptr<NucleotideSubstitutionModelInterface>(
1510 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[1].release())
1511 ),
1512 unique_ptr<NucleotideSubstitutionModelInterface>(
1513 dynamic_cast<NucleotideSubstitutionModelInterface*>(v_pSM[2].release())
1514 ),
1515 std::move(pFit),
1516 std::move(pai2));
1517 }
1518 }
1519
1520 return model;
1521}
1522
1524 OutputStream& out,
1525 std::map<std::string, std::string>& globalAliases,
1526 std::vector<std::string>& writtenNames) const
1527{
1528 bool comma = false;
1529
1530 // Mixed Model that are defined as "Mixture" and "Mixed"
1531
1532 if ((dynamic_cast<const MixedTransitionModelInterface*>(&model) != nullptr) && (dynamic_cast<const AbstractBiblioMixedTransitionModel*>(&model) == nullptr))
1533 {
1534 writeMixed_(dynamic_cast<const MixedTransitionModelInterface&>(model), out, globalAliases, writtenNames);
1535 return;
1536 }
1537
1538 auto name = model.getName();
1539 auto parend = (name.rfind(")") == name.size() - 1);
1540
1541 out << (parend ? name.substr(0, name.size() - 1) : name + "(");
1542
1543 // Is it a protein user defined model?
1544 try
1545 {
1546 const UserProteinSubstitutionModel& userModel = dynamic_cast<const UserProteinSubstitutionModel&>(model);
1547 out << "file=" << userModel.getPath();
1548 string ns = userModel.getNamespace();
1549
1550 if (TextTools::hasSubstring(ns, "+F.") )
1551 ns = ns.substr(0, ns.size() - 3);
1552 else
1553 ns = ns.substr(0, ns.size() - 1);
1554
1555 out << ", name=" << ns;
1556 comma = true;
1557 }
1558 catch (bad_cast&)
1559 {}
1560
1561 // Is it a markov-modulated model?
1562 try
1563 {
1564 const MarkovModulatedSubstitutionModel& mmModel = dynamic_cast<const MarkovModulatedSubstitutionModel&>(model);
1565 out << "model=";
1566 write(mmModel.nestedModel(), out, globalAliases, writtenNames);
1567
1568 try
1569 {
1570 const G2001& gModel = dynamic_cast<const G2001&>(model);
1571 // Also print distribution here:
1572 out << ", rdist=";
1573 const DiscreteDistributionInterface& nestedDist = gModel.rateDistribution();
1575 bIO.writeDiscreteDistribution(nestedDist, out, globalAliases, writtenNames);
1576 }
1577 catch (bad_cast&)
1578 {}
1579 comma = true;
1580 }
1581 catch (bad_cast&)
1582 {}
1583
1584 // Is it a model with gaps?
1585 try
1586 {
1587 const RE08& reModel = dynamic_cast<const RE08&>(model);
1588 out << "model=";
1589 write(reModel.nestedModel(), out, globalAliases, writtenNames);
1590 comma = true;
1591 }
1592 catch (bad_cast&)
1593 {}
1594
1595 // Is it a YpR model?
1596 try
1597 {
1598 const YpR& yprModel = dynamic_cast<const YpR&>(model);
1599 out << "model=";
1600 write(yprModel.nestedModel(), out, globalAliases, writtenNames);
1601 comma = true;
1602 }
1603 catch (bad_cast&)
1604 {}
1605
1606 // Is it a OneChange model?
1607 try
1608 {
1609 const OneChangeTransitionModel& oneChangeTransitionModel = dynamic_cast<const OneChangeTransitionModel&>(model);
1610 out << "model=";
1611 write(oneChangeTransitionModel.model(), out, globalAliases, writtenNames);
1612 comma = true;
1613 }
1614 catch (bad_cast&)
1615 {
1616 // Is it a model with register?
1617 try
1618 {
1619 const OneChangeRegisterTransitionModel& oneChangeRegisterTransitionModel = dynamic_cast<const OneChangeRegisterTransitionModel&>(model);
1620 out << "model=";
1621 write(oneChangeRegisterTransitionModel.model(), out, globalAliases, writtenNames);
1622 comma = true;
1623 out << ", register=";
1624 out << oneChangeRegisterTransitionModel.getRegisterName();
1625 out << ", numReg=" << VectorTools::paste(oneChangeRegisterTransitionModel.getRegisterNumbers(), "+");
1626 }
1627 catch (bad_cast&)
1628 {}
1629
1630 try
1631 {
1632 const RegisterRatesSubstitutionModel& registerRatesSubstitutionModel = dynamic_cast<const RegisterRatesSubstitutionModel&>(model);
1633 out << "model=";
1634 write(registerRatesSubstitutionModel.model(), out, globalAliases, writtenNames);
1635 comma = true;
1636 out << ", register=";
1637 out << registerRatesSubstitutionModel.getRegisterName();
1638 }
1639 catch (bad_cast&)
1640 {}
1641 }
1642
1643 // Is it a gBGC model?
1644 try
1645 {
1646 const gBGC& gbgcModel = dynamic_cast<const gBGC&>(model);
1647 StdStr sout;
1648 write(gbgcModel.nestedModel(), sout, globalAliases, writtenNames);
1649
1650 string ss = sout.str();
1651 auto begin = ss.find_first_of("(");
1652 auto end = ss.find_last_of(")");
1653
1654 out << ss.substr(begin + 1, end - begin - 1);
1655
1656 comma = true;
1657 }
1658 catch (bad_cast&)
1659 {}
1660
1661 // Is it a word model?
1662 try
1663 {
1664 const AbstractWordSubstitutionModel& wM = dynamic_cast<const AbstractWordSubstitutionModel&>(model);
1665 size_t nmod = wM.getNumberOfModels();
1666 const TransitionModelInterface& mod0 = wM.nModel(0);
1667 if (nmod == 1)
1668 {
1669 out << "model=";
1670 write(mod0, out, globalAliases, writtenNames);
1671 }
1672 else
1673 {
1674 const TransitionModelInterface& mod1 = wM.nModel(1);
1675 if (&mod1 == &mod0)
1676 {
1677 out << "model=";
1678 write(mod0, out, globalAliases, writtenNames);
1679 }
1680 else
1681 {
1682 out << "model1=";
1683 write(mod0, out, globalAliases, writtenNames);
1684 for (unsigned int i = 1; i < nmod; ++i)
1685 {
1686 out << ", model" + TextTools::toString(i + 1) + "=";
1687 write(wM.nModel(i), out, globalAliases, writtenNames);
1688 }
1689 }
1690 }
1691 comma = true;
1692 }
1693 catch (bad_cast&)
1694 {}
1695
1696 // Is it a COaLA model ?
1697 try
1698 {
1699 const Coala& coalaModel = dynamic_cast<const Coala&>(model);
1700 out << "exch=" << coalaModel.getExch() << ", nbrAxes=" << coalaModel.getNbrOfAxes();
1701 comma = true;
1702 }
1703 catch (bad_cast&)
1704 {}
1705
1706 // Is it a InMixed model?
1707 try
1708 {
1709 const InMixedSubstitutionModel& inModel = dynamic_cast<const InMixedSubstitutionModel&>(model);
1710 out << "model=";
1711 write(inModel.mixedModel(), out, globalAliases, writtenNames);
1712 out << ", numMod=" << TextTools::toString(inModel.getSubModelNumber());
1713 comma = true;
1714 }
1715 catch (bad_cast&)
1716 {}
1717
1718 // Is it a POMO model?
1719 const POMO* pomo = dynamic_cast<const POMO*>(&model);
1720 if (pomo)
1721 {
1722 out << "model=";
1723 write(pomo->mutationModel(), out, globalAliases, writtenNames);
1724
1725 if (pomo->hasFitness())
1726 {
1727 out << ", fitness=";
1728
1730 bIOFreq.writeFrequencySet(pomo->fitness(), out, globalAliases, writtenNames);
1731 }
1732 comma = true;
1733 }
1734
1735 // Is it a model with FrequencySet?
1736
1737 try
1738 {
1739 auto& pfs = model.frequencySet();
1740
1741 StdStr freqdesc;
1743 bIOFreq.writeFrequencySet(pfs, freqdesc, globalAliases, writtenNames);
1744 string fd = freqdesc.str();
1745 if (fd.size() != 0)
1746 {
1747 if (comma)
1748 out << ", ";
1749 out << "frequencies=" + fd;
1750 }
1751 comma = true;
1752 }
1753 catch (exception&)
1754 {}
1755
1756 // Is it a codon model with Protein Model or partition in it?
1757 try
1758 {
1759 auto& casm = dynamic_cast<const CodonAdHocSubstitutionModel&>(model);
1760 for (size_t i = 0; i < casm.getNumberOfModels(); ++i)
1761 {
1762 try
1763 {
1764 auto& acr = dynamic_cast<const AbstractCodonAARateSubstitutionModel&>(casm.layerModel(i));
1765 if (comma)
1766 out << ", ";
1767 out << "protmodel=";
1768
1769 write(acr.aaModel(), out, globalAliases, writtenNames);
1770 comma = true;
1771 }
1772 catch (bad_cast&)
1773 {}
1774
1775 try
1776 {
1777 auto& acf = dynamic_cast<const AbstractCodonAAFitnessSubstitutionModel&>(casm.layerModel(i));
1778 if (comma)
1779 out << ", ";
1780 out << "fitness=";
1781
1783 bIOFreq.writeFrequencySet(acf.aaFitness(), out, globalAliases, writtenNames);
1784 comma = true;
1785 }
1786 catch (bad_cast&)
1787 {}
1788
1789 try
1790 {
1791 auto& acc = dynamic_cast<const AbstractCodonClusterAASubstitutionModel&>(casm.layerModel(i));
1792 if (comma)
1793 out << ", ";
1794 out << "partition=(";
1795 const vector<uint>& vass = acc.getAssign();
1796
1797 for (size_t j = 0; j < vass.size(); ++j)
1798 {
1799 if (j != 0)
1800 out << ", ";
1801 out << vass[j];
1802 }
1803 out << ")";
1804 comma = true;
1805 }
1806 catch (bad_cast&)
1807 {}
1808 }
1809 }
1810 catch (bad_cast&)
1811 {}
1812
1813 // Specific case of SENCA
1814
1815 try
1816 {
1817 auto& pCF = dynamic_cast<const SENCA&>(model);
1818 if (comma)
1819 out << ", ";
1820 out << "fitness=";
1821
1823 bIOFreq.writeFrequencySet(pCF.fitness(), out, globalAliases, writtenNames);
1824 comma = true;
1825 }
1826 catch (bad_cast&)
1827 {}
1828
1829 // and bibliomixed models
1830
1831 try
1832 {
1833 auto& pM7 = dynamic_cast<const YNGP_M7&>(model);
1834 if (comma)
1835 out << ", ";
1836 out << "n=" << pM7.getNumberOfModels();
1837 comma = true;
1838 }
1839 catch (bad_cast&)
1840 {}
1841
1842 try
1843 {
1844 auto& pM8 = dynamic_cast<const YNGP_M8&>(model);
1845 if (comma)
1846 out << ", ";
1847 out << "n=" << pM8.getNumberOfModels() - 1;
1848 comma = true;
1849 }
1850 catch (bad_cast&)
1851 {}
1852
1853 try
1854 {
1855 auto& pM9 = dynamic_cast<const YNGP_M9&>(model);
1856 if (comma)
1857 out << ", ";
1858 out << "nbeta=" << pM9.getNBeta() << ", ngamma=" << pM9.getNGamma();
1859
1860 comma = true;
1861 }
1862 catch (bad_cast&)
1863 {}
1864
1865 try
1866 {
1867 auto& pM10 = dynamic_cast<const YNGP_M10&>(model);
1868 if (comma)
1869 out << ", ";
1870 out << "nbeta=" << pM10.getNBeta() << ", ngamma=" << pM10.getNGamma();
1871
1872 comma = true;
1873 }
1874 catch (bad_cast&)
1875 {}
1876
1877 try
1878 {
1879 auto& pLGL = dynamic_cast<const LGL08_CAT&>(model);
1880 if (comma)
1881 out << ", ";
1882 out << "nbCat=" << pLGL.getNumberOfCategories();
1883
1884 comma = true;
1885 }
1886 catch (bad_cast&)
1887 {}
1888
1889 try
1890 {
1891 auto& pDFP = dynamic_cast<const DFP07&>(model);
1892 if (comma)
1893 out << ", ";
1894
1895 out << "protmodel=";
1896 write(pDFP.proteinModel(), out, globalAliases, writtenNames);
1897
1898 comma = true;
1899 }
1900 catch (bad_cast&)
1901 {}
1902
1903 try
1904 {
1905 auto& pSameAA = dynamic_cast<const CodonSameAARateSubstitutionModel&>(model);
1906 if (comma)
1907 out << ", ";
1908
1909 out << "codonmodel=";
1910 write(pSameAA.codonModel(), out, globalAliases, writtenNames);
1911
1912 out << ", protmodel=";
1913 write(pSameAA.proteinModel(), out, globalAliases, writtenNames);
1914
1915 comma = true;
1916 }
1917 catch (bad_cast&)
1918 {}
1919
1920
1921 // case of Biblio models, update writtenNames
1922
1923 try
1924 {
1925 auto& absm = dynamic_cast<const AbstractBiblioTransitionModel&>(model);
1926 size_t wNs = writtenNames.size();
1927
1928 for (size_t i = 0; i < wNs; ++i)
1929 {
1930 try
1931 {
1932 writtenNames.push_back(absm.getNamespace() + absm.getParNameFromPmodel(writtenNames[i]));
1933 }
1934 catch (Exception&)
1935 {}
1936 }
1937 }
1938 catch (bad_cast&)
1939 {}
1940
1942 bIO.write(model, out, globalAliases, model.getIndependentParameters().getParameterNames(), writtenNames, true, comma);
1943 out << ")";
1944}
1945
1946
1948 OutputStream& out,
1949 std::map<std::string, std::string>& globalAliases,
1950 std::vector<std::string>& writtenNames) const
1951{
1952 try
1953 {
1954 auto& pMS = dynamic_cast<const MixtureOfTransitionModels&>(model);
1955
1956 out << "Mixture(";
1957 for (unsigned int i = 0; i < pMS.getNumberOfModels(); ++i)
1958 {
1959 if (i != 0)
1960 out << ", ";
1961 out << "model" + TextTools::toString(i + 1) + "=";
1962 write(pMS.nModel(i), out, globalAliases, writtenNames);
1963 }
1964 }
1965 catch (bad_cast&)
1966 {
1967 try
1968 {
1969 auto& pMS = dynamic_cast<const MixtureOfATransitionModel&>(model);
1970 out << "MixedModel(model=";
1971 auto& eM = pMS.model(0);
1972
1973 ParameterList pl = eM.getIndependentParameters();
1974 vector<string> vpl = pl.getParameterNames();
1975
1976 for (auto& pn : vpl)
1977 {
1978 if (find(writtenNames.begin(), writtenNames.end(), pn) == writtenNames.end())
1979 {
1980 if (pMS.hasDistribution(pn))
1981 {
1982 auto& pDD = pMS.distribution(pn);
1983 if (!dynamic_cast<const ConstantDistribution*>(&pDD))
1984 {
1986 StdStr sout;
1987 bIO.writeDiscreteDistribution(pDD, sout, globalAliases, writtenNames);
1988 globalAliases[pn] = sout.str();
1989 }
1990 }
1991 }
1992 }
1993
1994 write(eM, out, globalAliases, writtenNames);
1995
1996 if (pMS.from() != -1)
1997 out << ", from=" << model.getAlphabet()->intToChar(pMS.from()) << ", to=" << model.getAlphabet()->intToChar(pMS.to());
1998 }
1999 catch (bad_cast&)
2000 {}
2001 }
2002
2003 const BppOParametrizableFormat bIO;
2004 bIO.write(model, out, globalAliases, model.getIndependentParameters().getParameterNames(), writtenNames, true, true);
2005 out << ")";
2006}
2007
2009 BranchModelInterface& model,
2010 std::shared_ptr<const AlignmentDataInterface> data)
2011{
2012 string initFreqs = ApplicationTools::getStringParameter(model.getNamespace() + "initFreqs", unparsedArguments_, "", "", true, warningLevel_);
2013 if (verbose_)
2014 ApplicationTools::displayResult("External model frequencies init", (initFreqs == "") ? "None" : initFreqs);
2015
2016 if (initFreqs != "")
2017 {
2018 try
2019 {
2020 auto& tmodel = dynamic_cast<TransitionModelInterface&>(model);
2021 if (initFreqs == "observed")
2022 {
2023 if (!data)
2024 throw Exception("BppOSubstitutionModelFormat::initialize_(). Missing data for observed frequencies");
2025 unsigned int psi = ApplicationTools::getParameter<unsigned int>(model.getNamespace() + "initFreqs.observedPseudoCount", unparsedArguments_, 0, "", true, warningLevel_);
2026 tmodel.setFreqFromData(*data, psi);
2027 }
2028 else if (initFreqs.substr(0, 6) == "values")
2029 {
2030 // Initialization using the "values" argument
2031 map<int, double> frequencies;
2032
2033 string rf = initFreqs.substr(6);
2034 StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
2035 int i = 0;
2036 while (strtok.hasMoreToken())
2037 frequencies[i++] = TextTools::toDouble(strtok.nextToken());
2038 tmodel.setFreq(frequencies);
2039 }
2040 else
2041 throw Exception("Unknown initFreqs argument");
2042 }
2043 catch (bad_cast&)
2044 {
2045 ApplicationTools::displayMessage("Frequencies initialization not possible for model " + model.getName());
2046 }
2047 unparsedArguments_.erase(unparsedArguments_.find(model.getNamespace() + "initFreqs"));
2048 if (unparsedArguments_.find(model.getNamespace() + "initFreqs.observedPseudoCount") != unparsedArguments_.end())
2049 unparsedArguments_.erase(unparsedArguments_.find(model.getNamespace() + "initFreqs.observedPseudoCount"));
2050 }
2051
2053 for (size_t i = 0; i < pl.size(); ++i)
2054 {
2055 AutoParameter ap(pl[i]);
2057 pl.setParameter(i, ap);
2058 }
2059
2060 size_t posp;
2061 for (size_t i = 0; i < pl.size(); i++)
2062 {
2063 const string pName = pl[i].getName();
2064
2065 posp = pName.rfind(".");
2066 bool test1 = (initFreqs == "");
2067 bool test2 = (pName.size() < posp + 6) || (pName.substr(posp + 1, 5) != "theta");
2068 bool test3 = (unparsedArguments_.find(pName) != unparsedArguments_.end());
2069 try
2070 {
2071 if (test1 || test2 || test3)
2072 {
2073 if (!test1 && !test2 && test3)
2074 ApplicationTools::displayWarning("Warning, initFreqs argument is set and a value is set for parameter " + pName);
2075
2076 double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue(), "", true, warningLevel_);
2077 pl[i].setValue(value);
2078
2079 if (unparsedArguments_.find(pName) != unparsedArguments_.end())
2080 unparsedArguments_.erase(unparsedArguments_.find(pName));
2081 }
2082 if (verbose_)
2083 ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
2084 }
2085
2086 catch (Exception& e)
2087 {}
2088 }
2089
2090 model.matchParametersValues(pl);
2091}
Abstract class for mixture models based on the bibliography.
Partial implementation of the SubstitutionModel interface for models that are set for matching the bi...
Abstract class for modelling of ratios of substitution rates between codons, whatever they are synony...
Abstract class for modelling of non-synonymous and synonymous substitution rates in codon models,...
Abstract class for modelling of non-synonymous and synonymous substitution rates in codon models,...
std::string getNamespace() const override
Abstract Basal class for words of substitution models.
const SubstitutionModelInterface & nModel(size_t i) const
returns the ith model, or throw an exception if i is not a valid number.
const BranchModelInterface & model() const override
static bool isBinaryAlphabet(const Alphabet &alphabet)
static std::shared_ptr< const ProteicAlphabet > PROTEIN_ALPHABET
static bool isRNYAlphabet(const Alphabet &alphabet)
static bool isProteicAlphabet(const Alphabet &alphabet)
static bool isNucleicAlphabet(const Alphabet &alphabet)
static bool isCodonAlphabet(const Alphabet &alphabet)
static bool isIntegerAlphabet(const Alphabet &alphabet)
static bool isWordAlphabet(const Alphabet &alphabet)
static void displayMessage(const std::string &text)
static std::shared_ptr< OutputStream > warning
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
virtual void setMessageHandler(std::shared_ptr< OutputStream > mh)
void writeDiscreteDistribution(const DiscreteDistributionInterface &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
const std::map< std::string, std::string > & getUnparsedArguments() const
Frequencies set I/O in BppO format.
void writeFrequencySet(const FrequencySetInterface &freqset, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
std::unique_ptr< FrequencySetInterface > readFrequencySet(std::shared_ptr< const Alphabet > alphabet, const std::string &freqDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true) override
Read a frequencies set from a string.
const std::map< std::string, std::string > & getUnparsedArguments() const override
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
void write(const Parametrizable &parametrizable, OutputStream &out, std::vector< std::string > &writtenNames, bool printComma=false) const override
Rate Distribution I/O in BppO format.
std::unique_ptr< DiscreteDistributionInterface > readDiscreteDistribution(const std::string &distDescription, bool parseArguments)
Substitution model I/O in BppO format.
void writeMixed_(const MixedTransitionModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
std::unique_ptr< SubstitutionModelInterface > readWord_(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData)
void write(const BranchModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
std::unique_ptr< SubstitutionModelInterface > readSubstitutionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true) override
Read a substitution model from a string.
void updateParameters_(BranchModelInterface &model, std::map< std::string, std::string > &args)
Finish parsing of parameters, taking care of aliases.
std::shared_ptr< const GeneticCode > geneticCode_
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
std::map< std::string, std::string > unparsedArguments_
const std::map< std::string, std::string > & getUnparsedArguments() const override
void initialize_(BranchModelInterface &model, std::shared_ptr< const AlignmentDataInterface > data)
Set parameter initial values of a given model according to options.
Transition model I/O in BppO format.
std::unique_ptr< TransitionModelInterface > readTransitionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true)
Interface for all Branch models.
virtual const FrequencySetInterface & frequencySet() const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual std::string getName() const =0
Get the name of the model.
virtual void addRateParameter()=0
size_t getNbrOfAxes() const
Definition: CoalaCore.h:49
The Coala branch-heterogeneous amino-acid substitution model.
Definition: Coala.h:57
std::string getExch() const
Definition: Coala.h:77
Class for substitution models of codons with several layers of codon models.
Parametrize a set of state frequencies for codons.
Interface for reversible codon models.
Class for modelling of non-synonymous rates in codon models, such that the substitution rates between...
The D1Walk substitution model for Integer alphabet [0;N-1]. In this model, substitutions are possible...
Class for non-synonymous substitution models on codons with parameterized equilibrium frequencies and...
Definition: DFP07.h:51
The EquiprobableSubstitutionModel substitution model for any kind of alphabet. Jukes-Cantor models ar...
Galtier's 2001 covarion model.
Definition: G2001.h:30
const DiscreteDistributionInterface & rateDistribution() const
Definition: G2001.h:98
SubModel taken from a MixedTransitionModel, kept in the context of the MixedTransitionModel (see From...
const MixedTransitionModelInterface & mixedModel() const
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
The Le et al (2008) CAT substitution model for proteins.
Definition: LGL08_CAT.h:40
Partial implementation of the Markov-modulated class of substitution models.
const ReversibleSubstitutionModelInterface & nestedModel() const
Interface for Transition models, defined as a mixture of "simple" transition models.
Transition models defined as a mixture of nested substitution models.
Transition models defined as a mixture of several substitution models.
A list of models, for building a WordSubstitutionModel.
Specialisation interface for rversible nucleotide substitution model.
Specialisation interface for nucleotide substitution model.
From a model, compute transition probabilities given there is at least a change of a category (ie a n...
const std::vector< size_t > & getRegisterNumbers() const
From a model, compute transition probabilities given there is at least a change in the branch.
Definition: POMO.h:87
bool hasFitness() const
Definition: POMO.h:151
const SubstitutionModelInterface & mutationModel() const
Definition: POMO.h:161
const FrequencySetInterface & fitness() const
Definition: POMO.h:156
virtual const ParameterList & getIndependentParameters() const=0
virtual void aliasParameters(const std::string &p1, const std::string &p2)=0
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual void setParameter(size_t index, const Parameter &param)
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const=0
virtual bool matchParametersValues(const ParameterList &parameters)=0
virtual std::string getNamespace() const=0
virtual const ParameterList & getParameters() const=0
static std::unique_ptr< SubstitutionRegisterInterface > getSubstitutionRegister(const std::string &regTypeDesc, std::shared_ptr< const StateMapInterface > stateMap, std::shared_ptr< const GeneticCode > genCode, std::shared_ptr< AlphabetIndex2 > &weights, std::shared_ptr< AlphabetIndex2 > &distances, bool verbose=true)
Get a Register instance.
Parametrize a set of state frequencies for proteins.
Specialized interface for protein reversible substitution model.
Specialized interface for protein substitution model.
The Rivas-Eddy substitution model with gap characters.
Definition: RE08.h:66
const ReversibleSubstitutionModelInterface & nestedModel() const
Definition: RE08.h:164
From a model, substitution rates are set into categories following a given register....
Interface for reversible substitution models.
Class for non-synonymous and synonymous substitution models on codons with parameterized equilibrium ...
Definition: SENCA.h:52
static std::unique_ptr< AlphabetIndex2 > getAlphabetIndex2(std::shared_ptr< const Alphabet > alphabet, const std::string &description, const std::string &message="Alphabet distance:", bool verbose=true)
std::string str() const
const std::string & nextToken()
bool hasMoreToken() const
Interface for all transition models.
Build an empirical protein substitution model from a file.
static std::string paste(const std::vector< T > &v, const std::string &delim=" ")
The Yang et al (2000) M10 substitution model for codons.
Definition: YNGP_M10.h:37
The Yang et al (2000) M7 substitution model for codons.
Definition: YNGP_M7.h:33
The Yang et al (2000) M8 substitution model for codons.
Definition: YNGP_M8.h:40
The Yang et al (2000) M9 substitution model for codons.
Definition: YNGP_M9.h:36
YpR model.
Definition: YpR.h:88
const NucleotideSubstitutionModelInterface & nestedModel() const
Definition: YpR.h:133
gBGC model.
Definition: gBGC.h:47
const SubstitutionModelInterface & nestedModel() const
Definition: gBGC.h:84
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool hasSubstring(const std::string &s, const std::string &pattern)
bool isEmpty(const std::string &s)
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.