15#include "../App/PhylogeneticsApplicationTools.h"
16#include "../Model/AbstractBiblioMixedTransitionModel.h"
17#include "../Model/BinarySubstitutionModel.h"
18#include "../Model/D1WalkSubstitutionModel.h"
19#include "../Model/Codon/AbstractCodonAAFitnessSubstitutionModel.h"
20#include "../Model/Codon/AbstractCodonAARateSubstitutionModel.h"
21#include "../Model/Codon/AbstractCodonBGCSubstitutionModel.h"
22#include "../Model/Codon/AbstractCodonClusterAASubstitutionModel.h"
23#include "../Model/Codon/AbstractCodonCpGSubstitutionModel.h"
24#include "../Model/Codon/AbstractCodonDistanceSubstitutionModel.h"
25#include "../Model/Codon/AbstractCodonDistanceSubstitutionModel.h"
26#include "../Model/Codon/AbstractCodonFitnessSubstitutionModel.h"
27#include "../Model/Codon/AbstractCodonFrequenciesSubstitutionModel.h"
28#include "../Model/Codon/AbstractCodonPhaseFrequenciesSubstitutionModel.h"
29#include "../Model/Codon/CodonAdHocSubstitutionModel.h"
30#include "../Model/Codon/CodonSameAARateSubstitutionModel.h"
31#include "../Model/Codon/DFP07.h"
32#include "../Model/Codon/GY94.h"
33#include "../Model/Codon/KCM.h"
34#include "../Model/Codon/KroneckerCodonDistanceFrequenciesSubstitutionModel.h"
35#include "../Model/Codon/KroneckerCodonDistanceSubstitutionModel.h"
36#include "../Model/Codon/MG94.h"
37#include "../Model/Codon/SENCA.h"
38#include "../Model/Codon/TripletSubstitutionModel.h"
39#include "../Model/Codon/YN98.h"
40#include "../Model/Codon/YNGP_M10.h"
41#include "../Model/Codon/YNGP_M7.h"
42#include "../Model/Codon/YNGP_M8.h"
43#include "../Model/Codon/YNGP_M9.h"
44#include "../Model/EquiprobableSubstitutionModel.h"
45#include "../Model/FromMixtureSubstitutionModel.h"
46#include "../Model/G2001.h"
47#include "../Model/InMixedSubstitutionModel.h"
48#include "../Model/KroneckerWordSubstitutionModel.h"
49#include "../Model/MixtureOfATransitionModel.h"
50#include "../Model/MixtureOfTransitionModels.h"
51#include "../Model/Nucleotide/F81.h"
52#include "../Model/Nucleotide/F84.h"
53#include "../Model/Nucleotide/GTR.h"
54#include "../Model/Nucleotide/HKY85.h"
55#include "../Model/Nucleotide/JCnuc.h"
56#include "../Model/Nucleotide/K80.h"
57#include "../Model/Nucleotide/L95.h"
58#include "../Model/Nucleotide/NucleotideSubstitutionModel.h"
59#include "../Model/Nucleotide/RN95.h"
60#include "../Model/Nucleotide/RN95s.h"
61#include "../Model/Nucleotide/SSR.h"
62#include "../Model/Nucleotide/T92.h"
63#include "../Model/Nucleotide/TN93.h"
64#include "../Model/Nucleotide/YpR.h"
65#include "../Model/Nucleotide/gBGC.h"
66#include "../Model/OneChangeRegisterTransitionModel.h"
67#include "../Model/OneChangeTransitionModel.h"
68#include "../Model/POMO.h"
69#include "../Model/Protein/Coala.h"
70#include "../Model/Protein/CoalaCore.h"
71#include "../Model/Protein/DSO78.h"
72#include "../Model/Protein/JCprot.h"
73#include "../Model/Protein/JTT92.h"
74#include "../Model/Protein/LG08.h"
75#include "../Model/Protein/LGL08_CAT.h"
76#include "../Model/Protein/ProteinSubstitutionModel.h"
77#include "../Model/Protein/UserProteinSubstitutionModel.h"
78#include "../Model/Protein/WAG01.h"
79#include "../Model/RE08.h"
80#include "../Model/RegisterRatesSubstitutionModel.h"
81#include "../Model/TS98.h"
82#include "../Model/TwoParameterBinarySubstitutionModel.h"
83#include "../Model/WordSubstitutionModel.h"
107unsigned char BppOSubstitutionModelFormat::DNA = 1;
108unsigned char BppOSubstitutionModelFormat::RNA = 2;
109unsigned char BppOSubstitutionModelFormat::NUCLEOTIDE = 1 | 2;
110unsigned char BppOSubstitutionModelFormat::PROTEIN = 4;
111unsigned char BppOSubstitutionModelFormat::CODON = 8;
112unsigned char BppOSubstitutionModelFormat::WORD = 16;
113unsigned char BppOSubstitutionModelFormat::BINARY = 32;
114unsigned char BppOSubstitutionModelFormat::INTEGER = 64;
115unsigned char BppOSubstitutionModelFormat::ALL = 1 | 2 | 4 | 8 | 16 | 32 | 64;
118unique_ptr<SubstitutionModelInterface> BppOSubstitutionModelFormat::readSubstitutionModel(
119 shared_ptr<const Alphabet> alphabet,
120 const string& modelDescription,
121 const std::map<
size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
126 unique_ptr<SubstitutionModelInterface> model;
127 string modelName =
"";
128 map<string, string> args;
136 if (modelName ==
"InMixed")
138 if (args.find(
"model") == args.end())
139 throw Exception(
"'model' argument missing to define the InMixed model.");
144 if (args.find(
"numMod") == args.end())
146 if (args.find(
"nameMod") == args.end())
147 throw Exception(
"'numMod' and 'nameMod' arguments missing to define the InMixed submodel.");
149 nameMod = args[
"nameMod"];
154 string modelDesc2 = args[
"model"];
158 unique_ptr<TransitionModelInterface> nestedModelTmp = nestedReader.
readTransitionModel(alphabet, modelDesc2, mData, nData,
false);
165 throw Exception(
"Unknown mixed model " + modelDesc2 +
".");
171 nestedModel->model(nameMod);
175 throw Exception(
"BppOSubstitutionModelFormat::read. " + nestedModel->getName() +
"argument for model 'InMixed' has no submodel with name " + nameMod +
".");
177 model = make_unique<InMixedSubstitutionModel>(std::move(nestedModel), nameMod, modelDesc2);
181 if (numMod == 0 || (nestedModel->getNumberOfModels() < numMod))
182 throw Exception(
"BppOSubstitutionModelFormat::read. " + nestedModel->getName() +
"argument for model 'InMixed' has no submodel with number " +
TextTools::toString(numMod) +
".");
183 model = make_unique<InMixedSubstitutionModel>(std::move(nestedModel), numMod - 1, modelDesc2);
187 for (
auto& it :unparsedParameterValuesNested)
197 else if (modelName ==
"FromRegister")
200 if (args.find(
"model") == args.end())
201 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'FromRegister'.");
203 string nestedModelDescription = args[
"model"];
208 auto nestedModel = nestedReader.
readSubstitutionModel(alphabet, nestedModelDescription, mData, nData,
false);
212 if (args.find(
"register") == args.end())
213 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'register' for model 'FromRegister'.");
215 string registerDescription = args[
"register"];
216 shared_ptr<AlphabetIndex2> weights =
nullptr;
217 shared_ptr<AlphabetIndex2> distances =
nullptr;
224 if (args.find(
"isNormalized") != args.end() && args[
"isNormalized"] ==
"true")
227 model = make_unique<RegisterRatesSubstitutionModel>(std::move(nestedModel), *reg, isNorm);
230 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'register' for model 'FromRegister'.");
233 for (
auto& it : unparsedParameterValuesNested)
239 else if (modelName ==
"POMO")
241 auto allelic = dynamic_pointer_cast<const AllelicAlphabet>(alphabet);
243 throw Exception(
"BppOSubstitutionModelFormat;;read. POMO model with no allelic alphabet.");
246 if (args.find(
"model") == args.end())
247 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'POMO'.");
249 map<string, string> unparsedParameterValuesNested;
251 unique_ptr<FrequencySetInterface> nestedFreq(
nullptr);
253 if (args.find(
"fitness") != args.end())
255 string nestedFreqDescription = args[
"fitness"];
258 nestedFreq = nestedFreqReader.
readFrequencySet(allelic->getStateAlphabet(), nestedFreqDescription, mData, nData,
false);
261 for (
auto& it : unparsedParameterValuesNested)
263 unparsedParameterValuesNested[
"fit_" + it.first] = it.second;
269 string nestedModelDescription = args[
"model"];
273 auto nestedModel = nestedReader.
readSubstitutionModel(allelic->getStateAlphabet(), nestedModelDescription, mData, nData,
false);
278 model = make_unique<POMO>(allelic, std::move(nestedModel), std::move(nestedFreq));
281 for (
auto& it : unparsedParameterValuesNested)
292 else if ((modelName ==
"Word") || (modelName.substr(0, 4) ==
"Kron") || (modelName ==
"Triplet") || (modelName.substr(0, 5) ==
"Codon") || (modelName ==
"SENCA") )
293 model =
readWord_(alphabet, modelDescription, mData, nData);
299 else if (((modelName ==
"MG94") || (modelName ==
"YN98") || (modelName ==
"YNGP_M0") ||
300 (modelName ==
"GY94") || (modelName.substr(0, 3) ==
"KCM") || (modelName ==
"SameAARate"))
304 throw Exception(
"BppOSubstitutionModelFormat::read. Codon alphabet not supported.");
306 throw Exception(
"BppOSubstitutionModelFormat::readSubstitionModel(). No genetic code specified! Consider using 'setGeneticCode'.");
309 throw Exception(
"Alphabet should be Codon Alphabet.");
311 auto pCA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
313 if (args.find(
"genetic_code") != args.end())
315 ApplicationTools::displayWarning(
"'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
316 throw Exception(
"BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
319 if (
geneticCode_->getSourceAlphabet()->getAlphabetType() != pCA->getAlphabetType())
320 throw Exception(
"Mismatch between genetic code and codon alphabet");
322 unique_ptr<CodonFrequencySetInterface> codonFreqs(
nullptr);
324 if (args.find(
"frequencies") != args.end())
339 if (modelName !=
"SameAARate")
340 throw Exception(
"Missing 'frequencies' for model " + modelName);
342 if (modelName ==
"MG94")
343 model = make_unique<MG94>(
geneticCode_, std::move(codonFreqs));
344 else if (modelName ==
"GY94")
345 model = make_unique<GY94>(
geneticCode_, std::move(codonFreqs));
346 else if ((modelName ==
"YN98") || (modelName ==
"YNGP_M0"))
347 model = make_unique<YN98>(
geneticCode_, std::move(codonFreqs));
348 else if (modelName ==
"KCM7")
350 else if (modelName ==
"KCM19")
352 else if (modelName ==
"SameAARate")
354 if (args.find(
"protmodel") == args.end())
355 throw Exception(
"Missing 'protmodel in model " + modelName +
".");
362 unparsedArguments_.insert(unparsedParameterValuesNested.begin(), unparsedParameterValuesNested.end());
364 if (args.find(
"codonmodel") == args.end())
365 throw Exception(
"Missing 'codonmodel in model " + modelName +
".");
370 auto tmpSubstitutionModel = nestedCodonReader.
readSubstitutionModel(alphabet, args[
"codonmodel"], mData, nData,
false);
374 for (
const auto& it:unparsedParameterValuesNested)
379 model = make_unique<CodonSameAARateSubstitutionModel>(std::move(nestedProtModel), std::move(nestedCodonModel), std::move(codonFreqs),
geneticCode_);
382 throw Exception(
"Unknown Codon model: " + modelName);
390 else if (modelName ==
"YpR_Sym")
393 throw Exception(
"BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
395 throw Exception(
"Mismatch alphabet: " + alphabet->getAlphabetType() +
" for model: " + modelName);
396 auto prny = dynamic_pointer_cast<const RNY>(alphabet);
398 string nestedModelDescription = args[
"model"];
400 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_sym'.");
404 auto tmpModel = nestedReader.
readSubstitutionModel(prny->getLetterAlphabet(), nestedModelDescription, mData, nData,
false);
407 for (
auto& it : unparsedParameterValuesNested)
412 model = make_unique<YpR_Sym>(prny, std::move(nestedModel));
414 else if (modelName ==
"YpR_Gen")
417 throw Exception(
"BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
419 throw Exception(
"Mismatch alphabet: " + alphabet->getAlphabetType() +
" for model: " + modelName);
420 auto prny = dynamic_pointer_cast<const RNY>(alphabet);
422 string nestedModelDescription = args[
"model"];
424 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_gen'.");
428 auto tmpModel = nestedReader.
readSubstitutionModel(prny->getLetterAlphabet(), nestedModelDescription, mData, nData,
false);
432 for (
auto& it : unparsedParameterValuesNested)
437 model = make_unique<YpR_Gen>(prny, std::move(nestedModel));
445 else if (modelName ==
"RE08")
448 throw Exception(
"BppOSubstitutionModelFormat::read. No Gap model allowed here.");
451 string nestedModelDescription = args[
"model"];
453 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'RE08'.");
459 auto tmpModel = nestedReader.
readSubstitutionModel(alphabet, nestedModelDescription, mData, nData,
false);
467 throw Exception(
"BppOSubstitutionModelFormat::read. Nucleic alphabet not supported.");
469 model = make_unique<RE08Nucleotide>(std::move(nestedModel));
471 throw Exception(
"BppOSubstitutionModelFormat::readSubstitionModel(). Invalid submodel, must be 'reversible' and 'nucleotide'.");
476 throw Exception(
"BppOSubstitutionModelFormat::read. Protein alphabet not supported.");
478 model = make_unique<RE08Protein>(std::move(nestedModel));
480 throw Exception(
"BppOSubstitutionModelFormat::readSubstitionModel(). Invalid submodel, must be 'reversible' and 'protein'.");
485 throw Exception(
"BppOSubstitutionModelFormat::read. Codon alphabet not supported.");
487 model = make_unique<RE08Codon>(std::move(nestedModel));
489 throw Exception(
"BppOSubstitutionModelFormat::readSubstitionModel(). Invalid submodel, must be 'reversible' and 'codon'.");
494 model = make_unique<RE08>(std::move(nestedModel));
498 for (
auto& it : unparsedParameterValuesNested)
508 else if (modelName ==
"TS98")
511 throw Exception(
"BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
514 string nestedModelDescription = args[
"model"];
516 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'TS98'.");
522 auto tmpModel = nestedReader.
readSubstitutionModel(alphabet, nestedModelDescription, mData, nData,
false);
527 model = make_unique<TS98>(std::move(nestedModel));
530 for (
auto& it : unparsedParameterValuesNested)
540 else if (modelName ==
"G01")
543 throw Exception(
"BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
546 string nestedModelDescription = args[
"model"];
548 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'G01'.");
549 string nestedRateDistDescription = args[
"rdist"];
551 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'rdist' for model 'G01'.");
557 auto tmpModel = nestedReader.
readSubstitutionModel(alphabet, nestedModelDescription, mData, nData,
false);
559 if (!nestedModel.get())
560 throw Exception(
"G01 model restricted to Reversible models. Ask developpers to fix it.");
569 model = make_unique<G2001>(std::move(nestedModel), std::move(nestedRDist));
572 for (
auto& it : unparsedParameterValuesNestedModel)
576 for (
auto& it : unparsedParameterValuesNestedDist)
596 if (modelName ==
"Equi")
603 throw Exception(
"BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
604 auto alpha = dynamic_pointer_cast<const NucleicAlphabet>(alphabet);
609 if (modelName.find(
"+gBGC") != string::npos)
611 string subModName = modelName.substr(0, modelName.find(
"+gBGC"));
616 string nestedModelDescription = subModName;
618 if (modelDescription.find_first_of(
"(") != string::npos)
620 string::size_type begin = modelDescription.find_first_of(
"(");
621 string::size_type end = modelDescription.find_last_of(
")");
623 nestedModelDescription += modelDescription.substr(begin, end - begin + 1);
627 auto tmpModel = nestedReader.
readSubstitutionModel(alphabet, nestedModelDescription, mData, nData,
false);
632 model = make_unique<gBGC>(alpha, std::move(nestedModel));
635 for (
auto& it : unparsedParameterValuesNested)
646 else if (modelName ==
"GTR")
648 model = make_unique<GTR>(alpha);
656 else if (modelName ==
"SSR")
658 model = make_unique<SSR>(alpha);
665 else if (modelName ==
"L95")
667 model = make_unique<L95>(alpha);
674 else if (modelName ==
"RN95")
676 model = make_unique<RN95>(alpha);
683 else if (modelName ==
"RN95s")
685 model = make_unique<RN95s>(alpha);
692 else if (modelName ==
"TN93")
694 model = make_unique<TN93>(alpha);
701 else if (modelName ==
"HKY85")
703 model = make_unique<HKY85>(alpha);
710 else if (modelName ==
"F81")
712 model = make_unique<F81>(alpha);
718 else if (modelName ==
"F84")
720 model = make_unique<F84>(alpha);
727 else if (modelName ==
"T92")
729 model = make_unique<T92>(alpha);
736 else if (modelName ==
"K80")
738 model = make_unique<K80>(alpha);
746 else if (modelName ==
"JC69")
748 model = make_unique<JCnuc>(alpha);
751 throw Exception(
"Model '" + modelName +
"' unknown, or does not fit nucleic alphabet.");
759 throw Exception(
"BppOSubstitutionModelFormat::read. Protein alphabet not supported.");
760 auto alpha = dynamic_pointer_cast<const ProteicAlphabet>(alphabet);
762 if (modelName.find(
"+F") != string::npos)
766 auto tmpFreq = freqReader.
readFrequencySet(alpha, freqOpt, mData, nData,
true);
771 for (
auto& it : unparsedParameterValuesNested)
776 if (modelName ==
"JC69+F")
777 model = make_unique<JCprot>(alpha, std::move(protFreq));
778 else if (modelName ==
"DSO78+F")
779 model = make_unique<DSO78>(alpha, std::move(protFreq));
780 else if (modelName ==
"JTT92+F")
781 model = make_unique<JTT92>(alpha, std::move(protFreq));
782 else if (modelName ==
"LG08+F")
783 model = make_unique<LG08>(alpha, std::move(protFreq));
784 else if (modelName ==
"WAG01+F")
785 model = make_unique<WAG01>(alpha, std::move(protFreq));
786 else if (modelName ==
"Empirical+F")
788 string prefix = args[
"name"];
790 throw Exception(
"'name' argument missing for user-defined substitution model.");
791 string fname = args[
"file"];
793 throw Exception(
"'file' argument missing for user-defined substitution model.");
794 model = make_unique<UserProteinSubstitutionModel>(alpha, args[
"file"], std::move(protFreq), prefix +
"+F.");
797 else if (modelName ==
"JC69")
798 model = make_unique<JCprot>(alpha);
799 else if (modelName ==
"DSO78")
800 model = make_unique<DSO78>(alpha);
801 else if (modelName ==
"JTT92")
802 model = make_unique<JTT92>(alpha);
803 else if (modelName ==
"LG08")
804 model = make_unique<LG08>(alpha);
805 else if (modelName ==
"WAG01")
806 model = make_unique<WAG01>(alpha);
808 else if (modelName.substr(0, 9) ==
"LGL08_CAT")
810 string subModelName = modelName.substr(10);
812 size_t posp = modelDescription.find(
"(");
814 string modelDesc2 = modelName.substr(0, 9) + modelDescription.substr(posp);
818 auto tmpModel = nestedReader.
readTransitionModel(alphabet, modelDesc2, mData, nData,
false);
823 throw Exception(
"Unknown model " + modelName +
".");
827 nestedModel->model(subModelName);
831 throw Exception(
"BppOSubstitutionModelFormat::read. " + nestedModel->getName() +
"argument for model 'FromMixture' has no submodel with name " + subModelName +
".");
835 model = make_unique<FromMixtureSubstitutionModel>(*nestedModel, subModelName, modelDesc2);
837 else if (modelName ==
"Empirical")
839 string prefix = args[
"name"];
841 throw Exception(
"'name' argument missing for user-defined substitution model.");
842 model = make_unique<UserProteinSubstitutionModel>(alpha, args[
"file"], prefix);
844 else if (modelName ==
"Coala")
846 string exchangeability = args[
"exch"];
848 throw Exception(
"BppOSubstitutionModelFormat::read. missing argument 'exch' for model 'Coala'.");
849 string prefix = args[
"name"];
851 throw Exception(
"'name' argument missing to specify the exchangeabilities of the user-defined empirical model.");
855 string nbrOfParametersPerBranch = args[
"nbrAxes"];
857 throw Exception(
"'nbrAxes' argument missing to define the number of axis of the Correspondence Analysis.");
859 model = make_unique<Coala>(alpha, *nestedModel, TextTools::to<unsigned int>(nbrOfParametersPerBranch));
861 model->setFreqFromData(*mData.at(nData));
864 throw Exception(
"Model '" + modelName +
"' is unknown, or does not fit proteic alphabet.");
869 throw Exception(
"BppOSubstitutionModelFormat::read. Binary alphabet not supported.");
871 auto balpha = dynamic_pointer_cast<const BinaryAlphabet>(alphabet);
873 if (modelName ==
"Binary")
874 model = make_unique<BinarySubstitutionModel>(balpha);
875 else if (modelName ==
"TwoParameterBinary")
876 model = make_unique<TwoParameterBinarySubstitutionModel>(balpha);
878 throw Exception(
"Model '" + modelName +
"' unknown, or does not fit binary alphabet.");
883 throw Exception(
"BppOSubstitutionModelFormat::read. Integer alphabet not supported.");
885 auto balpha = dynamic_pointer_cast<const IntegerAlphabet>(alphabet);
887 if (modelName ==
"D1Walk")
890 throw Exception(
"Model '" + modelName +
"' unknown, or does not fit integer alphabet.");
893 throw Exception(
"Model '" + modelName +
"' unknown, or does not fit " + alphabet->getAlphabetType() +
" alphabet.");
906 if (mData.find(nData) == mData.end())
921 map<std::string, std::string>& args)
928 for (
auto pname : pnames)
931 if (args.find(name) != args.end())
937 string pname, pval, pname2;
938 for (
size_t i = 0; i < pl.
size(); ++i)
942 if (args.find(pname) == args.end())
947 (pval.find(
"(") != string::npos))
951 for (
size_t j = 0; j < pl.
size() && !found; ++j)
973 if (args.find(
"useObservedFreqs") != args.end())
974 throw Exception(
"useObservedFreqs argument is obsolete. Please use 'initFreqs=observed' instead.");
975 if (args.find(
"useObservedFreqs.pseudoCount") != args.end())
976 throw Exception(
"useObservedFreqs.pseudoCount argument is obsolete. Please use 'initFreqs.observedPseudoCount' instead.");
978 if (args.find(
"initFreqs") != args.end())
980 if (pref !=
"Coala.")
982 if (args.find(
"initFreqs.observedPseudoCount") != args.end())
983 unparsedArguments_[pref +
"initFreqs.observedPseudoCount"] = args[
"initFreqs.observedPseudoCount"];
985 if (args.find(
"rate") != args.end())
994 std::shared_ptr<const Alphabet> alphabet,
995 const std::string& modelDescription,
996 const std::map<
size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
999 unique_ptr<SubstitutionModelInterface> model;
1000 string modelName =
"";
1001 map<string, string> args;
1004 vector<string> v_nestedModelDescription;
1005 vector< unique_ptr<SubstitutionModelInterface>> v_pSM;
1006 shared_ptr<const CoreWordAlphabet> pWA;
1008 string s, nestedModelDescription;
1009 unsigned int nbmodels;
1014 + alphabet->getAlphabetType() +
" for model " + modelName +
".");
1016 pWA = dynamic_pointer_cast<const CoreWordAlphabet>(alphabet);
1022 if (args.find(
"model") != args.end())
1024 v_nestedModelDescription.push_back(args[
"model"]);
1025 nbmodels = (modelName ==
"Word" || modelName ==
"Kron") ? pWA->getLength() : 3;
1029 if (args.find(
"model1") == args.end())
1030 throw Exception(
"Missing argument 'model' or 'model1' for model " + modelName +
".");
1039 throw Exception(
"Missing nested models for model " + modelName +
".");
1041 if (pWA->getLength() != nbmodels)
1043 + alphabet->getAlphabetType() +
" for model " + modelName +
".");
1045 if (v_nestedModelDescription.size() != nbmodels)
1048 model = nestedReader.
readSubstitutionModel(pWA->getNAlphabet(0), v_nestedModelDescription[0], mData, nData,
false);
1051 for (
unsigned int i = 0; i < nbmodels; ++i)
1056 for (
auto& it : unparsedParameterValuesNested)
1061 v_pSM.push_back(std::move(model));
1065 for (
unsigned i = 0; i < v_nestedModelDescription.size(); ++i)
1068 model = nestedReader.
readSubstitutionModel(pWA->getNAlphabet(i), v_nestedModelDescription[i], mData, nData,
false);
1070 for (
auto& it : unparsedParameterValuesNested)
1075 v_pSM.push_back(std::move(model));
1084 vector<set<size_t>> vposKron;
1085 if (modelName.substr(0, 4) ==
"Kron")
1087 if (args.find(
"positions") != args.end())
1103 vposKron.push_back(ss);
1112 if (modelName ==
"Word")
1114 if (v_nestedModelDescription.size() != nbmodels)
1116 model = make_unique<WordSubstitutionModel>(std::move(v_pSM[0]), nbmodels);
1121 model = make_unique<WordSubstitutionModel>(ml);
1129 else if (modelName ==
"Kron")
1131 if (vposKron.size() == 0)
1133 if (v_nestedModelDescription.size() != nbmodels)
1135 model = make_unique<KroneckerWordSubstitutionModel>(std::move(v_pSM[0]), nbmodels);
1140 model = make_unique<KroneckerWordSubstitutionModel>(ml);
1145 if (v_nestedModelDescription.size() != nbmodels)
1147 model = make_unique<KroneckerWordSubstitutionModel>(std::move(v_pSM[0]), nbmodels, vposKron);
1152 model = make_unique<KroneckerWordSubstitutionModel>(ml, vposKron);
1163 auto pCA = dynamic_pointer_cast<const CodonAlphabet>(pWA);
1165 throw Exception(
"Non codon Alphabet for model" + modelName +
".");
1167 unique_ptr<AlphabetIndex2> pai2;
1168 unique_ptr<CodonFrequencySetInterface> pFS;
1171 ((v_nestedModelDescription.size() == 3) &&
1173 throw Exception(
"Non simple NucleotideSubstitutionModel embedded in " + modelName +
" model.");
1175 if (args.find(
"genetic_code") != args.end())
1177 ApplicationTools::displayWarning(
"'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
1178 throw Exception(
"BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
1182 throw Exception(
"BppOSubstitutionModelFormat::readWord_(). No genetic code specified! Consider using 'setGeneticCode'.");
1188 if ((modelName.find(
"Dist") != string::npos) || (modelName ==
"SENCA"))
1194 if (modelName.find(
"Freq") != string::npos)
1196 if (args.find(
"frequencies") == args.end())
1197 throw Exception(
"Missing equilibrium frequencies.");
1201 auto tmp = bIOFreq.
readFrequencySet(pCA, args[
"frequencies"], mData, nData);
1204 throw IOException(
"BppOSubstitutionModelFormat::readWord_(). Incorrect codon frequencies.");
1207 for (
auto& it : unparsedParameterValuesNested)
1216 if (modelName ==
"Triplet")
1217 model = (v_nestedModelDescription.size() != 3)
1218 ? make_unique<TripletSubstitutionModel>(
1220 unique_ptr<NucleotideSubstitutionModelInterface>(
1224 : make_unique<TripletSubstitutionModel>(
1226 unique_ptr<NucleotideSubstitutionModelInterface>(
1229 unique_ptr<NucleotideSubstitutionModelInterface>(
1232 unique_ptr<NucleotideSubstitutionModelInterface>(
1237 else if (modelName.find(
"Codon") != string::npos)
1239 vector< unique_ptr<CoreCodonSubstitutionModelInterface>> vCSM;
1240 string name =
"Codon";
1241 map<string, string> unparsedParameterValuesNested;
1243 if (modelName.find(
"Dist") != string::npos)
1247 vCSM.push_back(make_unique<AbstractCodonDistanceSubstitutionModel>(std::move(pai2),
geneticCode_,
""));
1249 else if (modelName.find(
"BGC") != string::npos)
1253 vCSM.push_back(make_unique<AbstractCodonBGCSubstitutionModel>(
geneticCode_,
""));
1255 else if (modelName.find(
"Prot") != string::npos)
1259 if (args.find(
"protmodel") == args.end())
1260 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'protmodel' for codon model argument 'Prot'.");
1262 nestedModelDescription = args[
"protmodel"];
1265 shared_ptr<SubstitutionModelInterface> tmpModel = nestedReader.
readSubstitutionModel(
geneticCode_->getTargetAlphabet(), nestedModelDescription, mData, nData,
false);
1266 auto nestedModel = dynamic_pointer_cast<ProteinSubstitutionModelInterface>(tmpModel);
1270 vCSM.push_back(make_unique<AbstractCodonAARateSubstitutionModel>(nestedModel,
geneticCode_,
""));
1272 if (modelName.find(
"AAClust") != string::npos)
1277 vector<uint> partition;
1278 if (args.find(
"partition") != args.end())
1280 string rf = args[
"partition"];
1284 partition.push_back(TextTools::to<uint>(strtok.
nextToken()));
1287 unique_ptr<AbstractCodonClusterAASubstitutionModel> aca = partition.size() != 0 ?
1288 make_unique<AbstractCodonClusterAASubstitutionModel>(
geneticCode_,
"", partition) :
1289 make_unique<AbstractCodonClusterAASubstitutionModel>(
geneticCode_,
"");
1291 vCSM.push_back(std::move(aca));
1295 if (vCSM.size() == 0)
1298 if (modelName.find(
"CpG") != string::npos)
1301 vCSM.push_back(make_unique<AbstractCodonCpGSubstitutionModel>(pCA,
""));
1304 if (modelName.find(
"AAFit") != string::npos)
1308 if (args.find(
"fitness") == args.end())
1309 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'fitness' for codon model argument 'AAFit'.");
1311 string nestedFreqDescription = args[
"fitness"];
1318 unparsedParameterValuesNested[
"fit_" + it.first] = it.second;
1321 auto aca = make_unique<AbstractCodonAAFitnessSubstitutionModel>(std::move(nestedFreq),
geneticCode_,
"");
1323 if (args.find(
"Ns") != args.end())
1324 aca->addNsParameter();
1326 vCSM.push_back(std::move(aca));
1328 else if (modelName.find(
"Fit") != string::npos)
1330 if (args.find(
"fitness") == args.end())
1331 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'fitness' for codon model argument 'Fit'.");
1332 string nestedFreqDescription = args[
"fitness"];
1337 auto nestedFreq = nestedReader.
readFrequencySet(alphabet, nestedFreqDescription, mData, nData,
false);
1341 unparsedParameterValuesNested[
"fit_" + it.first] = it.second;
1344 vCSM.push_back(make_unique<AbstractCodonFitnessSubstitutionModel>(std::move(nestedFreq),
geneticCode_,
""));
1347 if (modelName.find(
"PhasFreq") != string::npos)
1350 vCSM.push_back(make_unique<AbstractCodonPhaseFrequenciesSubstitutionModel>(std::move(pFS),
""));
1352 else if (modelName.find(
"Freq") != string::npos)
1355 vCSM.push_back(make_unique<AbstractCodonFrequenciesSubstitutionModel>(std::move(pFS),
""));
1359 for (
auto it : unparsedParameterValuesNested)
1364 model = (v_nestedModelDescription.size() != 3)
1365 ? make_unique<CodonAdHocSubstitutionModel>(
1367 unique_ptr<NucleotideSubstitutionModelInterface>(
1372 : make_unique<CodonAdHocSubstitutionModel>(
1374 unique_ptr<NucleotideSubstitutionModelInterface>(
1377 unique_ptr<NucleotideSubstitutionModelInterface>(
1380 unique_ptr<NucleotideSubstitutionModelInterface>(
1386 else if (modelName ==
"KronDistFreq")
1388 if (v_nestedModelDescription.size() != 3)
1390 if (vposKron.size() == 0)
1391 model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(
geneticCode_,
1392 unique_ptr<NucleotideSubstitutionModelInterface>(
1394 std::move(pFS), std::move(pai2));
1396 model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(
geneticCode_,
1397 unique_ptr<NucleotideSubstitutionModelInterface>(
1399 vposKron, std::move(pFS), std::move(pai2));
1403 if (vposKron.size() != 0)
1404 model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(
1406 unique_ptr<NucleotideSubstitutionModelInterface>(
1409 unique_ptr<NucleotideSubstitutionModelInterface>(
1412 unique_ptr<NucleotideSubstitutionModelInterface>(
1418 model = make_unique<KroneckerCodonDistanceFrequenciesSubstitutionModel>(
1420 unique_ptr<NucleotideSubstitutionModelInterface>(
1423 unique_ptr<NucleotideSubstitutionModelInterface>(
1426 unique_ptr<NucleotideSubstitutionModelInterface>(
1434 else if (modelName ==
"KronDist")
1436 if (v_nestedModelDescription.size() != 3)
1438 if (vposKron.size() == 0)
1439 model = make_unique<KroneckerCodonDistanceSubstitutionModel>(
geneticCode_,
1443 model = make_unique<KroneckerCodonDistanceSubstitutionModel>(
geneticCode_,
1445 vposKron, std::move(pai2));
1449 if (vposKron.size() != 0)
1450 model = make_unique<KroneckerCodonDistanceSubstitutionModel>(
1452 unique_ptr<NucleotideSubstitutionModelInterface>(
1455 unique_ptr<NucleotideSubstitutionModelInterface>(
1458 unique_ptr<NucleotideSubstitutionModelInterface>(
1463 model = make_unique<KroneckerCodonDistanceSubstitutionModel>(
1465 unique_ptr<NucleotideSubstitutionModelInterface>(
1468 unique_ptr<NucleotideSubstitutionModelInterface>(
1471 unique_ptr<NucleotideSubstitutionModelInterface>(
1479 else if (modelName ==
"SENCA")
1481 if (args.find(
"fitness") == args.end())
1482 throw Exception(
"Missing fitness in model " + modelName +
".");
1487 auto pFit(bIOFreq.
readFrequencySet(pCA, args[
"fitness"], mData, nData,
false));
1490 for (
auto& it : unparsedParameterValuesNested)
1495 if (v_nestedModelDescription.size() != 3)
1498 unique_ptr<NucleotideSubstitutionModelInterface>(
1501 std::move(pFit), std::move(pai2));
1504 model = make_unique<SENCA>(
1506 unique_ptr<NucleotideSubstitutionModelInterface>(
1509 unique_ptr<NucleotideSubstitutionModelInterface>(
1512 unique_ptr<NucleotideSubstitutionModelInterface>(
1525 std::map<std::string, std::string>& globalAliases,
1526 std::vector<std::string>& writtenNames)
const
1539 auto parend = (name.rfind(
")") == name.size() - 1);
1541 out << (parend ? name.substr(0, name.size() - 1) : name +
"(");
1547 out <<
"file=" << userModel.
getPath();
1551 ns = ns.substr(0, ns.size() - 3);
1553 ns = ns.substr(0, ns.size() - 1);
1555 out <<
", name=" << ns;
1570 const G2001& gModel =
dynamic_cast<const G2001&
>(model);
1587 const RE08& reModel =
dynamic_cast<const RE08&
>(model);
1598 const YpR& yprModel =
dynamic_cast<const YpR&
>(model);
1611 write(oneChangeTransitionModel.
model(), out, globalAliases, writtenNames);
1621 write(oneChangeRegisterTransitionModel.
model(), out, globalAliases, writtenNames);
1623 out <<
", register=";
1634 write(registerRatesSubstitutionModel.
model(), out, globalAliases, writtenNames);
1636 out <<
", register=";
1646 const gBGC& gbgcModel =
dynamic_cast<const gBGC&
>(model);
1650 string ss = sout.
str();
1651 auto begin = ss.find_first_of(
"(");
1652 auto end = ss.find_last_of(
")");
1654 out << ss.substr(begin + 1, end - begin - 1);
1670 write(mod0, out, globalAliases, writtenNames);
1678 write(mod0, out, globalAliases, writtenNames);
1683 write(mod0, out, globalAliases, writtenNames);
1684 for (
unsigned int i = 1; i < nmod; ++i)
1687 write(wM.
nModel(i), out, globalAliases, writtenNames);
1699 const Coala& coalaModel =
dynamic_cast<const Coala&
>(model);
1719 const POMO* pomo =
dynamic_cast<const POMO*
>(&model);
1727 out <<
", fitness=";
1744 string fd = freqdesc.
str();
1749 out <<
"frequencies=" + fd;
1760 for (
size_t i = 0; i < casm.getNumberOfModels(); ++i)
1767 out <<
"protmodel=";
1769 write(acr.aaModel(), out, globalAliases, writtenNames);
1794 out <<
"partition=(";
1795 const vector<uint>& vass = acc.getAssign();
1797 for (
size_t j = 0; j < vass.size(); ++j)
1817 auto& pCF =
dynamic_cast<const SENCA&
>(model);
1833 auto& pM7 =
dynamic_cast<const YNGP_M7&
>(model);
1836 out <<
"n=" << pM7.getNumberOfModels();
1844 auto& pM8 =
dynamic_cast<const YNGP_M8&
>(model);
1847 out <<
"n=" << pM8.getNumberOfModels() - 1;
1855 auto& pM9 =
dynamic_cast<const YNGP_M9&
>(model);
1858 out <<
"nbeta=" << pM9.getNBeta() <<
", ngamma=" << pM9.getNGamma();
1867 auto& pM10 =
dynamic_cast<const YNGP_M10&
>(model);
1870 out <<
"nbeta=" << pM10.getNBeta() <<
", ngamma=" << pM10.getNGamma();
1879 auto& pLGL =
dynamic_cast<const LGL08_CAT&
>(model);
1882 out <<
"nbCat=" << pLGL.getNumberOfCategories();
1891 auto& pDFP =
dynamic_cast<const DFP07&
>(model);
1895 out <<
"protmodel=";
1896 write(pDFP.proteinModel(), out, globalAliases, writtenNames);
1909 out <<
"codonmodel=";
1910 write(pSameAA.codonModel(), out, globalAliases, writtenNames);
1912 out <<
", protmodel=";
1913 write(pSameAA.proteinModel(), out, globalAliases, writtenNames);
1926 size_t wNs = writtenNames.size();
1928 for (
size_t i = 0; i < wNs; ++i)
1932 writtenNames.push_back(absm.getNamespace() + absm.getParNameFromPmodel(writtenNames[i]));
1949 std::map<std::string, std::string>& globalAliases,
1950 std::vector<std::string>& writtenNames)
const
1957 for (
unsigned int i = 0; i < pMS.getNumberOfModels(); ++i)
1962 write(pMS.nModel(i), out, globalAliases, writtenNames);
1970 out <<
"MixedModel(model=";
1971 auto& eM = pMS.model(0);
1976 for (
auto& pn : vpl)
1978 if (find(writtenNames.begin(), writtenNames.end(), pn) == writtenNames.end())
1980 if (pMS.hasDistribution(pn))
1982 auto& pDD = pMS.distribution(pn);
1988 globalAliases[pn] = sout.
str();
1994 write(eM, out, globalAliases, writtenNames);
1996 if (pMS.from() != -1)
1997 out <<
", from=" << model.
getAlphabet()->intToChar(pMS.from()) <<
", to=" << model.
getAlphabet()->intToChar(pMS.to());
2010 std::shared_ptr<const AlignmentDataInterface> data)
2016 if (initFreqs !=
"")
2021 if (initFreqs ==
"observed")
2024 throw Exception(
"BppOSubstitutionModelFormat::initialize_(). Missing data for observed frequencies");
2026 tmodel.setFreqFromData(*data, psi);
2028 else if (initFreqs.substr(0, 6) ==
"values")
2031 map<int, double> frequencies;
2033 string rf = initFreqs.substr(6);
2038 tmodel.setFreq(frequencies);
2041 throw Exception(
"Unknown initFreqs argument");
2053 for (
size_t i = 0; i < pl.
size(); ++i)
2061 for (
size_t i = 0; i < pl.
size(); i++)
2063 const string pName = pl[i].getName();
2065 posp = pName.rfind(
".");
2066 bool test1 = (initFreqs ==
"");
2067 bool test2 = (pName.size() < posp + 6) || (pName.substr(posp + 1, 5) !=
"theta");
2071 if (test1 || test2 || test3)
2073 if (!test1 && !test2 && test3)
2077 pl[i].setValue(value);
Abstract class for mixture models based on the bibliography.
Partial implementation of the SubstitutionModel interface for models that are set for matching the bi...
Abstract class for modelling of ratios of substitution rates between codons, whatever they are synony...
Abstract class for modelling of non-synonymous and synonymous substitution rates in codon models,...
Abstract class for modelling of non-synonymous and synonymous substitution rates in codon models,...
const BranchModelInterface & model() const override
std::string getNamespace() const override
Abstract Basal class for words of substitution models.
const SubstitutionModelInterface & nModel(size_t i) const
returns the ith model, or throw an exception if i is not a valid number.
size_t getNumberOfModels() const
const BranchModelInterface & model() const override
virtual void setMessageHandler(std::shared_ptr< OutputStream > mh)
Interface for all Branch models.
virtual const FrequencySetInterface & frequencySet() const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual std::string getName() const =0
Get the name of the model.
virtual void addRateParameter()=0
size_t getNbrOfAxes() const
The Coala branch-heterogeneous amino-acid substitution model.
std::string getExch() const
Class for substitution models of codons with several layers of codon models.
Parametrize a set of state frequencies for codons.
Interface for reversible codon models.
Class for modelling of non-synonymous rates in codon models, such that the substitution rates between...
The D1Walk substitution model for Integer alphabet [0;N-1]. In this model, substitutions are possible...
Class for non-synonymous substitution models on codons with parameterized equilibrium frequencies and...
The EquiprobableSubstitutionModel substitution model for any kind of alphabet. Jukes-Cantor models ar...
Galtier's 2001 covarion model.
const DiscreteDistributionInterface & rateDistribution() const
SubModel taken from a MixedTransitionModel, kept in the context of the MixedTransitionModel (see From...
size_t getSubModelNumber() const
const MixedTransitionModelInterface & mixedModel() const
The Le et al (2008) CAT substitution model for proteins.
Partial implementation of the Markov-modulated class of substitution models.
const ReversibleSubstitutionModelInterface & nestedModel() const
Interface for Transition models, defined as a mixture of "simple" transition models.
Transition models defined as a mixture of nested substitution models.
Transition models defined as a mixture of several substitution models.
A list of models, for building a WordSubstitutionModel.
Specialisation interface for rversible nucleotide substitution model.
Specialisation interface for nucleotide substitution model.
From a model, compute transition probabilities given there is at least a change of a category (ie a n...
const std::string & getRegisterName() const
const std::vector< size_t > & getRegisterNumbers() const
From a model, compute transition probabilities given there is at least a change in the branch.
const SubstitutionModelInterface & mutationModel() const
const FrequencySetInterface & fitness() const
virtual const ParameterList & getIndependentParameters() const=0
virtual void aliasParameters(const std::string &p1, const std::string &p2)=0
virtual std::vector< std::string > getParameterNames() const
virtual void setParameter(size_t index, const Parameter ¶m)
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const=0
virtual bool matchParametersValues(const ParameterList ¶meters)=0
virtual std::string getNamespace() const=0
virtual const ParameterList & getParameters() const=0
Parametrize a set of state frequencies for proteins.
Specialized interface for protein reversible substitution model.
Specialized interface for protein substitution model.
The Rivas-Eddy substitution model with gap characters.
const ReversibleSubstitutionModelInterface & nestedModel() const
From a model, substitution rates are set into categories following a given register....
const std::string & getRegisterName() const
Interface for reversible substitution models.
Class for non-synonymous and synonymous substitution models on codons with parameterized equilibrium ...
const std::string & nextToken()
bool hasMoreToken() const
Interface for all transition models.
Build an empirical protein substitution model from a file.
const std::string & getPath() const
The Yang et al (2000) M10 substitution model for codons.
The Yang et al (2000) M7 substitution model for codons.
The Yang et al (2000) M8 substitution model for codons.
The Yang et al (2000) M9 substitution model for codons.
const NucleotideSubstitutionModelInterface & nestedModel() const
const SubstitutionModelInterface & nestedModel() const
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool hasSubstring(const std::string &s, const std::string &pattern)
bool isEmpty(const std::string &s)
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.