bpp-phyl3 3.0.0
BppOTransitionModelFormat.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
7#include <Bpp/Io/FileTools.h>
13#include <Bpp/Text/TextTools.h>
14
15#include "../App/PhylogeneticsApplicationTools.h"
16#include "../Model/Codon/DFP07.h"
17#include "../Model/Codon/RELAX.h"
18#include "../Model/Codon/YNGP_M1.h"
19#include "../Model/Codon/YNGP_M10.h"
20#include "../Model/Codon/YNGP_M2.h"
21#include "../Model/Codon/YNGP_M3.h"
22#include "../Model/Codon/YNGP_M7.h"
23#include "../Model/Codon/YNGP_M8.h"
24#include "../Model/Codon/YNGP_M9.h"
25#include "../Model/MixedTransitionModel.h"
26#include "../Model/MixtureOfATransitionModel.h"
27#include "../Model/MixtureOfTransitionModels.h"
28#include "../Model/OneChangeRegisterTransitionModel.h"
29#include "../Model/OneChangeTransitionModel.h"
30#include "../Model/Protein/LG10_EX_EHO.h"
31#include "../Model/Protein/LGL08_CAT.h"
32#include "../Model/Protein/LLG08_EHO.h"
33#include "../Model/Protein/LLG08_EX2.h"
34#include "../Model/Protein/LLG08_EX3.h"
35#include "../Model/Protein/LLG08_UL2.h"
36#include "../Model/Protein/LLG08_UL3.h"
40
41// From Numeric
42
45
46
48
49using namespace bpp;
50
51// From the STL:
52#include <iomanip>
53#include <set>
54
55using namespace std;
56
57unique_ptr<TransitionModelInterface> BppOTransitionModelFormat::readTransitionModel(
58 std::shared_ptr<const Alphabet> alphabet,
59 const std::string& modelDescription,
60 const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
61 size_t nData,
62 bool parseArguments)
63{
64 unparsedArguments_.clear();
65 unique_ptr<TransitionModelInterface> model;
66 string modelName = "";
67 map<string, string> args;
68 KeyvalTools::parseProcedure(modelDescription, modelName, args);
69
70 // get data number
71
72 if (args.find("data") != args.end())
73 nData = TextTools::to<size_t>(args["data"]);
74
75 // //////////////////////////////////
76 // / MIXED MODELS
77 // ////////////////////////////////
78
79 if ((modelName == "MixedModel" || (modelName == "Mixture")) && allowMixed_)
80 model = readMixed_(alphabet, modelDescription, mData, nData);
81 else if (modelName == "OneChange")
82 {
83 // We have to parse the nested model first:
84 if (args.find("model") == args.end())
85 throw Exception("BppOTransitionModelFormat::read. Missing argument 'model' for model 'OneChange'.");
86 string nestedModelDescription = args["model"];
88 if (geneticCode_)
89 nestedReader.setGeneticCode(geneticCode_);
90
91 auto nestedModel = nestedReader.readSubstitutionModel(alphabet, nestedModelDescription, mData, nData, false);
92 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
93
94 // We look for the register:
95 if (args.find("register") == args.end())
96 model = make_unique<OneChangeTransitionModel>(std::move(nestedModel));
97 else
98 {
99 shared_ptr<AlphabetIndex2> weights;
100 shared_ptr<AlphabetIndex2> distances;
101 string registerDescription = args["register"];
102 auto reg = PhylogeneticsApplicationTools::getSubstitutionRegister(registerDescription, nestedModel->getStateMap(), geneticCode_, weights, distances);
103
104 if (args.find("numReg") == args.end())
105 throw Exception("Missing argument 'numReg' (number of event for register in model " + modelName);
106
107 vector<size_t> vNumRegs;
108
109 StringTokenizer nst(args["numReg"], "+");
110
111 bool out = true;
112
113 while (nst.hasMoreToken())
114 {
115 size_t n = TextTools::to<size_t>(nst.nextToken());
116 vNumRegs.push_back(n);
117 if (verbose_)
118 {
119 ApplicationTools::displayResult(out ? "Register types" : "", reg->getTypeName(n));
120 out = false;
121 }
122 }
123
124 model = make_unique<OneChangeRegisterTransitionModel>(std::move(nestedModel), *reg, vNumRegs);
125 }
126
127 // Then we update the parameter set:
128 for (auto& it : unparsedParameterValuesNested)
129 {
130 unparsedArguments_["OneChange." + it.first] = it.second;
131 }
132 }
133 // //////////////////
134 // PREDEFINED CODON MODELS
135 else if (((modelName.substr(0, 4) == "YNGP") || (modelName == "DFP07") || (modelName == "RELAX")) && (alphabetCode_ & CODON))
136 {
137 if (!(alphabetCode_ & CODON))
138 throw Exception("BppOTransitionModelFormat::read. Codon alphabet not supported.");
139 if (!geneticCode_)
140 throw Exception("BppOTransitionModelFormat::readTransitionModel(). No genetic code specified! Consider using 'setGeneticCode'.");
141
142 if (!AlphabetTools::isCodonAlphabet(*alphabet))
143 throw Exception("Alphabet should be Codon Alphabet.");
144 auto pCA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
145
146 if (args.find("genetic_code") != args.end())
147 {
148 ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
149 throw Exception("BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
150 }
151
152 if (geneticCode_->getSourceAlphabet()->getAlphabetType() != pCA->getAlphabetType())
153 throw Exception("Mismatch between genetic code and codon alphabet");
154
155
156 unique_ptr<CodonFrequencySetInterface> codonFreqs;
157
158 if (args.find("frequencies") != args.end())
159 {
160 string freqOpt = ApplicationTools::getStringParameter("frequencies", args, "F0", "", true, warningLevel_);
162 freqReader.setGeneticCode(geneticCode_); // This uses the same instance as the one that will be used by the model.
163
164 auto tmpP = freqReader.readFrequencySet(pCA, freqOpt, mData, nData, false);
165 codonFreqs = unique_ptr<CodonFrequencySetInterface>(dynamic_cast<CodonFrequencySetInterface*>(tmpP.release()));
166 for (const auto& it : freqReader.getUnparsedArguments())
167 {
168 unparsedArguments_[modelName + "." + it.first] = it.second;
169 }
170 }
171 else
172 throw Exception("Missing 'frequencies' for model " + modelName);
173
174 if (modelName == "YNGP_M1")
175 model = make_unique<YNGP_M1>(geneticCode_, std::move(codonFreqs));
176 else if (modelName == "YNGP_M2")
177 model = make_unique<YNGP_M2>(geneticCode_, std::move(codonFreqs));
178 else if (modelName == "RELAX")
179 model = make_unique<RELAX>(geneticCode_, std::move(codonFreqs));
180 else if (modelName == "YNGP_M3")
181 if (args.find("n") == args.end())
182 model = make_unique<YNGP_M3>(geneticCode_, std::move(codonFreqs));
183 else
184 model = make_unique<YNGP_M3>(geneticCode_, std::move(codonFreqs), TextTools::to<unsigned int>(args["n"]));
185 else if ((modelName == "YNGP_M7") || modelName == "YNGP_M8" || modelName == "YNGP_M8a")
186 {
187 if (args.find("n") == args.end())
188 throw Exception("Missing argument 'n' (number of classes) in " + modelName + " distribution");
189 unsigned int nbClasses = TextTools::to<unsigned int>(args["n"]);
190 if (verbose_)
191 ApplicationTools::displayResult("Number of classes in model", nbClasses);
192
193 if (modelName == "YNGP_M7")
194 model = make_unique<YNGP_M7>(geneticCode_, std::move(codonFreqs), nbClasses);
195 else if (modelName == "YNGP_M8")
196 model = make_unique<YNGP_M8>(geneticCode_, std::move(codonFreqs), nbClasses);
197 else if (modelName == "YNGP_M8a")
198 model = make_unique<YNGP_M8>(geneticCode_, std::move(codonFreqs), nbClasses, true);
199 }
200 else if (modelName == "YNGP_M9" || modelName == "YNGP_M10")
201 {
202 if (args.find("nbeta") == args.end())
203 throw Exception("Missing argument 'nbeta' (number of classes of beta distribution) in " + modelName + " distribution");
204 unsigned int nbBeta = TextTools::to<unsigned int>(args["nbeta"]);
205 if (args.find("ngamma") == args.end())
206 throw Exception("Missing argument 'ngamma' (number of classes of gamma distribution) in " + modelName + " distribution");
207 unsigned int nbGamma = TextTools::to<unsigned int>(args["ngamma"]);
208 if (verbose_)
209 ApplicationTools::displayResult("Number of classes in model", nbBeta + nbGamma);
210
211 if (modelName == "YNGP_M9")
212 model = make_unique<YNGP_M9>(geneticCode_, std::move(codonFreqs), nbBeta, nbGamma);
213 else
214 model = make_unique<YNGP_M10>(geneticCode_, std::move(codonFreqs), nbBeta, nbGamma);
215 }
216 else if (modelName == "DFP07")
217 {
218 if (args.find("protmodel") == args.end())
219 throw Exception("Missing 'protmodel in model " + modelName + ".");
220
222 auto tmpP = nestedProtReader.readSubstitutionModel(
223 geneticCode_->getTargetAlphabet(),
224 args["protmodel"], mData, nData, false);
225 auto nestedProtModel = unique_ptr<ProteinSubstitutionModelInterface>(
226 dynamic_cast<ProteinSubstitutionModelInterface*>(tmpP.release())
227 );
228
229 auto unparsedParameterValuesNested = nestedProtReader.getUnparsedArguments();
230 unparsedArguments_.insert(unparsedParameterValuesNested.begin(), unparsedParameterValuesNested.end());
231
232 model = make_unique<DFP07>(geneticCode_, std::move(nestedProtModel), std::move(codonFreqs));
233 }
234 }
235 else if (AlphabetTools::isProteicAlphabet(*alphabet))
236 {
237 if (!(alphabetCode_ & PROTEIN))
238 throw Exception("BppOTransitionModelFormat::read. Protein alphabet not supported.");
239 auto alpha = dynamic_pointer_cast<const ProteicAlphabet>(alphabet);
240
241 if (modelName == "LLG08_EHO")
242 model = make_unique<LLG08_EHO>(alpha);
243 else if (modelName == "LLG08_EX2")
244 model = make_unique<LLG08_EX2>(alpha);
245 else if (modelName == "LLG08_EX3")
246 model = make_unique<LLG08_EX3>(alpha);
247 else if (modelName == "LLG08_UL2")
248 model = make_unique<LLG08_UL2>(alpha);
249 else if (modelName == "LLG08_UL3")
250 model = make_unique<LLG08_UL3>(alpha);
251 else if (modelName == "LG10_EX_EHO")
252 model = make_unique<LG10_EX_EHO>(alpha);
253 else if (modelName == "LGL08_CAT")
254 {
255 if (args.find("nbCat") == args.end())
256 throw Exception("'nbCat' argument is compulsory for model 'LGL08_CAT'");
257
258 unsigned int nbCat = TextTools::to<unsigned int>(args["nbCat"]);
259 if (nbCat == 0)
260 throw Exception("nbCat argument has to be 10, 20, 30, 40, 50 or 60.");
261 model = make_unique<LGL08_CAT>(alpha, nbCat);
262 }
263 }
264
265 if (!model)
266 model = readSubstitutionModel(alphabet, modelDescription, mData, nData, parseArguments);
267 else
268 {
269 if (verbose_)
270 ApplicationTools::displayResult("Transition model", modelName);
271
272 updateParameters_(*model, args);
273
274 if (parseArguments)
275 {
276 if (nData)
277 initialize_(*model, mData.at(nData));
278 else
279 initialize_(*model, 0);
280 }
281 }
282
283 return model;
284}
285
286unique_ptr<MixedTransitionModelInterface> BppOTransitionModelFormat::readMixed_(
287 std::shared_ptr<const Alphabet> alphabet,
288 const std::string& modelDescription,
289 const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
290 size_t nData)
291{
292 unique_ptr<MixedTransitionModelInterface> model;
293
294 string modelName = "";
295 map<string, string> args;
296 KeyvalTools::parseProcedure(modelDescription, modelName, args);
297 unique_ptr<TransitionModelInterface> pSM;
298
299 if (modelName == "MixedModel")
300 {
301 if (args.find("model") == args.end())
302 throw Exception("The argument 'model' is missing from MixedModel description");
303 string nestedModelDescription = args["model"];
305 if (geneticCode_)
306 nestedReader.setGeneticCode(geneticCode_); // This uses the same
307 // instance as the one
308 // that will be used
309 // by the model.
310 pSM = nestedReader.readTransitionModel(alphabet, nestedModelDescription, mData, nData, false);
311
312 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
313
314 map<string, unique_ptr<DiscreteDistributionInterface>> mdist;
315 map<string, string> unparsedParameterValuesNested2;
316
317 for (auto& it : unparsedParameterValuesNested)
318 {
319 if (it.second.find("(") != string::npos)
320 {
322 mdist[pSM->getParameterNameWithoutNamespace(it.first)] = bIO.readDiscreteDistribution(it.second, false);
323 map<string, string> unparsedParameterValuesNested3(bIO.getUnparsedArguments());
324 for (auto& it2 : unparsedParameterValuesNested3)
325 {
326 unparsedParameterValuesNested2[it.first + "_" + it2.first] = it2.second;
327 }
328 }
329 else
330 unparsedParameterValuesNested2[it.first] = it.second;
331 }
332
333 for (auto& it : unparsedParameterValuesNested2)
334 {
335 unparsedArguments_[it.first] = it.second;
336 }
337
338
339 int fi(-1), ti(-1);
340
341 if (args.find("from") != args.end())
342 fi = alphabet->charToInt(args["from"]);
343 if (args.find("to") != args.end())
344 ti = alphabet->charToInt(args["to"]);
345
346 string sModN = pSM->getName();
347 model = make_unique<MixtureOfATransitionModel>(alphabet, std::move(pSM), mdist, fi, ti);
348
349 vector<string> v = model->getParameters().getParameterNames();
350
351 if (verbose_)
352 {
353 ApplicationTools::displayResult("Mixture Of A TransitionModel Model", sModN);
354 ApplicationTools::displayResult("Number of classes", model->getNumberOfModels());
355 }
356 }
357 else if (modelName == "Mixture")
358 {
359 vector<string> v_nestedModelDescription;
360 vector< std::unique_ptr<TransitionModelInterface>> v_pSM;
361
362 if (args.find("model1") == args.end())
363 {
364 throw Exception("Missing argument 'model1' for model " + modelName + ".");
365 }
366 unsigned int nbmodels = 0;
367
368 while (args.find("model" + TextTools::toString(nbmodels + 1)) != args.end())
369 {
370 v_nestedModelDescription.push_back(args["model" + TextTools::toString(++nbmodels)]);
371 }
372
373 if (nbmodels < 2)
374 throw Exception("Missing nested models for model " + modelName + ".");
375
376 for (unsigned i = 0; i < v_nestedModelDescription.size(); ++i)
377 {
378 BppOTransitionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
379 if (geneticCode_)
380 nestedReader.setGeneticCode(geneticCode_); // This uses the same instance as the one that will be used by the model.
381
382 pSM = nestedReader.readTransitionModel(alphabet, v_nestedModelDescription[i], mData, nData, false);
383
384 map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
385 for (auto& it : unparsedParameterValuesNested)
386 {
387 unparsedArguments_[modelName + "." + TextTools::toString(i + 1) + "_" + it.first] = it.second;
388 }
389
390 v_pSM.push_back(std::move(pSM));
391 }
392
393 model = make_unique<MixtureOfTransitionModels>(alphabet, v_pSM);
394 if (verbose_)
395 ApplicationTools::displayResult("Mixture Of TransitionModel Models", modelName );
396 }
397 else
398 throw Exception("Unknown model name for mixture " + modelName);
399
400 return model;
401}
static bool isProteicAlphabet(const Alphabet &alphabet)
static bool isCodonAlphabet(const Alphabet &alphabet)
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static void displayResult(const std::string &text, const T &result)
const std::map< std::string, std::string > & getUnparsedArguments() const
std::unique_ptr< DiscreteDistributionInterface > readDiscreteDistribution(const std::string &distDescription, bool parseArguments=true)
Frequencies set I/O in BppO format.
std::unique_ptr< FrequencySetInterface > readFrequencySet(std::shared_ptr< const Alphabet > alphabet, const std::string &freqDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true) override
Read a frequencies set from a string.
const std::map< std::string, std::string > & getUnparsedArguments() const override
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
Substitution model I/O in BppO format.
std::unique_ptr< SubstitutionModelInterface > readSubstitutionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true) override
Read a substitution model from a string.
void updateParameters_(BranchModelInterface &model, std::map< std::string, std::string > &args)
Finish parsing of parameters, taking care of aliases.
std::shared_ptr< const GeneticCode > geneticCode_
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
std::map< std::string, std::string > unparsedArguments_
const std::map< std::string, std::string > & getUnparsedArguments() const override
void initialize_(BranchModelInterface &model, std::shared_ptr< const AlignmentDataInterface > data)
Set parameter initial values of a given model according to options.
Transition model I/O in BppO format.
std::unique_ptr< TransitionModelInterface > readTransitionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true)
std::unique_ptr< MixedTransitionModelInterface > readMixed_(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData)
Parametrize a set of state frequencies for codons.
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
static std::unique_ptr< SubstitutionRegisterInterface > getSubstitutionRegister(const std::string &regTypeDesc, std::shared_ptr< const StateMapInterface > stateMap, std::shared_ptr< const GeneticCode > genCode, std::shared_ptr< AlphabetIndex2 > &weights, std::shared_ptr< AlphabetIndex2 > &distances, bool verbose=true)
Get a Register instance.
Specialized interface for protein substitution model.
const std::string & nextToken()
bool hasMoreToken() const
std::string toString(T t)
Defines the basic types of data flow nodes.