bpp-phyl3 3.0.0
PhylogeneticsApplicationTools.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "../../App/PhylogeneticsApplicationTools.h"
6#include "../../Io/BppOBranchModelFormat.h"
7#include "../../Io/BppOFrequencySetFormat.h"
8#include "../../Io/BppOMultiTreeReaderFormat.h"
9#include "../../Io/BppOSubstitutionModelFormat.h"
10#include "../../Io/BppOTransitionModelFormat.h"
11#include "../../Io/BppOTreeReaderFormat.h"
12#include "../../Io/Newick.h"
13#include "../../Io/NexusIoTree.h"
14#include "../../Io/Nhx.h"
15#include "../../Model/FrequencySet/MvaFrequencySet.h"
16#include "../../Model/MixedTransitionModel.h"
17#include "../../Model/Protein/Coala.h"
18#include "../../Model/SubstitutionModel.h"
19#include "../../Model/WrappedModel.h"
20#include "../../Tree/Tree.h"
21#include "../../Tree/TreeTools.h"
22#include "../OptimizationTools.h"
24
25// From bpp-core
28#include <Bpp/Io/FileTools.h>
29#include <Bpp/Text/TextTools.h>
41
42// From bpp-seq:
47
48using namespace bpp;
49
50// From the STL:
51#include <fstream>
52#include <memory>
53#include <set>
54#include <vector>
55
56using namespace std;
57
58/*************************************************************/
59/***************** TREES ************************************/
60/*************************************************************/
61
62
63/******************************************************************************/
64
65
66map<size_t, shared_ptr<Tree>> LegacyPhylogeneticsApplicationTools::getTrees(
67 const map<string, string>& params,
68 const map<size_t, std::shared_ptr<AlignmentDataInterface>>& mSeq,
69 map<string, string>& unparsedParams,
70 const string& prefix,
71 const string& suffix,
72 bool suffixIsOptional,
73 bool verbose,
74 int warn)
75{
76 vector<string> vTreesName = ApplicationTools::matchingParameters(prefix + "tree*", params);
77
78 map<size_t, shared_ptr<Tree>> mTree;
79
80 for (size_t nT = 0; nT < vTreesName.size(); nT++)
81 {
82 size_t poseq = vTreesName[nT].find("=");
83 size_t num = 0;
84 size_t len = (prefix + "tree").size();
85 string suff = vTreesName[nT].substr(len, poseq - len);
86 bool flag = 0;
87 size_t nbTree = 1;
88
89 if (TextTools::isDecimalInteger(suff, '$'))
90 num = static_cast<size_t>(TextTools::toInt(suff));
91 else
92 {
93 flag = 1;
94 num = 1;
95 }
96
97 if (!flag)
98 {
101 }
102
103 string treeDesc = ApplicationTools::getStringParameter(vTreesName[nT], params, "", suffix, suffixIsOptional);
104
105 string treeName;
106
107 map<string, string> args;
108
109 KeyvalTools::parseProcedure(treeDesc, treeName, args);
110
111 if (treeName == "user")
112 {
113 string format;
114
115 if (args.find("format") != args.end())
116 format = args["format"];
117 else
118 {
119 format = "Newick";
120 ApplicationTools::displayWarning("Warning, " + vTreesName[nT] + " format set to Newick");
121 }
122
123 string treeFilePath = ApplicationTools::getAFilePath("file", args, true, true, suffix, suffixIsOptional, "none", warn);
124
125 unique_ptr<IMultiTree> treeReader;
126 if (format == "Newick")
127 treeReader.reset(new Newick(true));
128 else if (format == "Nexus")
129 treeReader.reset(new NexusIOTree());
130 else if (format == "NHX")
131 treeReader.reset(new Nhx());
132 else
133 throw Exception("Unknown format for tree reading: " + format);
134
135 vector<unique_ptr<Tree>> trees;
136 treeReader->readTrees(treeFilePath, trees);
137
138 if (verbose)
139 {
140 if (flag)
141 {
143 ApplicationTools::displayResult("Tree file", treeFilePath);
144 }
145
146 ApplicationTools::displayResult("Number of trees in file", trees.size());
147 }
148
149 if (flag)
150 {
151 nbTree = trees.size();
152
153 for (size_t i2 = 0; i2 < trees.size(); i2++)
154 {
155 if (mTree.find(i2 + 1) != mTree.end())
156 {
157 ApplicationTools::displayWarning("Tree " + TextTools::toString(i2 + 1) + " already assigned, replaced by new one.");
158 }
159
160 mTree[i2 + 1] = std::move(trees[i2]);
161 ApplicationTools::displayResult("Number of leaves", trees[i2]->getNumberOfLeaves());
162 }
163 }
164 else
165 {
166 if (trees.size() > 1)
167 throw Exception("Error : Several trees for description of " + vTreesName[nT] + ".");
168
169 if (trees.size() == 1)
170 {
171 if (mTree.find(num) != mTree.end())
172 {
173 ApplicationTools::displayWarning("Tree " + TextTools::toString(num) + " already assigned, replaced by new one.");
174 }
175 mTree[num] = std::move(trees[0]);
176 ApplicationTools::displayResult("Number of leaves", trees[0]->getNumberOfLeaves());
177 }
178 }
179 }
180 else if (treeName == "random")
181 {
182 size_t seqNum;
183
184 if (args.find("data") == args.end())
185 {
186 ApplicationTools::displayWarning("Random tree set from data 1");
187 seqNum = 1;
188 }
189 else
190 seqNum = (size_t) TextTools::toInt(args["data"]);
191
192
193 if (mSeq.find(seqNum) == mSeq.end())
194 throw Exception("Error : Wrong number of data " + TextTools::toString(seqNum));
195
196 vector<string> names = mSeq.find(seqNum)->second->getSequenceNames();
197 auto tree = TreeTemplateTools::getRandomTree(names);
198 tree->setBranchLengths(1.);
199
200 if (mTree.find(num) != mTree.end())
201 {
202 ApplicationTools::displayWarning("Tree " + TextTools::toString(num) + " already assigned, replaced by new one.");
203 }
204 mTree[num] = std::move(tree);
205 ApplicationTools::displayResult("Number of leaves", tree->getNumberOfLeaves());
206 }
207
208 // //////////
209 // Setting branch lengths?
210 string initBrLenMethod = ApplicationTools::getStringParameter("init.brlen.method", args, "Input", "", true, 1);
211 string cmdName;
212 map<string, string> cmdArgs;
213
214 KeyvalTools::parseProcedure(initBrLenMethod, cmdName, cmdArgs);
215 if (cmdName == "Input")
216 {
217 // Is the root has to be moved to the midpoint position along the branch that contains it ? If no, do nothing!
218 string midPointRootBrLengths = ApplicationTools::getStringParameter("midPointRootBrLengths", cmdArgs, "no", "", true, 2);
219 if (midPointRootBrLengths == "yes")
220 {
221 if (flag)
222 {
223 for (size_t i = 0; i < nbTree; i++)
224 {
226 }
227 }
228 else
230 }
231 }
232 else if (cmdName == "Equal")
233 {
234 double value = ApplicationTools::getDoubleParameter("value", cmdArgs, 0.1, "", true, 2);
235 if (value <= 0)
236 throw Exception("Value for branch length must be superior to 0");
237 ApplicationTools::displayResult("Branch lengths set to", value);
238 if (flag)
239 {
240 for (size_t i = 0; i < nbTree; i++)
241 {
242 mTree[i + 1]->setBranchLengths(value);
243 }
244 }
245 else
246 mTree[num]->setBranchLengths(value);
247 }
248 else if (cmdName == "Clock")
249 {
250 if (flag)
251 {
252 for (size_t i = 0; i < nbTree; i++)
253 {
254 TreeTools::convertToClockTree(*mTree[i + 1], mTree[i + 1]->getRootId(), true);
255 }
256 }
257 else
258 TreeTools::convertToClockTree(*mTree[num], mTree[num]->getRootId(), true);
259 }
260 else if (cmdName == "Grafen")
261 {
262 string grafenHeight = ApplicationTools::getStringParameter("height", cmdArgs, "input", "", true, 2);
263 double h;
264 if (flag)
265 {
266 for (size_t i = 0; i < nbTree; i++)
267 {
268 auto& tree = *mTree[i + 1];
269 if (grafenHeight == "input")
270 {
271 h = TreeTools::getHeight(tree, tree.getRootId());
272 }
273 else
274 {
275 h = TextTools::toDouble(grafenHeight);
276 if (h <= 0)
277 throw Exception("Height must be positive in Grafen's method.");
278 }
280
281 double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, 2);
282 ApplicationTools::displayResult("Grafen's rho", rho);
284 double nh = TreeTools::getHeight(tree, tree.getRootId());
285 tree.scaleTree(h / nh);
286 }
287 }
288 else
289 {
290 auto& tree = *mTree[num];
291 if (grafenHeight == "input")
292 {
293 h = TreeTools::getHeight(tree, tree.getRootId());
294 }
295 else
296 {
297 h = TextTools::toDouble(grafenHeight);
298 if (h <= 0)
299 throw Exception("Height must be positive in Grafen's method.");
300 }
302
303 double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, 2);
304 ApplicationTools::displayResult("Grafen's rho", rho);
306 double nh = TreeTools::getHeight(tree, tree.getRootId());
307 tree.scaleTree(h / nh);
308 }
309 }
310 else
311 throw Exception("Method '" + initBrLenMethod + "' unknown for computing branch lengths.");
312
313 // //////////// Setting branch lengths with aliases
314
315 vector<string> vBrNb = ApplicationTools::matchingParameters("BrLen*", args);
316
317 for (size_t ib = 0; ib < vBrNb.size(); ib++)
318 {
319 string apeq = args[vBrNb[ib]];
320 string aveq = vBrNb[ib];
321
323 mTree[num]->setDistanceToFather(TextTools::toInt(aveq.substr(5, string::npos)), TextTools::toDouble(apeq));
324 else
325 {
326 size_t posun = apeq.find("_");
327 size_t posd = aveq.find("_");
328 unparsedParams[aveq + (posd != string::npos ? "" : "_" + TextTools::toString(num))] = apeq + (posun != string::npos ? "" : "_" + TextTools::toString(num));
329 }
330 }
331
332 ApplicationTools::displayResult("Branch lengths", cmdName);
333 }
334
335 return mTree;
336}
337
338
339/******************************************************/
340/**** SUBSTITUTION MODEL SET **************************/
341/******************************************************/
342
343/******************************************************************************/
344
347 map<string, string>& unparsedParameterValues,
348 size_t modelNumber,
349 std::shared_ptr<const AlignmentDataInterface> data,
350 map<string, string>& sharedParams,
351 bool verbose)
352{
353 string initFreqs = ApplicationTools::getStringParameter(model.getNamespace() + "initFreqs", unparsedParameterValues, "", "", true, 2);
354
355 if (verbose)
356 ApplicationTools::displayResult("Frequencies Initialization for model", (initFreqs == "") ? "None" : initFreqs);
357
358 if (initFreqs != "")
359 {
360 try
361 {
362 auto& tmodel = dynamic_cast<TransitionModelInterface&>(model);
363 if (initFreqs == "observed")
364 {
365 if (!data)
366 throw Exception("Missing data for observed frequencies");
367 unsigned int psi = ApplicationTools::getParameter<unsigned int>(model.getNamespace() + "initFreqs.observedPseudoCount", unparsedParameterValues, 0);
368 tmodel.setFreqFromData(*data, psi);
369 }
370 else if (initFreqs.substr(0, 6) == "values")
371 {
372 // Initialization using the "values" argument
373 map<int, double> frequencies;
374
375 string rf = initFreqs.substr(6);
376 StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
377 int i = 0;
378 while (strtok.hasMoreToken())
379 frequencies[i++] = TextTools::toDouble(strtok.nextToken());
380 tmodel.setFreq(frequencies);
381 }
382 else
383 throw Exception("Unknown initFreqs argument");
384 }
385 catch (exception& e)
386 {
387 ApplicationTools::displayMessage("Frequencies initialization not possible for model " + model.getName());
388 }
389 }
390
392 for (size_t i = 0; i < pl.size(); ++i)
393 {
394 AutoParameter ap(pl[i]);
396 pl.setParameter(i, ap);
397 }
398 for (size_t i = 0; i < pl.size(); ++i)
399 {
400 const string pName = pl[i].getName();
401 size_t posp = model.getParameterNameWithoutNamespace(pName).rfind(".");
402 string value;
403 bool test1 = (initFreqs == "");
404 bool test2 = (model.getParameterNameWithoutNamespace(pName).substr(posp + 1, 5) != "theta");
405 bool test3 = (unparsedParameterValues.find(pName) != unparsedParameterValues.end());
406
407 if (test1 || test2 || test3)
408 {
409 if (!test1 && !test2 && test3)
410 ApplicationTools::displayWarning("Warning, initFreqs argument is set and a value is set for parameter " + pName);
411
412 value = ApplicationTools::getStringParameter(pName, unparsedParameterValues, TextTools::toString(pl[i].getValue()));
413
414 try
415 {
416 pl[i].setValue(TextTools::toDouble(value));
417 if (verbose)
418 ApplicationTools::displayResult("Parameter found", pName + +"_" + TextTools::toString(modelNumber) + "=" + TextTools::toString(pl[i].getValue()));
419 }
420 catch (Exception& e)
421 {
422 sharedParams[pl[i].getName() + "_" + TextTools::toString(modelNumber)] = value;
423 }
424 }
425 }
426
427 model.matchParametersValues(pl);
428}
429
430/******************************************************************************/
431
433 shared_ptr<const Alphabet> alphabet,
434 shared_ptr<const GeneticCode> gCode,
435 shared_ptr<const AlignmentDataInterface> data,
436 const map<string, string>& params,
437 const string& suffix,
438 bool suffixIsOptional,
439 bool verbose,
440 int warn)
441{
442 if (!ApplicationTools::parameterExists("nonhomogeneous.number_of_models", params))
443 throw Exception("A value is needed for this parameter: nonhomogeneous.number_of_models .");
444 size_t nbModels = ApplicationTools::getParameter<size_t>("nonhomogeneous.number_of_models", params, 1, suffix, suffixIsOptional, warn);
445 if (nbModels == 0)
446 throw Exception("The number of models can't be 0 !");
447
448 bool nomix = true;
449 for (size_t i = 0; nomix& (i < nbModels); i++)
450 {
451 string prefix = "model" + TextTools::toString(i + 1);
452 string modelDesc;
453 modelDesc = ApplicationTools::getStringParameter(prefix, params, "", suffix, suffixIsOptional, warn);
454
455 if (modelDesc.find("Mixed") != string::npos)
456 nomix = false;
457 }
458
459 unique_ptr<SubstitutionModelSet> modelSet = nullptr;
460 auto modelSet1 = make_unique<SubstitutionModelSet>(alphabet);
461 setSubstitutionModelSet(*modelSet1, alphabet, gCode, data, params, suffix, suffixIsOptional, verbose, warn);
462
463 if (modelSet1->hasMixedTransitionModel())
464 {
465 modelSet = make_unique<MixedSubstitutionModelSet>(*modelSet1);
466 completeMixedSubstitutionModelSet(dynamic_cast<MixedSubstitutionModelSet&>(*modelSet), alphabet, data, params, suffix, suffixIsOptional, verbose);
467 }
468 else
469 modelSet = std::move(modelSet1);
470
471 return modelSet;
472}
473
474/******************************************************************************/
475
477 SubstitutionModelSet& modelSet,
478 shared_ptr<const Alphabet> alphabet,
479 shared_ptr<const GeneticCode> gCode,
480 shared_ptr<const AlignmentDataInterface> data,
481 const map<string, string>& params,
482 const string& suffix,
483 bool suffixIsOptional,
484 bool verbose,
485 int warn)
486{
487 modelSet.clear();
488 if (!ApplicationTools::parameterExists("nonhomogeneous.number_of_models", params))
489 throw Exception("You must specify this parameter: 'nonhomogeneous.number_of_models'.");
490 size_t nbModels = ApplicationTools::getParameter<size_t>("nonhomogeneous.number_of_models", params, 1, suffix, suffixIsOptional, warn);
491 if (nbModels == 0)
492 throw Exception("The number of models can't be 0 !");
493
494 if (verbose)
495 ApplicationTools::displayResult("Number of distinct models", TextTools::toString(nbModels));
496
497 BppOTransitionModelFormat bIO(BppOTransitionModelFormat::ALL, true, true, true, false, warn);
498
499 // ///////////////////////////////////////////
500 // Build a new model set object:
501
502 vector<double> rateFreqs;
503 string tmpDesc;
504 if (AlphabetTools::isCodonAlphabet(*alphabet))
505 {
506 if (!gCode)
507 throw Exception("PhylogeneticsApplicationTools::setSubstitutionModelSet(): a GeneticCode instance is required for instantiating a codon model.");
508 bIO.setGeneticCode(gCode);
509 tmpDesc = ApplicationTools::getStringParameter("model1", params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
510 }
511 else if (AlphabetTools::isWordAlphabet(*alphabet))
512 tmpDesc = ApplicationTools::getStringParameter("model1", params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
513 else
514 tmpDesc = ApplicationTools::getStringParameter("model1", params, "JC69", suffix, suffixIsOptional, warn);
515
516 std::map<size_t, std::shared_ptr<const AlignmentDataInterface>> mData;
517 mData[1] = data;
518
519 shared_ptr<TransitionModelInterface> tmp = bIO.readTransitionModel(alphabet, tmpDesc, mData, 1, false);
520
521 if (tmp->getNumberOfStates() != alphabet->getSize())
522 {
523 // Markov-Modulated Markov Model...
524
525 size_t n = static_cast<size_t>(tmp->getNumberOfStates() / alphabet->getSize());
526 rateFreqs = vector<double>(n, 1. / static_cast<double>(n));
527 // Equal rates assumed for now, may be changed later
528 }
529
530 // ////////////////////////////////////
531 // Deal with root frequencies
532
533 map<string, string> unparsedParameters;
534
535 bool stationarity = ApplicationTools::getBooleanParameter("nonhomogeneous.stationarity", params, false, "", true, warn);
536 std::shared_ptr<FrequencySetInterface> rootFrequencies(0);
537 if (!stationarity)
538 {
539 rootFrequencies = PhylogeneticsApplicationTools::getRootFrequencySet(alphabet, gCode, mData, 1, params, unparsedParameters, rateFreqs, suffix, suffixIsOptional, verbose);
540 stationarity = !rootFrequencies;
541 string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", params, "", suffix, suffixIsOptional, warn);
542 if (freqDescription.substr(0, 10) == "MVAprotein")
543 {
544 auto tmp2 = dynamic_pointer_cast<Coala>(tmp);
545 if (tmp2)
546 dynamic_pointer_cast<MvaFrequencySet>(rootFrequencies)->initSet(*tmp2);
547 else
548 throw Exception("The MVAprotein frequencies set at the root can only be used if a Coala model is used on branches.");
549 }
550 }
551
552 ApplicationTools::displayBooleanResult("Stationarity assumed", stationarity);
553
554 if (!stationarity)
555 modelSet.setRootFrequencies(rootFrequencies);
556
557
558 // //////////////////////////////////////
559 // Now parse all models:
560
561 bIO.setVerbose(true);
562
563 for (size_t i = 0; i < nbModels; ++i)
564 {
565 string prefix = "model" + TextTools::toString(i + 1);
566 string modelDesc;
567 if (AlphabetTools::isCodonAlphabet(*alphabet))
568 modelDesc = ApplicationTools::getStringParameter(prefix, params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
569 else if (AlphabetTools::isWordAlphabet(*alphabet))
570 modelDesc = ApplicationTools::getStringParameter(prefix, params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
571 else
572 modelDesc = ApplicationTools::getStringParameter(prefix, params, "JC69", suffix, suffixIsOptional, warn);
573
574 auto model = bIO.readTransitionModel(alphabet, modelDesc, mData, 1, false);
575
576 map<string, string> unparsedModelParameters = bIO.getUnparsedArguments();
577 map<string, string> sharedParameters;
578
579
581 *model,
582 unparsedModelParameters, i + 1, data,
583 sharedParameters,
584 verbose);
585
586 unparsedParameters.insert(sharedParameters.begin(), sharedParameters.end());
587
588 vector<int> nodesId = ApplicationTools::getVectorParameter<int>(prefix + ".nodes_id", params, ',', ':', "", suffix, suffixIsOptional, warn);
589
590 if (verbose)
591 ApplicationTools::displayResult("Model" + TextTools::toString(i + 1) + " is associated to", TextTools::toString(nodesId.size()) + " node(s).");
592
593 modelSet.addModel(std::move(model), nodesId);
594 }
595
596 // Finally check parameter aliasing:
597
598 string aliasDesc = ApplicationTools::getStringParameter("nonhomogeneous.alias", params, "", suffix, suffixIsOptional, warn);
599 StringTokenizer st(aliasDesc, ",");
600 while (st.hasMoreToken())
601 {
602 string alias = st.nextToken();
603 string::size_type index = alias.find("->");
604 if (index == string::npos)
605 throw Exception("PhylogeneticsApplicationTools::setSubstitutionModelSet. Bad alias syntax, should contain `->' symbol: " + alias);
606 unparsedParameters[alias.substr(0, index)] = alias.substr(index + 2);
607 }
608
609 // alias unparsedParameters
610
611 modelSet.aliasParameters(unparsedParameters, verbose);
612}
613
614/******************************************************************************/
615
617 MixedSubstitutionModelSet& mixedModelSet,
618 shared_ptr<const Alphabet> alphabet,
619 shared_ptr<const AlignmentDataInterface> data,
620 const map<string, string>& params,
621 const string& suffix,
622 bool suffixIsOptional,
623 bool verbose,
624 int warn)
625{
626 // /////////////////////////////////////////
627 // Looks for the allowed paths
628
629 size_t numd;
630 if (!ApplicationTools::parameterExists("site.number_of_paths", params))
631 numd = 0;
632 else
633 numd = ApplicationTools::getParameter<size_t>("site.number_of_paths", params, 1, suffix, suffixIsOptional, warn);
634
635 if (verbose)
636 ApplicationTools::displayResult("Number of distinct paths", TextTools::toString(numd));
637
638 vector<string> vdesc;
639 while (numd)
640 {
641 string desc = ApplicationTools::getStringParameter("site.path" + TextTools::toString(numd), params, "", suffix, suffixIsOptional, warn);
642 if (desc.size() == 0)
643 break;
644 else
645 vdesc.push_back(desc);
646 numd--;
647 }
648
649 if (vdesc.size() == 0)
650 {
651 mixedModelSet.complete();
652 mixedModelSet.computeHyperNodesProbabilities();
653 return;
654 }
655
656 for (vector<string>::iterator it(vdesc.begin()); it != vdesc.end(); it++)
657 {
658 mixedModelSet.addEmptyHyperNode();
659 StringTokenizer st(*it, "&");
660 while (st.hasMoreToken())
661 {
662 string submodel = st.nextToken();
663 string::size_type indexo = submodel.find("[");
664 string::size_type indexf = submodel.find("]");
665 if ((indexo == string::npos) | (indexf == string::npos))
666 throw Exception("PhylogeneticsApplicationTools::setMixedSubstitutionModelSet. Bad path syntax, should contain `[]' symbols: " + submodel);
667 int num = TextTools::toInt(submodel.substr(5, indexo - 5));
668 string p2 = submodel.substr(indexo + 1, indexf - indexo - 1);
669
670 auto pSM = dynamic_pointer_cast<const MixedTransitionModelInterface>(mixedModelSet.getModel(static_cast<size_t>(num - 1)));
671 if (!pSM)
672 throw BadIntegerException("LegacyPhylogeneticsApplicationTools::setMixedSubstitutionModelSet: Wrong model for number", num - 1);
673 Vuint submodnb = pSM->getSubmodelNumbers(p2);
674
675 mixedModelSet.addToHyperNode(static_cast<size_t>(num - 1), submodnb);
676 }
677
678 if (!mixedModelSet.getHyperNode(mixedModelSet.getNumberOfHyperNodes() - 1).isComplete())
679 throw Exception("A path should own at least a submodel of each mixed model: " + *it);
680
681 if (verbose)
682 ApplicationTools::displayResult("Site Path", *it);
683 }
684
685 // / Checks if the paths are separate
686 if (!mixedModelSet.hasExclusivePaths())
687 throw Exception("All paths must be disjoint.");
688
689 // / Puts all the remaining models in a new path
690 string st;
691 st = (mixedModelSet.complete()) ? "Yes" : "No";
692
693 if (verbose)
694 ApplicationTools::displayResult("Site Path Completion", st);
695
696 mixedModelSet.computeHyperNodesProbabilities();
697
698 if (!mixedModelSet.getHyperNode(mixedModelSet.getNumberOfHyperNodes() - 1).isComplete())
699 throw Exception("The remaining submodels can not create a complete path.");
700}
701
702
703/*************************************************************/
704/***** OPTIMIZATORS *****************************************/
705/*************************************************************/
706
707/******************************************************************************/
708
710 shared_ptr<TreeLikelihoodInterface> tl,
711 const ParameterList& parameters,
712 const map<string, string>& params,
713 const string& suffix,
714 bool suffixIsOptional,
715 bool verbose,
716 int warn)
717{
718 string optimization = ApplicationTools::getStringParameter("optimization", params, "FullD(derivatives=Newton)", suffix, suffixIsOptional, warn);
719 if (optimization == "None")
720 return tl;
721 string optName;
722 map<string, string> optArgs;
723 KeyvalTools::parseProcedure(optimization, optName, optArgs);
724
725 unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", params, 2, suffix, suffixIsOptional, warn);
726
727 string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", params, false, false, suffix, suffixIsOptional);
728 shared_ptr<OutputStream> messageHandler =
729 (mhPath == "none") ? nullptr :
730 (mhPath == "std") ? ApplicationTools::message :
731 make_shared<StlOutputStream>(make_unique<ofstream>(mhPath.c_str(), ios::out));
732 if (verbose)
733 ApplicationTools::displayResult("Message handler", mhPath);
734
735 string prPath = ApplicationTools::getAFilePath("optimization.profiler", params, false, false, suffix, suffixIsOptional);
736 shared_ptr<OutputStream> profiler =
737 (prPath == "none") ? nullptr :
738 (prPath == "std") ? ApplicationTools::message :
739 make_shared<StlOutputStream>(make_unique<ofstream>(prPath.c_str(), ios::out));
740 if (profiler)
741 profiler->setPrecision(20);
742 if (verbose)
743 ApplicationTools::displayResult("Profiler", prPath);
744
745 bool scaleFirst = ApplicationTools::getBooleanParameter("optimization.scale_first", params, false, suffix, suffixIsOptional, warn);
746 if (scaleFirst)
747 {
748 // We scale the tree before optimizing each branch length separately:
749 if (verbose)
750 ApplicationTools::displayMessage("Scaling the tree before optimizing each branch length separately.");
751 double tolerance = ApplicationTools::getDoubleParameter("optimization.scale_first.tolerance", params, .0001, suffix, suffixIsOptional, warn + 1);
752 if (verbose)
753 ApplicationTools::displayResult("Scaling tolerance", TextTools::toString(tolerance));
754 unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.scale_first.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, warn + 1);
755 if (verbose)
756 ApplicationTools::displayResult("Scaling max # f eval", TextTools::toString(nbEvalMax));
758 tl,
759 tolerance,
760 nbEvalMax,
761 messageHandler,
762 profiler);
763 if (verbose)
764 ApplicationTools::displayResult("New tree likelihood", -tl->getValue());
765 }
766
767 // Should I ignore some parameters?
768 ParameterList parametersToEstimate = parameters;
769 vector<string> parNames = parametersToEstimate.getParameterNames();
770
771 string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameter", params, "", suffix, suffixIsOptional, warn + 1);
772 if (paramListDesc.length() == 0)
773 paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameters", params, "", suffix, suffixIsOptional, warn + 1);
774 StringTokenizer st(paramListDesc, ",");
775 while (st.hasMoreToken())
776 {
777 try
778 {
779 string param = st.nextToken();
780 if (param == "BrLen")
781 {
782 vector<string> vs = tl->getBranchLengthsParameters().getParameterNames();
783 parametersToEstimate.deleteParameters(vs);
784 if (verbose)
785 ApplicationTools::displayResult("Parameter ignored", string("Branch lengths"));
786 }
787 else if (param == "Ancient")
788 {
789 auto nhtl = dynamic_pointer_cast<NonHomogeneousTreeLikelihood>(tl);
790 if (!nhtl)
791 ApplicationTools::displayWarning("The 'Ancient' parameters do not exist in homogeneous models, and will be ignored.");
792 else
793 {
794 vector<string> vs = nhtl->getRootFrequenciesParameters().getParameterNames();
795 parametersToEstimate.deleteParameters(vs);
796 }
797 if (verbose)
798 ApplicationTools::displayResult("Parameter ignored", string("Root frequencies"));
799 }
800 else if (param == "Model")
801 {
802 vector<string> vs;
803 vector<string> vs1 = tl->getSubstitutionModelParameters().getParameterNames();
804 auto nhtl = dynamic_pointer_cast<NonHomogeneousTreeLikelihood>(tl);
805 if (nhtl)
806 {
807 vector<string> vs2 = nhtl->getRootFrequenciesParameters().getParameterNames();
808 VectorTools::diff(vs1, vs2, vs);
809 }
810 else
811 vs = vs1;
812
813 parametersToEstimate.deleteParameters(vs);
814 if (verbose)
815 ApplicationTools::displayResult("Parameter ignored", string("Model"));
816 }
817 else if (param == "*")
818 {
819 parametersToEstimate.reset();
820 if (verbose)
821 ApplicationTools::displayResult("Parameter ignored", string("All"));
822 }
823 else if (param.find("*") != string::npos)
824 {
825 vector<string> vs = ApplicationTools::matchingParameters(param, parNames);
826
827 bool verbhere = verbose;
828
829 if (vs.size() >= 20)
830 {
831 if (verbose)
832 ApplicationTools::displayResult("Number of parameters ignored", vs.size());
833 verbhere = false;
834 }
835
836 for (auto& it : vs)
837 {
838 parametersToEstimate.deleteParameter(it);
839 if (verbhere)
840 ApplicationTools::displayResult("Parameter ignored", it);
841 }
842 }
843 else
844 {
845 parametersToEstimate.deleteParameter(param);
846 if (verbose)
847 ApplicationTools::displayResult("Parameter ignored", param);
848 }
849 }
850 catch (ParameterNotFoundException& pnfe)
851 {
852 ApplicationTools::displayWarning("Parameter '" + pnfe.parameter() + "' not found, and so can't be ignored!");
853 }
854 }
855
856 // Should I constrain some parameters?
857 vector<string> parToEstNames = parametersToEstimate.getParameterNames();
858
859 paramListDesc = ApplicationTools::getStringParameter("optimization.constrain_parameter", params, "", suffix, suffixIsOptional, warn);
860 if (paramListDesc.length() == 0)
861 paramListDesc = ApplicationTools::getStringParameter("optimization.constrain_parameters", params, "", suffix, suffixIsOptional, warn);
862
863 string constraint = "";
864 string pc, param = "";
865
866 StringTokenizer st2(paramListDesc, ",");
867 while (st2.hasMoreToken())
868 {
869 try
870 {
871 pc = st2.nextToken();
872 string::size_type index = pc.find("=");
873 if (index == string::npos)
874 throw Exception("PhylogeneticsApplicationTools::optimizeParameters. Bad constrain syntax, should contain `=' symbol: " + pc);
875 param = pc.substr(0, index);
876 constraint = pc.substr(index + 1);
877 std::shared_ptr<IntervalConstraint> ic(new IntervalConstraint(constraint));
878
879 vector<string> parNames2;
880
881 if (param == "BrLen")
882 parNames2 = tl->getBranchLengthsParameters().getParameterNames();
883 else if (param == "Ancient")
884 {
885 auto nhtl = dynamic_pointer_cast<NonHomogeneousTreeLikelihood>(tl);
886 if (!nhtl)
887 ApplicationTools::displayWarning("The 'Ancient' parameters do not exist in homogeneous models, and will be ignored.");
888 else
889 parNames2 = nhtl->getRootFrequenciesParameters().getParameterNames();
890 }
891 else if (param == "Model")
892 {
893 vector<string> vs1 = tl->getSubstitutionModelParameters().getParameterNames();
894 auto nhtl = dynamic_pointer_cast<NonHomogeneousTreeLikelihood>(tl);
895 if (nhtl)
896 {
897 vector<string> vs2 = nhtl->getRootFrequenciesParameters().getParameterNames();
898 VectorTools::diff(vs1, vs2, parNames2);
899 }
900 else
901 parNames2 = vs1;
902 }
903 else if (param == "*")
904 parNames2 = parToEstNames;
905 else if (param.find("*") != string::npos)
906 parNames2 = ApplicationTools::matchingParameters(param, parToEstNames);
907 else
908 parNames2.push_back(param);
909
910 for (size_t i = 0; i < parNames2.size(); ++i)
911 {
912 Parameter& par = parametersToEstimate.parameter(parNames2[i]);
913 if (par.hasConstraint())
914 {
915 par.setConstraint(std::shared_ptr<ConstraintInterface>(*ic & (*par.getConstraint())));
916 if (par.constraint().isEmpty())
917 throw Exception("Empty interval for parameter " + parNames[i] + par.constraint().getDescription());
918 }
919 else
920 par.setConstraint(ic);
921
922 if (verbose)
923 ApplicationTools::displayResult("Parameter constrained " + par.getName(), par.constraint().getDescription());
924 }
925 }
926 catch (ParameterNotFoundException& pnfe)
927 {
928 ApplicationTools::displayWarning("Parameter '" + pnfe.parameter() + "' not found, and so can't be constrained!");
929 }
930 catch (ConstraintException& pnfe)
931 {
932 throw Exception("Parameter '" + param + "' does not fit the constraint " + constraint);
933 }
934 }
935
936
937 // /////
938 // / optimization options
939
940 unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, warn + 1);
941 if (verbose)
942 ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax));
943
944 double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", params, .000001, suffix, suffixIsOptional, warn + 1);
945 if (verbose)
947
948 // Backing up or restoring?
949 shared_ptr<BackupListener> backupListener;
950 string backupFile = ApplicationTools::getAFilePath("optimization.backup.file", params, false, false);
951 if (backupFile != "none")
952 {
953 ApplicationTools::displayResult("Parameters will be backup to", backupFile);
954 backupListener.reset(new BackupListener(backupFile));
955 if (FileTools::fileExists(backupFile))
956 {
957 ApplicationTools::displayMessage("A backup file was found! Try to restore parameters from previous run...");
958 ifstream bck(backupFile.c_str(), ios::in);
959 vector<string> lines = FileTools::putStreamIntoVectorOfStrings(bck);
960 double fval = TextTools::toDouble(lines[0].substr(5));
961 ParameterList pl = tl->getParameters();
962 for (size_t l = 1; l < lines.size(); ++l)
963 {
964 if (!TextTools::isEmpty(lines[l]))
965 {
966 StringTokenizer stp(lines[l], "=");
967 if (stp.numberOfRemainingTokens() != 2)
968 {
969 cerr << "Corrupted backup file!!!" << endl;
970 cerr << "at line " << l << ": " << lines[l] << endl;
971 }
972 string pname = stp.nextToken();
973 string pvalue = stp.nextToken();
974 if (pl.hasParameter(pname))
975 {
976 size_t p = pl.whichParameterHasName(pname);
977 pl.setParameter(p, AutoParameter(pl[p]));
978 pl[p].setValue(TextTools::toDouble(pvalue));
979 }
980 else
981 ApplicationTools::displayMessage("Warning: unknown parameter in backup file : " + pname);
982 }
983 }
984 bck.close();
985 tl->setParameters(pl);
986 if (convert(abs(tl->getValue() - fval)) > 0.000001)
987 ApplicationTools::displayWarning("Warning, incorrect likelihood value after restoring from backup file.");
988 ApplicationTools::displayResult("Restoring log-likelihood", -tl->getValue());
989 }
990 }
991
992 // There it goes...
993 bool optimizeTopo = ApplicationTools::getBooleanParameter("optimization.topology", params, false, suffix, suffixIsOptional, warn + 1);
994 if (verbose)
995 ApplicationTools::displayResult("Optimize topology", optimizeTopo ? "yes" : "no");
996 string nniMethod = ApplicationTools::getStringParameter("optimization.topology.algorithm_nni.method", params, "phyml", suffix, suffixIsOptional, warn + 1);
997 string nniAlgo;
998 if (nniMethod == "fast")
999 {
1000 nniAlgo = NNITopologySearch::FAST;
1001 }
1002 else if (nniMethod == "better")
1003 {
1004 nniAlgo = NNITopologySearch::BETTER;
1005 }
1006 else if (nniMethod == "phyml")
1007 {
1008 nniAlgo = NNITopologySearch::PHYML;
1009 }
1010 else
1011 throw Exception("Unknown NNI algorithm: '" + nniMethod + "'.");
1012
1013
1014 string order = ApplicationTools::getStringParameter("derivatives", optArgs, "Newton", "", true, warn + 1);
1015 string optMethodDeriv;
1016 if (order == "Gradient")
1017 {
1019 }
1020 else if (order == "Newton")
1021 {
1023 }
1024 else if (order == "BFGS")
1025 {
1026 optMethodDeriv = OptimizationTools::OPTIMIZATION_BFGS;
1027 }
1028 else
1029 throw Exception("Unknown derivatives algorithm: '" + order + "'.");
1030 if (verbose)
1031 ApplicationTools::displayResult("Optimization method", optName);
1032 if (verbose)
1033 ApplicationTools::displayResult("Algorithm used for derivable parameters", order);
1034
1035 // See if we should reparametrize:
1036 bool reparam = ApplicationTools::getBooleanParameter("optimization.reparametrization", params, warn + 1);
1037 if (verbose)
1038 ApplicationTools::displayResult("Reparametrization", (reparam ? "yes" : "no"));
1039
1040 // See if we should use a molecular clock constraint:
1041 string clock = ApplicationTools::getStringParameter("optimization.clock", params, "None", "", true, warn + 1);
1042 if (clock != "None" && clock != "Global")
1043 throw Exception("Molecular clock option not recognized, should be one of 'Global' or 'None'.");
1044 bool useClock = (clock == "Global");
1045 if (useClock && optimizeTopo)
1046 throw Exception("PhylogeneticsApplicationTools::optimizeParameters. Cannot optimize topology with a molecular clock.");
1047 if (verbose)
1048 ApplicationTools::displayResult("Molecular clock", clock);
1049
1050 unsigned int n = 0;
1051 if ((optName == "D-Brent") || (optName == "D-BFGS"))
1052 {
1053 // Uses Newton-Brent method or Newton-BFGS method
1054 string optMethodModel;
1055 if (optName == "D-Brent")
1057 else
1058 optMethodModel = OptimizationTools::OPTIMIZATION_BFGS;
1059
1060 unsigned int nstep = ApplicationTools::getParameter<unsigned int>("nstep", optArgs, 1, "", true, warn + 1);
1061
1062 if (optimizeTopo)
1063 {
1064 bool optNumFirst = ApplicationTools::getBooleanParameter("optimization.topology.numfirst", params, true, suffix, suffixIsOptional, warn + 1);
1065 unsigned int topoNbStep = ApplicationTools::getParameter<unsigned int>("optimization.topology.nstep", params, 1, "", true, warn + 1);
1066 double tolBefore = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.before", params, 100, suffix, suffixIsOptional, warn + 1);
1067 double tolDuring = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.during", params, 100, suffix, warn + 1);
1069 dynamic_pointer_cast<NNIHomogeneousTreeLikelihood>(tl), parametersToEstimate,
1070 optNumFirst, tolBefore, tolDuring, nbEvalMax, topoNbStep, messageHandler, profiler,
1071 reparam, optVerbose, optMethodDeriv, nstep, nniAlgo);
1072 }
1073
1074 if (verbose && nstep > 1)
1075 ApplicationTools::displayResult("# of precision steps", TextTools::toString(nstep));
1076 parametersToEstimate.matchParametersValues(tl->getParameters());
1078 dynamic_pointer_cast<DiscreteRatesAcrossSitesTreeLikelihoodInterface>(tl), parametersToEstimate,
1079 backupListener, nstep, tolerance, nbEvalMax, messageHandler, profiler, reparam, optVerbose, optMethodDeriv, optMethodModel);
1080 }
1081 else if (optName == "FullD")
1082 {
1083 // Uses Newton-raphson algorithm with numerical derivatives when required.
1084
1085 if (optimizeTopo)
1086 {
1087 bool optNumFirst = ApplicationTools::getBooleanParameter("optimization.topology.numfirst", params, true, suffix, suffixIsOptional, warn + 1);
1088 unsigned int topoNbStep = ApplicationTools::getParameter<unsigned int>("optimization.topology.nstep", params, 1, "", true, warn + 1);
1089 double tolBefore = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.before", params, 100, suffix, suffixIsOptional, warn + 1);
1090 double tolDuring = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.during", params, 100, suffix, suffixIsOptional, warn + 1);
1092 dynamic_pointer_cast<NNIHomogeneousTreeLikelihood>(tl), parametersToEstimate,
1093 optNumFirst, tolBefore, tolDuring, nbEvalMax, topoNbStep, messageHandler, profiler,
1094 reparam, optVerbose, optMethodDeriv, nniAlgo);
1095 }
1096
1097 parametersToEstimate.matchParametersValues(tl->getParameters());
1099 dynamic_pointer_cast<DiscreteRatesAcrossSitesTreeLikelihoodInterface>(tl), parametersToEstimate,
1100 backupListener, tolerance, nbEvalMax, messageHandler, profiler, reparam, useClock, optVerbose, optMethodDeriv);
1101 }
1102 else
1103 throw Exception("Unknown optimization method: " + optName);
1104
1105 string finalMethod = ApplicationTools::getStringParameter("optimization.final", params, "none", suffix, suffixIsOptional, warn);
1106 shared_ptr<OptimizerInterface> finalOptimizer = nullptr;
1107 if (finalMethod == "none")
1108 {}
1109 else if (finalMethod == "simplex")
1110 {
1111 finalOptimizer = make_shared<DownhillSimplexMethod>(tl);
1112 }
1113 else if (finalMethod == "powell")
1114 {
1115 finalOptimizer = make_shared<PowellMultiDimensions>(tl);
1116 }
1117 else
1118 throw Exception("Unknown final optimization method: " + finalMethod);
1119
1120 if (finalOptimizer)
1121 {
1122 parametersToEstimate.matchParametersValues(tl->getParameters());
1123 if (verbose)
1124 ApplicationTools::displayResult("Final optimization step", finalMethod);
1125 finalOptimizer->setProfiler(profiler);
1126 finalOptimizer->setMessageHandler(messageHandler);
1127 finalOptimizer->setMaximumNumberOfEvaluations(nbEvalMax);
1128 finalOptimizer->getStopCondition()->setTolerance(tolerance);
1129 finalOptimizer->setVerbose(verbose);
1130 finalOptimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
1131 finalOptimizer->init(parametersToEstimate);
1132 finalOptimizer->optimize();
1133 n += finalOptimizer->getNumberOfEvaluations();
1134 }
1135
1136 if (verbose)
1137 ApplicationTools::displayResult("Performed", TextTools::toString(n) + " function evaluations.");
1138 if (backupFile != "none")
1139 {
1140 string bf = backupFile + ".def";
1141 rename(backupFile.c_str(), bf.c_str());
1142 }
1143 return tl;
1144}
1145
1146
1147/******************************************************************************/
1148/**************** Output ************************************/
1149/******************************************************************************/
1150
1152 const vector<const Tree*>& trees,
1153 const map<string, string>& params,
1154 const string& prefix,
1155 const string& suffix,
1156 bool suffixIsOptional,
1157 bool verbose,
1158 bool checkOnly,
1159 int warn)
1160{
1161 string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
1162 string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, false, suffix, suffixIsOptional, "none", warn);
1163 OMultiTree* treeWriter;
1164 if (format == "Newick")
1165 treeWriter = new Newick();
1166 else if (format == "Nexus")
1167 treeWriter = new NexusIOTree();
1168 else if (format == "NHX")
1169 treeWriter = new Nhx();
1170 else
1171 throw Exception("Unknown format for tree writing: " + format);
1172
1173 if (!checkOnly)
1174 treeWriter->writeTrees(trees, file, true);
1175
1176 delete treeWriter;
1177 if (verbose)
1178 ApplicationTools::displayResult("Wrote trees to file ", file);
1179}
1180
1181
1182/******************************************************************************/
1183
1185 const SubstitutionModelSet& modelSet,
1186 OutputStream& out,
1187 int warn,
1188 bool withAlias)
1189{
1190 (out << "nonhomogeneous=general").endLine();
1191 (out << "nonhomogeneous.number_of_models=" << modelSet.getNumberOfModels()).endLine();
1192
1193 if (modelSet.isStationary())
1194 (out << "nonhomogeneous.stationarity = yes");
1195
1196 // Get the parameter links:
1197 map< size_t, vector<string>> modelLinks; // for each model index, stores the list of global parameters.
1198 map< string, set<size_t>> parameterLinks; // for each parameter name, stores the list of model indices, which should be sorted.
1199 vector<string> writtenNames;
1200
1201 // Loop over all models:
1202 for (size_t i = 0; i < modelSet.getNumberOfModels(); ++i)
1203 {
1204 shared_ptr<const BranchModelInterface> model = modelSet.getModel(i);
1205
1206 // First get the aliases for this model:
1207
1208 map<string, string> aliases;
1209 ParameterList pl = model->getParameters();
1210
1211 if (withAlias)
1212 {
1213 for (size_t np = 0; np < pl.size(); np++)
1214 {
1215 string nfrom = modelSet.getFrom(pl[np].getName() + "_" + TextTools::toString(i + 1));
1216 if (nfrom != "")
1217 aliases[pl[np].getName()] = nfrom;
1218 }
1219 }
1220
1221 // Now print it:
1222 writtenNames.clear();
1223 out.endLine() << "model" << (i + 1) << "=";
1224 BppOBranchModelFormat bIOsm(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
1225 bIOsm.write(*model, out, aliases, writtenNames);
1226
1227 out.endLine();
1228 vector<int> ids = modelSet.getNodesWithModel(i);
1229 out << "model" << (i + 1) << ".nodes_id=" << ids[0];
1230 for (size_t j = 1; j < ids.size(); ++j)
1231 {
1232 out << "," << ids[j];
1233 }
1234 out.endLine();
1235 }
1236
1237 // First get the aliases for this frequencies set
1238
1239 if (!modelSet.isStationary())
1240 {
1241 const auto& pFS = modelSet.rootFrequencySet();
1242 ParameterList plf = pFS.getParameters();
1243
1244 map<string, string> aliases;
1245
1246 if (withAlias)
1247 {
1248 for (size_t np = 0; np < plf.size(); np++)
1249 {
1250 string nfrom = modelSet.getFrom(plf[np].getName());
1251 if (nfrom != "")
1252 aliases[plf[np].getName()] = nfrom;
1253 }
1254 }
1255
1256 // Root frequencies:
1257 out.endLine();
1258 (out << "# Root frequencies:").endLine();
1259 out << "nonhomogeneous.root_freq=";
1260
1262 bIO.writeFrequencySet(pFS, out, aliases, writtenNames);
1263 }
1264}
std::string getFrom(const std::string &name) const
void aliasParameters(const std::string &p1, const std::string &p2)
static bool isCodonAlphabet(const Alphabet &alphabet)
static bool isWordAlphabet(const Alphabet &alphabet)
static void displayMessage(const std::string &text)
static std::vector< std::string > matchingParameters(const std::string &pattern, const std::map< std::string, std::string > &params)
static std::shared_ptr< OutputStream > warning
static std::shared_ptr< OutputStream > message
static bool parameterExists(const std::string &parameterName, const std::map< std::string, std::string > &params)
static bool getBooleanParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, bool defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static void displayBooleanResult(const std::string &text, bool result)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
static std::string getAFilePath(const std::string &parameter, const std::map< std::string, std::string > &params, bool isRequired=true, bool mustExist=true, const std::string &suffix="", bool suffixIsOptional=false, const std::string &defaultPath="none", int warn=0)
static std::string CONSTRAINTS_AUTO
virtual void setMessageHandler(std::shared_ptr< OutputStream > mh)
Branch model I/O in BppO format.
Frequencies set I/O in BppO format.
void writeFrequencySet(const FrequencySetInterface &freqset, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
void write(const BranchModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
const std::map< std::string, std::string > & getUnparsedArguments() const override
Transition model I/O in BppO format.
std::unique_ptr< TransitionModelInterface > readTransitionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true)
Interface for all Branch models.
virtual std::string getName() const =0
Get the name of the model.
virtual std::string getDescription() const=0
virtual bool isEmpty() const=0
static bool fileExists(const std::string &filename)
static std::vector< std::string > putStreamIntoVectorOfStrings(std::istream &input)
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
static std::shared_ptr< NNIHomogeneousTreeLikelihood > optimizeTreeNNI2(std::shared_ptr< NNIHomogeneousTreeLikelihood > tl, const ParameterList &parameters, bool optimizeNumFirst=true, double tolBefore=100, double tolDuring=100, unsigned int tlEvalMax=1000000, unsigned int numStep=1, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, bool reparametrization=false, unsigned int verbose=1, const std::string &optMethod=OptimizationTools::OPTIMIZATION_NEWTON, const std::string &nniMethod=NNITopologySearch::PHYML)
Optimize all parameters from a TreeLikelihood object, including tree topology using Nearest Neighbor ...
static unsigned int optimizeNumericalParameters2(std::shared_ptr< DiscreteRatesAcrossSitesTreeLikelihoodInterface > tl, const ParameterList &parameters, std::shared_ptr< OptimizationListener > listener=nullptr, double tolerance=0.000001, unsigned int tlEvalMax=1000000, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, bool reparametrization=false, bool useClock=false, unsigned int verbose=1, const std::string &optMethodDeriv=OPTIMIZATION_NEWTON)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static unsigned int optimizeTreeScale(std::shared_ptr< TreeLikelihoodInterface > tl, double tolerance=0.000001, unsigned int tlEvalMax=1000000, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, unsigned int verbose=1)
Optimize the scale of a TreeLikelihood.
static std::shared_ptr< NNIHomogeneousTreeLikelihood > optimizeTreeNNI(std::shared_ptr< NNIHomogeneousTreeLikelihood > tl, const ParameterList &parameters, bool optimizeNumFirst=true, double tolBefore=100, double tolDuring=100, unsigned int tlEvalMax=1000000, unsigned int numStep=1, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, bool reparametrization=false, unsigned int verbose=1, const std::string &optMethod=OptimizationTools::OPTIMIZATION_NEWTON, unsigned int nStep=1, const std::string &nniMethod=NNITopologySearch::PHYML)
Optimize all parameters from a TreeLikelihood object, including tree topology using Nearest Neighbor ...
static unsigned int optimizeNumericalParameters(std::shared_ptr< DiscreteRatesAcrossSitesTreeLikelihoodInterface > tl, const ParameterList &parameters, std::shared_ptr< OptimizationListener > listener=nullptr, unsigned int nstep=1, double tolerance=0.000001, unsigned int tlEvalMax=1000000, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, bool reparametrization=false, unsigned int verbose=1, const std::string &optMethodDeriv=OPTIMIZATION_NEWTON, const std::string &optMethodModel=OPTIMIZATION_BRENT)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static void setSubstitutionModelParametersInitialValuesWithAliases(BranchModelInterface &model, std::map< std::string, std::string > &unparsedParameterValues, size_t modelNumber, std::shared_ptr< const AlignmentDataInterface > data, std::map< std::string, std::string > &sharedParams, bool verbose)
Set parameter initial values of a given model in a set according to options.
static void completeMixedSubstitutionModelSet(MixedSubstitutionModelSet &mixedModelSet, std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Complete a MixedSubstitutionModelSet object according to options, given this model has already been f...
static std::unique_ptr< SubstitutionModelSet > getSubstitutionModelSet(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gcode, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Gets a SubstitutionModelSet object according to options.
static void setSubstitutionModelSet(SubstitutionModelSet &modelSet, std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gcode, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Sets a SubstitutionModelSet object according to options.
static void writeTrees(const std::vector< const Tree * > &trees, const std::map< std::string, std::string > &params, const std::string &prefix="output.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, bool checkOnly=false, int warn=1)
Write a tree according to options.
static void printParameters(const SubstitutionModelSet &modelSet, OutputStream &out, int warn=1, bool withAlias=true)
Output a SubstitutionModelSet description to a file.
static std::shared_ptr< TreeLikelihoodInterface > optimizeParameters(std::shared_ptr< TreeLikelihoodInterface > tl, const ParameterList &parameters, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Optimize parameters according to options.
bool isComplete() const
checks if this HyperNode includes at least a submodel of each mixed model
Substitution models manager for Mixed Substitution Models. This class inherits from SubstitutionModel...
void addToHyperNode(size_t nM, const Vuint &vnS, int nH=-1)
static const std::string PHYML
static const std::string FAST
static const std::string BETTER
The so-called 'newick' parenthetic format.
Definition: Newick.h:58
a simple parser for reading trees from a Nexus file.
Definition: NexusIoTree.h:36
The so-called 'Nhx - New Hampshire eXtended' parenthetic format.
Definition: Nhx.h:62
General interface for tree writers.
Definition: IoTree.h:382
virtual void writeTrees(const std::vector< const Tree * > &trees, const std::string &path, bool overwrite) const =0
Write trees to a file.
static std::string OPTIMIZATION_NEWTON
static std::string OPTIMIZATION_BFGS
static std::string OPTIMIZATION_BRENT
static std::string OPTIMIZATION_GRADIENT
virtual OutputStream & endLine()=0
virtual const ParameterList & getIndependentParameters() const=0
virtual bool hasParameter(const std::string &name) const
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual void deleteParameter(const std::string &name)
virtual void reset()
virtual const Parameter & parameter(const std::string &name) const
virtual bool matchParametersValues(const ParameterList &params, std::vector< size_t > *updatedParameters=0)
virtual void deleteParameters(const std::vector< std::string > &names, bool mustExist=true)
virtual void setParameter(size_t index, const Parameter &param)
virtual size_t whichParameterHasName(const std::string &name) const
virtual std::string parameter() const
virtual const ConstraintInterface & constraint() const
virtual std::shared_ptr< const ConstraintInterface > getConstraint() const
virtual void setConstraint(std::shared_ptr< ConstraintInterface > constraint)
virtual const std::string & getName() const
virtual bool hasConstraint() const
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const=0
virtual bool matchParametersValues(const ParameterList &parameters)=0
virtual std::string getNamespace() const=0
static std::unique_ptr< FrequencySetInterface > getRootFrequencySet(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &sharedparams, const std::vector< double > &rateFreqs, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Get A FrequencySet object for root frequencies (NH models) according to options.
size_t numberOfRemainingTokens() const
const std::string & nextToken()
bool hasMoreToken() const
Substitution models manager for non-homogeneous / non-reversible models of evolution.
const FrequencySetInterface & rootFrequencySet() const
void setRootFrequencies(std::shared_ptr< FrequencySetInterface > rootFreqs)
Sets a given FrequencySet for root frequencies.
void addModel(std::shared_ptr< TransitionModelInterface > model, const std::vector< int > &nodesId)
Add a new model to the set, and set relationships with nodes and params.
const std::vector< int > & getNodesWithModel(size_t i) const
Get a list of nodes id for which the given model is associated.
void clear()
Resets all the information contained in this object.
std::shared_ptr< const TransitionModelInterface > getModel(size_t i) const
Get one model from the set knowing its index.
Interface for all transition models.
static std::unique_ptr< TreeTemplate< Node > > getRandomTree(std::vector< std::string > &leavesNames, bool rooted=true)
Draw a random tree from a list of taxa, using a Yule process.
static double convertToClockTree(Tree &tree, int nodeId, bool noneg=false)
Modify a tree's branch lengths to make a clock tree, by rebalancing branch lengths.
Definition: TreeTools.cpp:598
static void constrainedMidPointRooting(Tree &tree)
Determine the mid-point position of the root along the branch that already contains the root....
Definition: TreeTools.cpp:1147
static void computeBranchLengthsGrafen(Tree &tree, double power=1, bool init=true)
Compute branch lengths using Grafen's method.
Definition: TreeTools.cpp:583
static double getHeight(const Tree &tree, int nodeId)
Get the height of the subtree defined by node 'node', i.e. the maximum distance between leaves and th...
Definition: TreeTools.cpp:172
static void diff(std::vector< T > &v1, std::vector< T > &v2, std::vector< T > &v3)
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isEmpty(const std::string &s)
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.
double convert(const bpp::ExtendedFloat &ef)
ExtendedFloat abs(const ExtendedFloat &ef)
std::vector< unsigned int > Vuint