bpp-phyl3  3.0.0
PhylogeneticsApplicationTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../../App/PhylogeneticsApplicationTools.h"
6 #include "../../Io/BppOBranchModelFormat.h"
7 #include "../../Io/BppOFrequencySetFormat.h"
8 #include "../../Io/BppOMultiTreeReaderFormat.h"
9 #include "../../Io/BppOSubstitutionModelFormat.h"
10 #include "../../Io/BppOTransitionModelFormat.h"
11 #include "../../Io/BppOTreeReaderFormat.h"
12 #include "../../Io/Newick.h"
13 #include "../../Io/NexusIoTree.h"
14 #include "../../Io/Nhx.h"
15 #include "../../Model/FrequencySet/MvaFrequencySet.h"
16 #include "../../Model/MixedTransitionModel.h"
17 #include "../../Model/Protein/Coala.h"
18 #include "../../Model/SubstitutionModel.h"
19 #include "../../Model/WrappedModel.h"
20 #include "../../Tree/Tree.h"
21 #include "../../Tree/TreeTools.h"
22 #include "../OptimizationTools.h"
24 
25 // From bpp-core
28 #include <Bpp/Io/FileTools.h>
29 #include <Bpp/Text/TextTools.h>
32 #include <Bpp/Text/KeyvalTools.h>
34 #include <Bpp/Numeric/DataTable.h>
41 
42 // From bpp-seq:
47 
48 using namespace bpp;
49 
50 // From the STL:
51 #include <fstream>
52 #include <memory>
53 #include <set>
54 #include <vector>
55 
56 using namespace std;
57 
58 /*************************************************************/
59 /***************** TREES ************************************/
60 /*************************************************************/
61 
62 
63 /******************************************************************************/
64 
65 
66 map<size_t, shared_ptr<Tree>> LegacyPhylogeneticsApplicationTools::getTrees(
67  const map<string, string>& params,
68  const map<size_t, std::shared_ptr<AlignmentDataInterface>>& mSeq,
69  map<string, string>& unparsedParams,
70  const string& prefix,
71  const string& suffix,
72  bool suffixIsOptional,
73  bool verbose,
74  int warn)
75 {
76  vector<string> vTreesName = ApplicationTools::matchingParameters(prefix + "tree*", params);
77 
78  map<size_t, shared_ptr<Tree>> mTree;
79 
80  for (size_t nT = 0; nT < vTreesName.size(); nT++)
81  {
82  size_t poseq = vTreesName[nT].find("=");
83  size_t num = 0;
84  size_t len = (prefix + "tree").size();
85  string suff = vTreesName[nT].substr(len, poseq - len);
86  bool flag = 0;
87  size_t nbTree = 1;
88 
89  if (TextTools::isDecimalInteger(suff, '$'))
90  num = static_cast<size_t>(TextTools::toInt(suff));
91  else
92  {
93  flag = 1;
94  num = 1;
95  }
96 
97  if (!flag)
98  {
101  }
102 
103  string treeDesc = ApplicationTools::getStringParameter(vTreesName[nT], params, "", suffix, suffixIsOptional);
104 
105  string treeName;
106 
107  map<string, string> args;
108 
109  KeyvalTools::parseProcedure(treeDesc, treeName, args);
110 
111  if (treeName == "user")
112  {
113  string format;
114 
115  if (args.find("format") != args.end())
116  format = args["format"];
117  else
118  {
119  format = "Newick";
120  ApplicationTools::displayWarning("Warning, " + vTreesName[nT] + " format set to Newick");
121  }
122 
123  string treeFilePath = ApplicationTools::getAFilePath("file", args, true, true, suffix, suffixIsOptional, "none", warn);
124 
125  unique_ptr<IMultiTree> treeReader;
126  if (format == "Newick")
127  treeReader.reset(new Newick(true));
128  else if (format == "Nexus")
129  treeReader.reset(new NexusIOTree());
130  else if (format == "NHX")
131  treeReader.reset(new Nhx());
132  else
133  throw Exception("Unknown format for tree reading: " + format);
134 
135  vector<unique_ptr<Tree>> trees;
136  treeReader->readTrees(treeFilePath, trees);
137 
138  if (verbose)
139  {
140  if (flag)
141  {
143  ApplicationTools::displayResult("Tree file", treeFilePath);
144  }
145 
146  ApplicationTools::displayResult("Number of trees in file", trees.size());
147  }
148 
149  if (flag)
150  {
151  nbTree = trees.size();
152 
153  for (size_t i2 = 0; i2 < trees.size(); i2++)
154  {
155  if (mTree.find(i2 + 1) != mTree.end())
156  {
157  ApplicationTools::displayWarning("Tree " + TextTools::toString(i2 + 1) + " already assigned, replaced by new one.");
158  }
159 
160  mTree[i2 + 1] = std::move(trees[i2]);
161  ApplicationTools::displayResult("Number of leaves", trees[i2]->getNumberOfLeaves());
162  }
163  }
164  else
165  {
166  if (trees.size() > 1)
167  throw Exception("Error : Several trees for description of " + vTreesName[nT] + ".");
168 
169  if (trees.size() == 1)
170  {
171  if (mTree.find(num) != mTree.end())
172  {
173  ApplicationTools::displayWarning("Tree " + TextTools::toString(num) + " already assigned, replaced by new one.");
174  }
175  mTree[num] = std::move(trees[0]);
176  ApplicationTools::displayResult("Number of leaves", trees[0]->getNumberOfLeaves());
177  }
178  }
179  }
180  else if (treeName == "random")
181  {
182  size_t seqNum;
183 
184  if (args.find("data") == args.end())
185  {
186  ApplicationTools::displayWarning("Random tree set from data 1");
187  seqNum = 1;
188  }
189  else
190  seqNum = (size_t) TextTools::toInt(args["data"]);
191 
192 
193  if (mSeq.find(seqNum) == mSeq.end())
194  throw Exception("Error : Wrong number of data " + TextTools::toString(seqNum));
195 
196  vector<string> names = mSeq.find(seqNum)->second->getSequenceNames();
197  auto tree = TreeTemplateTools::getRandomTree(names);
198  tree->setBranchLengths(1.);
199 
200  if (mTree.find(num) != mTree.end())
201  {
202  ApplicationTools::displayWarning("Tree " + TextTools::toString(num) + " already assigned, replaced by new one.");
203  }
204  mTree[num] = std::move(tree);
205  ApplicationTools::displayResult("Number of leaves", tree->getNumberOfLeaves());
206  }
207 
208  // //////////
209  // Setting branch lengths?
210  string initBrLenMethod = ApplicationTools::getStringParameter("init.brlen.method", args, "Input", "", true, 1);
211  string cmdName;
212  map<string, string> cmdArgs;
213 
214  KeyvalTools::parseProcedure(initBrLenMethod, cmdName, cmdArgs);
215  if (cmdName == "Input")
216  {
217  // Is the root has to be moved to the midpoint position along the branch that contains it ? If no, do nothing!
218  string midPointRootBrLengths = ApplicationTools::getStringParameter("midPointRootBrLengths", cmdArgs, "no", "", true, 2);
219  if (midPointRootBrLengths == "yes")
220  {
221  if (flag)
222  {
223  for (size_t i = 0; i < nbTree; i++)
224  {
226  }
227  }
228  else
230  }
231  }
232  else if (cmdName == "Equal")
233  {
234  double value = ApplicationTools::getDoubleParameter("value", cmdArgs, 0.1, "", true, 2);
235  if (value <= 0)
236  throw Exception("Value for branch length must be superior to 0");
237  ApplicationTools::displayResult("Branch lengths set to", value);
238  if (flag)
239  {
240  for (size_t i = 0; i < nbTree; i++)
241  {
242  mTree[i + 1]->setBranchLengths(value);
243  }
244  }
245  else
246  mTree[num]->setBranchLengths(value);
247  }
248  else if (cmdName == "Clock")
249  {
250  if (flag)
251  {
252  for (size_t i = 0; i < nbTree; i++)
253  {
254  TreeTools::convertToClockTree(*mTree[i + 1], mTree[i + 1]->getRootId(), true);
255  }
256  }
257  else
258  TreeTools::convertToClockTree(*mTree[num], mTree[num]->getRootId(), true);
259  }
260  else if (cmdName == "Grafen")
261  {
262  string grafenHeight = ApplicationTools::getStringParameter("height", cmdArgs, "input", "", true, 2);
263  double h;
264  if (flag)
265  {
266  for (size_t i = 0; i < nbTree; i++)
267  {
268  auto& tree = *mTree[i + 1];
269  if (grafenHeight == "input")
270  {
271  h = TreeTools::getHeight(tree, tree.getRootId());
272  }
273  else
274  {
275  h = TextTools::toDouble(grafenHeight);
276  if (h <= 0)
277  throw Exception("Height must be positive in Grafen's method.");
278  }
280 
281  double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, 2);
282  ApplicationTools::displayResult("Grafen's rho", rho);
284  double nh = TreeTools::getHeight(tree, tree.getRootId());
285  tree.scaleTree(h / nh);
286  }
287  }
288  else
289  {
290  auto& tree = *mTree[num];
291  if (grafenHeight == "input")
292  {
293  h = TreeTools::getHeight(tree, tree.getRootId());
294  }
295  else
296  {
297  h = TextTools::toDouble(grafenHeight);
298  if (h <= 0)
299  throw Exception("Height must be positive in Grafen's method.");
300  }
302 
303  double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, 2);
304  ApplicationTools::displayResult("Grafen's rho", rho);
306  double nh = TreeTools::getHeight(tree, tree.getRootId());
307  tree.scaleTree(h / nh);
308  }
309  }
310  else
311  throw Exception("Method '" + initBrLenMethod + "' unknown for computing branch lengths.");
312 
313  // //////////// Setting branch lengths with aliases
314 
315  vector<string> vBrNb = ApplicationTools::matchingParameters("BrLen*", args);
316 
317  for (size_t ib = 0; ib < vBrNb.size(); ib++)
318  {
319  string apeq = args[vBrNb[ib]];
320  string aveq = vBrNb[ib];
321 
322  if (TextTools::isDecimalInteger(apeq))
323  mTree[num]->setDistanceToFather(TextTools::toInt(aveq.substr(5, string::npos)), TextTools::toDouble(apeq));
324  else
325  {
326  size_t posun = apeq.find("_");
327  size_t posd = aveq.find("_");
328  unparsedParams[aveq + (posd != string::npos ? "" : "_" + TextTools::toString(num))] = apeq + (posun != string::npos ? "" : "_" + TextTools::toString(num));
329  }
330  }
331 
332  ApplicationTools::displayResult("Branch lengths", cmdName);
333  }
334 
335  return mTree;
336 }
337 
338 
339 /******************************************************/
340 /**** SUBSTITUTION MODEL SET **************************/
341 /******************************************************/
342 
343 /******************************************************************************/
344 
346  BranchModelInterface& model,
347  map<string, string>& unparsedParameterValues,
348  size_t modelNumber,
349  std::shared_ptr<const AlignmentDataInterface> data,
350  map<string, string>& sharedParams,
351  bool verbose)
352 {
353  string initFreqs = ApplicationTools::getStringParameter(model.getNamespace() + "initFreqs", unparsedParameterValues, "", "", true, 2);
354 
355  if (verbose)
356  ApplicationTools::displayResult("Frequencies Initialization for model", (initFreqs == "") ? "None" : initFreqs);
357 
358  if (initFreqs != "")
359  {
360  try
361  {
362  auto& tmodel = dynamic_cast<TransitionModelInterface&>(model);
363  if (initFreqs == "observed")
364  {
365  if (!data)
366  throw Exception("Missing data for observed frequencies");
367  unsigned int psi = ApplicationTools::getParameter<unsigned int>(model.getNamespace() + "initFreqs.observedPseudoCount", unparsedParameterValues, 0);
368  tmodel.setFreqFromData(*data, psi);
369  }
370  else if (initFreqs.substr(0, 6) == "values")
371  {
372  // Initialization using the "values" argument
373  map<int, double> frequencies;
374 
375  string rf = initFreqs.substr(6);
376  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
377  int i = 0;
378  while (strtok.hasMoreToken())
379  frequencies[i++] = TextTools::toDouble(strtok.nextToken());
380  tmodel.setFreq(frequencies);
381  }
382  else
383  throw Exception("Unknown initFreqs argument");
384  }
385  catch (exception& e)
386  {
387  ApplicationTools::displayMessage("Frequencies initialization not possible for model " + model.getName());
388  }
389  }
390 
392  for (size_t i = 0; i < pl.size(); ++i)
393  {
394  AutoParameter ap(pl[i]);
396  pl.setParameter(i, ap);
397  }
398  for (size_t i = 0; i < pl.size(); ++i)
399  {
400  const string pName = pl[i].getName();
401  size_t posp = model.getParameterNameWithoutNamespace(pName).rfind(".");
402  string value;
403  bool test1 = (initFreqs == "");
404  bool test2 = (model.getParameterNameWithoutNamespace(pName).substr(posp + 1, 5) != "theta");
405  bool test3 = (unparsedParameterValues.find(pName) != unparsedParameterValues.end());
406 
407  if (test1 || test2 || test3)
408  {
409  if (!test1 && !test2 && test3)
410  ApplicationTools::displayWarning("Warning, initFreqs argument is set and a value is set for parameter " + pName);
411 
412  value = ApplicationTools::getStringParameter(pName, unparsedParameterValues, TextTools::toString(pl[i].getValue()));
413 
414  try
415  {
416  pl[i].setValue(TextTools::toDouble(value));
417  if (verbose)
418  ApplicationTools::displayResult("Parameter found", pName + +"_" + TextTools::toString(modelNumber) + "=" + TextTools::toString(pl[i].getValue()));
419  }
420  catch (Exception& e)
421  {
422  sharedParams[pl[i].getName() + "_" + TextTools::toString(modelNumber)] = value;
423  }
424  }
425  }
426 
427  model.matchParametersValues(pl);
428 }
429 
430 /******************************************************************************/
431 
433  shared_ptr<const Alphabet> alphabet,
434  shared_ptr<const GeneticCode> gCode,
435  shared_ptr<const AlignmentDataInterface> data,
436  const map<string, string>& params,
437  const string& suffix,
438  bool suffixIsOptional,
439  bool verbose,
440  int warn)
441 {
442  if (!ApplicationTools::parameterExists("nonhomogeneous.number_of_models", params))
443  throw Exception("A value is needed for this parameter: nonhomogeneous.number_of_models .");
444  size_t nbModels = ApplicationTools::getParameter<size_t>("nonhomogeneous.number_of_models", params, 1, suffix, suffixIsOptional, warn);
445  if (nbModels == 0)
446  throw Exception("The number of models can't be 0 !");
447 
448  bool nomix = true;
449  for (size_t i = 0; nomix& (i < nbModels); i++)
450  {
451  string prefix = "model" + TextTools::toString(i + 1);
452  string modelDesc;
453  modelDesc = ApplicationTools::getStringParameter(prefix, params, "", suffix, suffixIsOptional, warn);
454 
455  if (modelDesc.find("Mixed") != string::npos)
456  nomix = false;
457  }
458 
459  unique_ptr<SubstitutionModelSet> modelSet = nullptr;
460  auto modelSet1 = make_unique<SubstitutionModelSet>(alphabet);
461  setSubstitutionModelSet(*modelSet1, alphabet, gCode, data, params, suffix, suffixIsOptional, verbose, warn);
462 
463  if (modelSet1->hasMixedTransitionModel())
464  {
465  modelSet = make_unique<MixedSubstitutionModelSet>(*modelSet1);
466  completeMixedSubstitutionModelSet(dynamic_cast<MixedSubstitutionModelSet&>(*modelSet), alphabet, data, params, suffix, suffixIsOptional, verbose);
467  }
468  else
469  modelSet = std::move(modelSet1);
470 
471  return modelSet;
472 }
473 
474 /******************************************************************************/
475 
477  SubstitutionModelSet& modelSet,
478  shared_ptr<const Alphabet> alphabet,
479  shared_ptr<const GeneticCode> gCode,
480  shared_ptr<const AlignmentDataInterface> data,
481  const map<string, string>& params,
482  const string& suffix,
483  bool suffixIsOptional,
484  bool verbose,
485  int warn)
486 {
487  modelSet.clear();
488  if (!ApplicationTools::parameterExists("nonhomogeneous.number_of_models", params))
489  throw Exception("You must specify this parameter: 'nonhomogeneous.number_of_models'.");
490  size_t nbModels = ApplicationTools::getParameter<size_t>("nonhomogeneous.number_of_models", params, 1, suffix, suffixIsOptional, warn);
491  if (nbModels == 0)
492  throw Exception("The number of models can't be 0 !");
493 
494  if (verbose)
495  ApplicationTools::displayResult("Number of distinct models", TextTools::toString(nbModels));
496 
497  BppOTransitionModelFormat bIO(BppOTransitionModelFormat::ALL, true, true, true, false, warn);
498 
499  // ///////////////////////////////////////////
500  // Build a new model set object:
501 
502  vector<double> rateFreqs;
503  string tmpDesc;
504  if (AlphabetTools::isCodonAlphabet(*alphabet))
505  {
506  if (!gCode)
507  throw Exception("PhylogeneticsApplicationTools::setSubstitutionModelSet(): a GeneticCode instance is required for instantiating a codon model.");
508  bIO.setGeneticCode(gCode);
509  tmpDesc = ApplicationTools::getStringParameter("model1", params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
510  }
511  else if (AlphabetTools::isWordAlphabet(*alphabet))
512  tmpDesc = ApplicationTools::getStringParameter("model1", params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
513  else
514  tmpDesc = ApplicationTools::getStringParameter("model1", params, "JC69", suffix, suffixIsOptional, warn);
515 
516  std::map<size_t, std::shared_ptr<const AlignmentDataInterface>> mData;
517  mData[1]=data;
518 
519  shared_ptr<TransitionModelInterface> tmp = bIO.readTransitionModel(alphabet, tmpDesc, mData, 1, false);
520 
521  if (tmp->getNumberOfStates() != alphabet->getSize())
522  {
523  // Markov-Modulated Markov Model...
524 
525  size_t n = static_cast<size_t>(tmp->getNumberOfStates() / alphabet->getSize());
526  rateFreqs = vector<double>(n, 1. / static_cast<double>(n));
527  // Equal rates assumed for now, may be changed later
528  }
529 
530  // ////////////////////////////////////
531  // Deal with root frequencies
532 
533  map<string, string> unparsedParameters;
534 
535  bool stationarity = ApplicationTools::getBooleanParameter("nonhomogeneous.stationarity", params, false, "", true, warn);
536  std::shared_ptr<FrequencySetInterface> rootFrequencies(0);
537  if (!stationarity)
538  {
539  rootFrequencies = PhylogeneticsApplicationTools::getRootFrequencySet(alphabet, gCode, mData, 1, params, unparsedParameters, rateFreqs, suffix, suffixIsOptional, verbose);
540  stationarity = !rootFrequencies;
541  string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", params, "", suffix, suffixIsOptional, warn);
542  if (freqDescription.substr(0, 10) == "MVAprotein")
543  {
544  auto tmp2 = dynamic_pointer_cast<Coala>(tmp);
545  if (tmp2)
546  dynamic_pointer_cast<MvaFrequencySet>(rootFrequencies)->initSet(*tmp2);
547  else
548  throw Exception("The MVAprotein frequencies set at the root can only be used if a Coala model is used on branches.");
549  }
550  }
551 
552  ApplicationTools::displayBooleanResult("Stationarity assumed", stationarity);
553 
554  if (!stationarity)
555  modelSet.setRootFrequencies(rootFrequencies);
556 
557 
558  // //////////////////////////////////////
559  // Now parse all models:
560 
561  bIO.setVerbose(true);
562 
563  for (size_t i = 0; i < nbModels; ++i)
564  {
565  string prefix = "model" + TextTools::toString(i + 1);
566  string modelDesc;
567  if (AlphabetTools::isCodonAlphabet(*alphabet))
568  modelDesc = ApplicationTools::getStringParameter(prefix, params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
569  else if (AlphabetTools::isWordAlphabet(*alphabet))
570  modelDesc = ApplicationTools::getStringParameter(prefix, params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
571  else
572  modelDesc = ApplicationTools::getStringParameter(prefix, params, "JC69", suffix, suffixIsOptional, warn);
573 
574  auto model = bIO.readTransitionModel(alphabet, modelDesc, mData, 1, false);
575 
576  map<string, string> unparsedModelParameters = bIO.getUnparsedArguments();
577  map<string, string> sharedParameters;
578 
579 
580  setSubstitutionModelParametersInitialValuesWithAliases(
581  *model,
582  unparsedModelParameters, i + 1, data,
583  sharedParameters,
584  verbose);
585 
586  unparsedParameters.insert(sharedParameters.begin(), sharedParameters.end());
587 
588  vector<int> nodesId = ApplicationTools::getVectorParameter<int>(prefix + ".nodes_id", params, ',', ':', "", suffix, suffixIsOptional, warn);
589 
590  if (verbose)
591  ApplicationTools::displayResult("Model" + TextTools::toString(i + 1) + " is associated to", TextTools::toString(nodesId.size()) + " node(s).");
592 
593  modelSet.addModel(std::move(model), nodesId);
594  }
595 
596  // Finally check parameter aliasing:
597 
598  string aliasDesc = ApplicationTools::getStringParameter("nonhomogeneous.alias", params, "", suffix, suffixIsOptional, warn);
599  StringTokenizer st(aliasDesc, ",");
600  while (st.hasMoreToken())
601  {
602  string alias = st.nextToken();
603  string::size_type index = alias.find("->");
604  if (index == string::npos)
605  throw Exception("PhylogeneticsApplicationTools::setSubstitutionModelSet. Bad alias syntax, should contain `->' symbol: " + alias);
606  unparsedParameters[alias.substr(0, index)] = alias.substr(index + 2);
607  }
608 
609  // alias unparsedParameters
610 
611  modelSet.aliasParameters(unparsedParameters, verbose);
612 }
613 
614 /******************************************************************************/
615 
617  MixedSubstitutionModelSet& mixedModelSet,
618  shared_ptr<const Alphabet> alphabet,
619  shared_ptr<const AlignmentDataInterface> data,
620  const map<string, string>& params,
621  const string& suffix,
622  bool suffixIsOptional,
623  bool verbose,
624  int warn)
625 {
626  // /////////////////////////////////////////
627  // Looks for the allowed paths
628 
629  size_t numd;
630  if (!ApplicationTools::parameterExists("site.number_of_paths", params))
631  numd = 0;
632  else
633  numd = ApplicationTools::getParameter<size_t>("site.number_of_paths", params, 1, suffix, suffixIsOptional, warn);
634 
635  if (verbose)
636  ApplicationTools::displayResult("Number of distinct paths", TextTools::toString(numd));
637 
638  vector<string> vdesc;
639  while (numd)
640  {
641  string desc = ApplicationTools::getStringParameter("site.path" + TextTools::toString(numd), params, "", suffix, suffixIsOptional, warn);
642  if (desc.size() == 0)
643  break;
644  else
645  vdesc.push_back(desc);
646  numd--;
647  }
648 
649  if (vdesc.size() == 0)
650  {
651  mixedModelSet.complete();
652  mixedModelSet.computeHyperNodesProbabilities();
653  return;
654  }
655 
656  for (vector<string>::iterator it(vdesc.begin()); it != vdesc.end(); it++)
657  {
658  mixedModelSet.addEmptyHyperNode();
659  StringTokenizer st(*it, "&");
660  while (st.hasMoreToken())
661  {
662  string submodel = st.nextToken();
663  string::size_type indexo = submodel.find("[");
664  string::size_type indexf = submodel.find("]");
665  if ((indexo == string::npos) | (indexf == string::npos))
666  throw Exception("PhylogeneticsApplicationTools::setMixedSubstitutionModelSet. Bad path syntax, should contain `[]' symbols: " + submodel);
667  int num = TextTools::toInt(submodel.substr(5, indexo - 5));
668  string p2 = submodel.substr(indexo + 1, indexf - indexo - 1);
669 
670  auto pSM = dynamic_pointer_cast<const MixedTransitionModelInterface>(mixedModelSet.getModel(static_cast<size_t>(num - 1)));
671  if (!pSM)
672  throw BadIntegerException("LegacyPhylogeneticsApplicationTools::setMixedSubstitutionModelSet: Wrong model for number", num - 1);
673  Vuint submodnb = pSM->getSubmodelNumbers(p2);
674 
675  mixedModelSet.addToHyperNode(static_cast<size_t>(num - 1), submodnb);
676  }
677 
678  if (!mixedModelSet.getHyperNode(mixedModelSet.getNumberOfHyperNodes() - 1).isComplete())
679  throw Exception("A path should own at least a submodel of each mixed model: " + *it);
680 
681  if (verbose)
682  ApplicationTools::displayResult("Site Path", *it);
683  }
684 
685  // / Checks if the paths are separate
686  if (!mixedModelSet.hasExclusivePaths())
687  throw Exception("All paths must be disjoint.");
688 
689  // / Puts all the remaining models in a new path
690  string st;
691  st = (mixedModelSet.complete()) ? "Yes" : "No";
692 
693  if (verbose)
694  ApplicationTools::displayResult("Site Path Completion", st);
695 
696  mixedModelSet.computeHyperNodesProbabilities();
697 
698  if (!mixedModelSet.getHyperNode(mixedModelSet.getNumberOfHyperNodes() - 1).isComplete())
699  throw Exception("The remaining submodels can not create a complete path.");
700 }
701 
702 
703 /*************************************************************/
704 /***** OPTIMIZATORS *****************************************/
705 /*************************************************************/
706 
707 /******************************************************************************/
708 
710  shared_ptr<TreeLikelihoodInterface> tl,
711  const ParameterList& parameters,
712  const map<string, string>& params,
713  const string& suffix,
714  bool suffixIsOptional,
715  bool verbose,
716  int warn)
717 {
718  string optimization = ApplicationTools::getStringParameter("optimization", params, "FullD(derivatives=Newton)", suffix, suffixIsOptional, warn);
719  if (optimization == "None")
720  return tl;
721  string optName;
722  map<string, string> optArgs;
723  KeyvalTools::parseProcedure(optimization, optName, optArgs);
724 
725  unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", params, 2, suffix, suffixIsOptional, warn);
726 
727  string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", params, false, false, suffix, suffixIsOptional);
728  shared_ptr<OutputStream> messageHandler =
729  (mhPath == "none") ? nullptr :
730  (mhPath == "std") ? ApplicationTools::message :
731  make_shared<StlOutputStream>(make_unique<ofstream>(mhPath.c_str(), ios::out));
732  if (verbose)
733  ApplicationTools::displayResult("Message handler", mhPath);
734 
735  string prPath = ApplicationTools::getAFilePath("optimization.profiler", params, false, false, suffix, suffixIsOptional);
736  shared_ptr<OutputStream> profiler =
737  (prPath == "none") ? nullptr :
738  (prPath == "std") ? ApplicationTools::message :
739  make_shared<StlOutputStream>(make_unique<ofstream>(prPath.c_str(), ios::out));
740  if (profiler)
741  profiler->setPrecision(20);
742  if (verbose)
743  ApplicationTools::displayResult("Profiler", prPath);
744 
745  bool scaleFirst = ApplicationTools::getBooleanParameter("optimization.scale_first", params, false, suffix, suffixIsOptional, warn);
746  if (scaleFirst)
747  {
748  // We scale the tree before optimizing each branch length separately:
749  if (verbose)
750  ApplicationTools::displayMessage("Scaling the tree before optimizing each branch length separately.");
751  double tolerance = ApplicationTools::getDoubleParameter("optimization.scale_first.tolerance", params, .0001, suffix, suffixIsOptional, warn + 1);
752  if (verbose)
753  ApplicationTools::displayResult("Scaling tolerance", TextTools::toString(tolerance));
754  unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.scale_first.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, warn + 1);
755  if (verbose)
756  ApplicationTools::displayResult("Scaling max # f eval", TextTools::toString(nbEvalMax));
758  tl,
759  tolerance,
760  nbEvalMax,
761  messageHandler,
762  profiler);
763  if (verbose)
764  ApplicationTools::displayResult("New tree likelihood", -tl->getValue());
765  }
766 
767  // Should I ignore some parameters?
768  ParameterList parametersToEstimate = parameters;
769  vector<string> parNames = parametersToEstimate.getParameterNames();
770 
771  string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameter", params, "", suffix, suffixIsOptional, warn + 1);
772  if (paramListDesc.length() == 0)
773  paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameters", params, "", suffix, suffixIsOptional, warn + 1);
774  StringTokenizer st(paramListDesc, ",");
775  while (st.hasMoreToken())
776  {
777  try
778  {
779  string param = st.nextToken();
780  if (param == "BrLen")
781  {
782  vector<string> vs = tl->getBranchLengthsParameters().getParameterNames();
783  parametersToEstimate.deleteParameters(vs);
784  if (verbose)
785  ApplicationTools::displayResult("Parameter ignored", string("Branch lengths"));
786  }
787  else if (param == "Ancient")
788  {
789  auto nhtl = dynamic_pointer_cast<NonHomogeneousTreeLikelihood>(tl);
790  if (!nhtl)
791  ApplicationTools::displayWarning("The 'Ancient' parameters do not exist in homogeneous models, and will be ignored.");
792  else
793  {
794  vector<string> vs = nhtl->getRootFrequenciesParameters().getParameterNames();
795  parametersToEstimate.deleteParameters(vs);
796  }
797  if (verbose)
798  ApplicationTools::displayResult("Parameter ignored", string("Root frequencies"));
799  }
800  else if (param == "Model")
801  {
802  vector<string> vs;
803  vector<string> vs1 = tl->getSubstitutionModelParameters().getParameterNames();
804  auto nhtl = dynamic_pointer_cast<NonHomogeneousTreeLikelihood>(tl);
805  if (nhtl)
806  {
807  vector<string> vs2 = nhtl->getRootFrequenciesParameters().getParameterNames();
808  VectorTools::diff(vs1, vs2, vs);
809  }
810  else
811  vs = vs1;
812 
813  parametersToEstimate.deleteParameters(vs);
814  if (verbose)
815  ApplicationTools::displayResult("Parameter ignored", string("Model"));
816  }
817  else if (param == "*")
818  {
819  parametersToEstimate.reset();
820  if (verbose)
821  ApplicationTools::displayResult("Parameter ignored", string("All"));
822  }
823  else if (param.find("*") != string::npos)
824  {
825  vector<string> vs = ApplicationTools::matchingParameters(param, parNames);
826 
827  bool verbhere = verbose;
828 
829  if (vs.size() >= 20)
830  {
831  if (verbose)
832  ApplicationTools::displayResult("Number of parameters ignored", vs.size());
833  verbhere = false;
834  }
835 
836  for (auto& it : vs)
837  {
838  parametersToEstimate.deleteParameter(it);
839  if (verbhere)
840  ApplicationTools::displayResult("Parameter ignored", it);
841  }
842  }
843  else
844  {
845  parametersToEstimate.deleteParameter(param);
846  if (verbose)
847  ApplicationTools::displayResult("Parameter ignored", param);
848  }
849  }
850  catch (ParameterNotFoundException& pnfe)
851  {
852  ApplicationTools::displayWarning("Parameter '" + pnfe.parameter() + "' not found, and so can't be ignored!");
853  }
854  }
855 
856  // Should I constrain some parameters?
857  vector<string> parToEstNames = parametersToEstimate.getParameterNames();
858 
859  paramListDesc = ApplicationTools::getStringParameter("optimization.constrain_parameter", params, "", suffix, suffixIsOptional, warn);
860  if (paramListDesc.length() == 0)
861  paramListDesc = ApplicationTools::getStringParameter("optimization.constrain_parameters", params, "", suffix, suffixIsOptional, warn);
862 
863  string constraint = "";
864  string pc, param = "";
865 
866  StringTokenizer st2(paramListDesc, ",");
867  while (st2.hasMoreToken())
868  {
869  try
870  {
871  pc = st2.nextToken();
872  string::size_type index = pc.find("=");
873  if (index == string::npos)
874  throw Exception("PhylogeneticsApplicationTools::optimizeParameters. Bad constrain syntax, should contain `=' symbol: " + pc);
875  param = pc.substr(0, index);
876  constraint = pc.substr(index + 1);
877  std::shared_ptr<IntervalConstraint> ic(new IntervalConstraint(constraint));
878 
879  vector<string> parNames2;
880 
881  if (param == "BrLen")
882  parNames2 = tl->getBranchLengthsParameters().getParameterNames();
883  else if (param == "Ancient")
884  {
885  auto nhtl = dynamic_pointer_cast<NonHomogeneousTreeLikelihood>(tl);
886  if (!nhtl)
887  ApplicationTools::displayWarning("The 'Ancient' parameters do not exist in homogeneous models, and will be ignored.");
888  else
889  parNames2 = nhtl->getRootFrequenciesParameters().getParameterNames();
890  }
891  else if (param == "Model")
892  {
893  vector<string> vs1 = tl->getSubstitutionModelParameters().getParameterNames();
894  auto nhtl = dynamic_pointer_cast<NonHomogeneousTreeLikelihood>(tl);
895  if (nhtl)
896  {
897  vector<string> vs2 = nhtl->getRootFrequenciesParameters().getParameterNames();
898  VectorTools::diff(vs1, vs2, parNames2);
899  }
900  else
901  parNames2 = vs1;
902  }
903  else if (param == "*")
904  parNames2 = parToEstNames;
905  else if (param.find("*") != string::npos)
906  parNames2 = ApplicationTools::matchingParameters(param, parToEstNames);
907  else
908  parNames2.push_back(param);
909 
910  for (size_t i = 0; i < parNames2.size(); ++i)
911  {
912  Parameter& par = parametersToEstimate.parameter(parNames2[i]);
913  if (par.hasConstraint())
914  {
915  par.setConstraint(std::shared_ptr<ConstraintInterface>(*ic & (*par.getConstraint())));
916  if (par.constraint().isEmpty())
917  throw Exception("Empty interval for parameter " + parNames[i] + par.constraint().getDescription());
918  }
919  else
920  par.setConstraint(ic);
921 
922  if (verbose)
923  ApplicationTools::displayResult("Parameter constrained " + par.getName(), par.constraint().getDescription());
924  }
925  }
926  catch (ParameterNotFoundException& pnfe)
927  {
928  ApplicationTools::displayWarning("Parameter '" + pnfe.parameter() + "' not found, and so can't be constrained!");
929  }
930  catch (ConstraintException& pnfe)
931  {
932  throw Exception("Parameter '" + param + "' does not fit the constraint " + constraint);
933  }
934  }
935 
936 
937  // /////
938  // / optimization options
939 
940  unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, warn + 1);
941  if (verbose)
942  ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax));
943 
944  double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", params, .000001, suffix, suffixIsOptional, warn + 1);
945  if (verbose)
946  ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance));
947 
948  // Backing up or restoring?
949  shared_ptr<BackupListener> backupListener;
950  string backupFile = ApplicationTools::getAFilePath("optimization.backup.file", params, false, false);
951  if (backupFile != "none")
952  {
953  ApplicationTools::displayResult("Parameters will be backup to", backupFile);
954  backupListener.reset(new BackupListener(backupFile));
955  if (FileTools::fileExists(backupFile))
956  {
957  ApplicationTools::displayMessage("A backup file was found! Try to restore parameters from previous run...");
958  ifstream bck(backupFile.c_str(), ios::in);
959  vector<string> lines = FileTools::putStreamIntoVectorOfStrings(bck);
960  double fval = TextTools::toDouble(lines[0].substr(5));
961  ParameterList pl = tl->getParameters();
962  for (size_t l = 1; l < lines.size(); ++l)
963  {
964  if (!TextTools::isEmpty(lines[l]))
965  {
966  StringTokenizer stp(lines[l], "=");
967  if (stp.numberOfRemainingTokens() != 2)
968  {
969  cerr << "Corrupted backup file!!!" << endl;
970  cerr << "at line " << l << ": " << lines[l] << endl;
971  }
972  string pname = stp.nextToken();
973  string pvalue = stp.nextToken();
974  if (pl.hasParameter(pname))
975  {
976  size_t p = pl.whichParameterHasName(pname);
977  pl.setParameter(p, AutoParameter(pl[p]));
978  pl[p].setValue(TextTools::toDouble(pvalue));
979  }
980  else
981  ApplicationTools::displayMessage("Warning: unknown parameter in backup file : " + pname);
982  }
983  }
984  bck.close();
985  tl->setParameters(pl);
986  if (convert(abs(tl->getValue() - fval)) > 0.000001)
987  ApplicationTools::displayWarning("Warning, incorrect likelihood value after restoring from backup file.");
988  ApplicationTools::displayResult("Restoring log-likelihood", -tl->getValue());
989  }
990  }
991 
992  // There it goes...
993  bool optimizeTopo = ApplicationTools::getBooleanParameter("optimization.topology", params, false, suffix, suffixIsOptional, warn + 1);
994  if (verbose)
995  ApplicationTools::displayResult("Optimize topology", optimizeTopo ? "yes" : "no");
996  string nniMethod = ApplicationTools::getStringParameter("optimization.topology.algorithm_nni.method", params, "phyml", suffix, suffixIsOptional, warn + 1);
997  string nniAlgo;
998  if (nniMethod == "fast")
999  {
1000  nniAlgo = NNITopologySearch::FAST;
1001  }
1002  else if (nniMethod == "better")
1003  {
1004  nniAlgo = NNITopologySearch::BETTER;
1005  }
1006  else if (nniMethod == "phyml")
1007  {
1008  nniAlgo = NNITopologySearch::PHYML;
1009  }
1010  else
1011  throw Exception("Unknown NNI algorithm: '" + nniMethod + "'.");
1012 
1013 
1014  string order = ApplicationTools::getStringParameter("derivatives", optArgs, "Newton", "", true, warn + 1);
1015  string optMethodDeriv;
1016  if (order == "Gradient")
1017  {
1019  }
1020  else if (order == "Newton")
1021  {
1022  optMethodDeriv = OptimizationTools::OPTIMIZATION_NEWTON;
1023  }
1024  else if (order == "BFGS")
1025  {
1026  optMethodDeriv = OptimizationTools::OPTIMIZATION_BFGS;
1027  }
1028  else
1029  throw Exception("Unknown derivatives algorithm: '" + order + "'.");
1030  if (verbose)
1031  ApplicationTools::displayResult("Optimization method", optName);
1032  if (verbose)
1033  ApplicationTools::displayResult("Algorithm used for derivable parameters", order);
1034 
1035  // See if we should reparametrize:
1036  bool reparam = ApplicationTools::getBooleanParameter("optimization.reparametrization", params, warn + 1);
1037  if (verbose)
1038  ApplicationTools::displayResult("Reparametrization", (reparam ? "yes" : "no"));
1039 
1040  // See if we should use a molecular clock constraint:
1041  string clock = ApplicationTools::getStringParameter("optimization.clock", params, "None", "", true, warn + 1);
1042  if (clock != "None" && clock != "Global")
1043  throw Exception("Molecular clock option not recognized, should be one of 'Global' or 'None'.");
1044  bool useClock = (clock == "Global");
1045  if (useClock && optimizeTopo)
1046  throw Exception("PhylogeneticsApplicationTools::optimizeParameters. Cannot optimize topology with a molecular clock.");
1047  if (verbose)
1048  ApplicationTools::displayResult("Molecular clock", clock);
1049 
1050  unsigned int n = 0;
1051  if ((optName == "D-Brent") || (optName == "D-BFGS"))
1052  {
1053  // Uses Newton-Brent method or Newton-BFGS method
1054  string optMethodModel;
1055  if (optName == "D-Brent")
1056  optMethodModel = OptimizationTools::OPTIMIZATION_BRENT;
1057  else
1058  optMethodModel = OptimizationTools::OPTIMIZATION_BFGS;
1059 
1060  unsigned int nstep = ApplicationTools::getParameter<unsigned int>("nstep", optArgs, 1, "", true, warn + 1);
1061 
1062  if (optimizeTopo)
1063  {
1064  bool optNumFirst = ApplicationTools::getBooleanParameter("optimization.topology.numfirst", params, true, suffix, suffixIsOptional, warn + 1);
1065  unsigned int topoNbStep = ApplicationTools::getParameter<unsigned int>("optimization.topology.nstep", params, 1, "", true, warn + 1);
1066  double tolBefore = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.before", params, 100, suffix, suffixIsOptional, warn + 1);
1067  double tolDuring = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.during", params, 100, suffix, warn + 1);
1069  dynamic_pointer_cast<NNIHomogeneousTreeLikelihood>(tl), parametersToEstimate,
1070  optNumFirst, tolBefore, tolDuring, nbEvalMax, topoNbStep, messageHandler, profiler,
1071  reparam, optVerbose, optMethodDeriv, nstep, nniAlgo);
1072  }
1073 
1074  if (verbose && nstep > 1)
1075  ApplicationTools::displayResult("# of precision steps", TextTools::toString(nstep));
1076  parametersToEstimate.matchParametersValues(tl->getParameters());
1078  dynamic_pointer_cast<DiscreteRatesAcrossSitesTreeLikelihoodInterface>(tl), parametersToEstimate,
1079  backupListener, nstep, tolerance, nbEvalMax, messageHandler, profiler, reparam, optVerbose, optMethodDeriv, optMethodModel);
1080  }
1081  else if (optName == "FullD")
1082  {
1083  // Uses Newton-raphson algorithm with numerical derivatives when required.
1084 
1085  if (optimizeTopo)
1086  {
1087  bool optNumFirst = ApplicationTools::getBooleanParameter("optimization.topology.numfirst", params, true, suffix, suffixIsOptional, warn + 1);
1088  unsigned int topoNbStep = ApplicationTools::getParameter<unsigned int>("optimization.topology.nstep", params, 1, "", true, warn + 1);
1089  double tolBefore = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.before", params, 100, suffix, suffixIsOptional, warn + 1);
1090  double tolDuring = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.during", params, 100, suffix, suffixIsOptional, warn + 1);
1092  dynamic_pointer_cast<NNIHomogeneousTreeLikelihood>(tl), parametersToEstimate,
1093  optNumFirst, tolBefore, tolDuring, nbEvalMax, topoNbStep, messageHandler, profiler,
1094  reparam, optVerbose, optMethodDeriv, nniAlgo);
1095  }
1096 
1097  parametersToEstimate.matchParametersValues(tl->getParameters());
1099  dynamic_pointer_cast<DiscreteRatesAcrossSitesTreeLikelihoodInterface>(tl), parametersToEstimate,
1100  backupListener, tolerance, nbEvalMax, messageHandler, profiler, reparam, useClock, optVerbose, optMethodDeriv);
1101  }
1102  else
1103  throw Exception("Unknown optimization method: " + optName);
1104 
1105  string finalMethod = ApplicationTools::getStringParameter("optimization.final", params, "none", suffix, suffixIsOptional, warn);
1106  shared_ptr<OptimizerInterface> finalOptimizer = nullptr;
1107  if (finalMethod == "none")
1108  {}
1109  else if (finalMethod == "simplex")
1110  {
1111  finalOptimizer = make_shared<DownhillSimplexMethod>(tl);
1112  }
1113  else if (finalMethod == "powell")
1114  {
1115  finalOptimizer = make_shared<PowellMultiDimensions>(tl);
1116  }
1117  else
1118  throw Exception("Unknown final optimization method: " + finalMethod);
1119 
1120  if (finalOptimizer)
1121  {
1122  parametersToEstimate.matchParametersValues(tl->getParameters());
1123  if (verbose)
1124  ApplicationTools::displayResult("Final optimization step", finalMethod);
1125  finalOptimizer->setProfiler(profiler);
1126  finalOptimizer->setMessageHandler(messageHandler);
1127  finalOptimizer->setMaximumNumberOfEvaluations(nbEvalMax);
1128  finalOptimizer->getStopCondition()->setTolerance(tolerance);
1129  finalOptimizer->setVerbose(verbose);
1130  finalOptimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
1131  finalOptimizer->init(parametersToEstimate);
1132  finalOptimizer->optimize();
1133  n += finalOptimizer->getNumberOfEvaluations();
1134  }
1135 
1136  if (verbose)
1137  ApplicationTools::displayResult("Performed", TextTools::toString(n) + " function evaluations.");
1138  if (backupFile != "none")
1139  {
1140  string bf = backupFile + ".def";
1141  rename(backupFile.c_str(), bf.c_str());
1142  }
1143  return tl;
1144 }
1145 
1146 
1147 /******************************************************************************/
1148 /**************** Output ************************************/
1149 /******************************************************************************/
1150 
1152  const vector<const Tree*>& trees,
1153  const map<string, string>& params,
1154  const string& prefix,
1155  const string& suffix,
1156  bool suffixIsOptional,
1157  bool verbose,
1158  bool checkOnly,
1159  int warn)
1160 {
1161  string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
1162  string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, false, suffix, suffixIsOptional, "none", warn);
1163  OMultiTree* treeWriter;
1164  if (format == "Newick")
1165  treeWriter = new Newick();
1166  else if (format == "Nexus")
1167  treeWriter = new NexusIOTree();
1168  else if (format == "NHX")
1169  treeWriter = new Nhx();
1170  else
1171  throw Exception("Unknown format for tree writing: " + format);
1172 
1173  if (!checkOnly)
1174  treeWriter->writeTrees(trees, file, true);
1175 
1176  delete treeWriter;
1177  if (verbose)
1178  ApplicationTools::displayResult("Wrote trees to file ", file);
1179 }
1180 
1181 
1182 /******************************************************************************/
1183 
1185  const SubstitutionModelSet& modelSet,
1186  OutputStream& out,
1187  int warn,
1188  bool withAlias)
1189 {
1190  (out << "nonhomogeneous=general").endLine();
1191  (out << "nonhomogeneous.number_of_models=" << modelSet.getNumberOfModels()).endLine();
1192 
1193  if (modelSet.isStationary())
1194  (out << "nonhomogeneous.stationarity = yes");
1195 
1196  // Get the parameter links:
1197  map< size_t, vector<string>> modelLinks; // for each model index, stores the list of global parameters.
1198  map< string, set<size_t>> parameterLinks; // for each parameter name, stores the list of model indices, which should be sorted.
1199  vector<string> writtenNames;
1200 
1201  // Loop over all models:
1202  for (size_t i = 0; i < modelSet.getNumberOfModels(); ++i)
1203  {
1204  shared_ptr<const BranchModelInterface> model = modelSet.getModel(i);
1205 
1206  // First get the aliases for this model:
1207 
1208  map<string, string> aliases;
1209  ParameterList pl = model->getParameters();
1210 
1211  if (withAlias)
1212  {
1213  for (size_t np = 0; np < pl.size(); np++)
1214  {
1215  string nfrom = modelSet.getFrom(pl[np].getName() + "_" + TextTools::toString(i + 1));
1216  if (nfrom != "")
1217  aliases[pl[np].getName()] = nfrom;
1218  }
1219  }
1220 
1221  // Now print it:
1222  writtenNames.clear();
1223  out.endLine() << "model" << (i + 1) << "=";
1224  BppOBranchModelFormat bIOsm(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
1225  bIOsm.write(*model, out, aliases, writtenNames);
1226 
1227  out.endLine();
1228  vector<int> ids = modelSet.getNodesWithModel(i);
1229  out << "model" << (i + 1) << ".nodes_id=" << ids[0];
1230  for (size_t j = 1; j < ids.size(); ++j)
1231  {
1232  out << "," << ids[j];
1233  }
1234  out.endLine();
1235  }
1236 
1237  // First get the aliases for this frequencies set
1238 
1239  if (!modelSet.isStationary())
1240  {
1241  const auto& pFS = modelSet.rootFrequencySet();
1242  ParameterList plf = pFS.getParameters();
1243 
1244  map<string, string> aliases;
1245 
1246  if (withAlias)
1247  {
1248  for (size_t np = 0; np < plf.size(); np++)
1249  {
1250  string nfrom = modelSet.getFrom(plf[np].getName());
1251  if (nfrom != "")
1252  aliases[plf[np].getName()] = nfrom;
1253  }
1254  }
1255 
1256  // Root frequencies:
1257  out.endLine();
1258  (out << "# Root frequencies:").endLine();
1259  out << "nonhomogeneous.root_freq=";
1260 
1262  bIO.writeFrequencySet(pFS, out, aliases, writtenNames);
1263  }
1264 }
std::string getFrom(const std::string &name) const
void aliasParameters(const std::string &p1, const std::string &p2)
static bool isCodonAlphabet(const Alphabet &alphabet)
static bool isWordAlphabet(const Alphabet &alphabet)
static void displayMessage(const std::string &text)
static std::vector< std::string > matchingParameters(const std::string &pattern, const std::map< std::string, std::string > &params)
static std::shared_ptr< OutputStream > warning
static std::shared_ptr< OutputStream > message
static bool parameterExists(const std::string &parameterName, const std::map< std::string, std::string > &params)
static bool getBooleanParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, bool defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static void displayBooleanResult(const std::string &text, bool result)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
static std::string getAFilePath(const std::string &parameter, const std::map< std::string, std::string > &params, bool isRequired=true, bool mustExist=true, const std::string &suffix="", bool suffixIsOptional=false, const std::string &defaultPath="none", int warn=0)
static std::string CONSTRAINTS_AUTO
virtual void setMessageHandler(std::shared_ptr< OutputStream > mh)
Branch model I/O in BppO format.
Frequencies set I/O in BppO format.
void writeFrequencySet(const FrequencySetInterface &freqset, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
void write(const BranchModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
const std::map< std::string, std::string > & getUnparsedArguments() const override
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
Transition model I/O in BppO format.
std::unique_ptr< TransitionModelInterface > readTransitionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, bool parseArguments=true)
Interface for all Branch models.
virtual std::string getName() const =0
Get the name of the model.
virtual std::string getDescription() const=0
virtual bool isEmpty() const=0
static bool fileExists(const std::string &filename)
static std::vector< std::string > putStreamIntoVectorOfStrings(std::istream &input)
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
static std::shared_ptr< NNIHomogeneousTreeLikelihood > optimizeTreeNNI2(std::shared_ptr< NNIHomogeneousTreeLikelihood > tl, const ParameterList &parameters, bool optimizeNumFirst=true, double tolBefore=100, double tolDuring=100, unsigned int tlEvalMax=1000000, unsigned int numStep=1, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, bool reparametrization=false, unsigned int verbose=1, const std::string &optMethod=OptimizationTools::OPTIMIZATION_NEWTON, const std::string &nniMethod=NNITopologySearch::PHYML)
Optimize all parameters from a TreeLikelihood object, including tree topology using Nearest Neighbor ...
static unsigned int optimizeNumericalParameters2(std::shared_ptr< DiscreteRatesAcrossSitesTreeLikelihoodInterface > tl, const ParameterList &parameters, std::shared_ptr< OptimizationListener > listener=nullptr, double tolerance=0.000001, unsigned int tlEvalMax=1000000, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, bool reparametrization=false, bool useClock=false, unsigned int verbose=1, const std::string &optMethodDeriv=OPTIMIZATION_NEWTON)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static unsigned int optimizeTreeScale(std::shared_ptr< TreeLikelihoodInterface > tl, double tolerance=0.000001, unsigned int tlEvalMax=1000000, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, unsigned int verbose=1)
Optimize the scale of a TreeLikelihood.
static std::shared_ptr< NNIHomogeneousTreeLikelihood > optimizeTreeNNI(std::shared_ptr< NNIHomogeneousTreeLikelihood > tl, const ParameterList &parameters, bool optimizeNumFirst=true, double tolBefore=100, double tolDuring=100, unsigned int tlEvalMax=1000000, unsigned int numStep=1, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, bool reparametrization=false, unsigned int verbose=1, const std::string &optMethod=OptimizationTools::OPTIMIZATION_NEWTON, unsigned int nStep=1, const std::string &nniMethod=NNITopologySearch::PHYML)
Optimize all parameters from a TreeLikelihood object, including tree topology using Nearest Neighbor ...
static unsigned int optimizeNumericalParameters(std::shared_ptr< DiscreteRatesAcrossSitesTreeLikelihoodInterface > tl, const ParameterList &parameters, std::shared_ptr< OptimizationListener > listener=nullptr, unsigned int nstep=1, double tolerance=0.000001, unsigned int tlEvalMax=1000000, std::shared_ptr< OutputStream > messageHandler=ApplicationTools::message, std::shared_ptr< OutputStream > profiler=ApplicationTools::message, bool reparametrization=false, unsigned int verbose=1, const std::string &optMethodDeriv=OPTIMIZATION_NEWTON, const std::string &optMethodModel=OPTIMIZATION_BRENT)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static void setSubstitutionModelParametersInitialValuesWithAliases(BranchModelInterface &model, std::map< std::string, std::string > &unparsedParameterValues, size_t modelNumber, std::shared_ptr< const AlignmentDataInterface > data, std::map< std::string, std::string > &sharedParams, bool verbose)
Set parameter initial values of a given model in a set according to options.
static std::map< size_t, std::shared_ptr< Tree > > getTrees(const std::map< std::string, std::string > &params, const std::map< size_t, std::shared_ptr< AlignmentDataInterface >> &mSeq, std::map< std::string, std::string > &unparsedParams, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a list ofTree objects according to options.
static void completeMixedSubstitutionModelSet(MixedSubstitutionModelSet &mixedModelSet, std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Complete a MixedSubstitutionModelSet object according to options, given this model has already been f...
static std::unique_ptr< SubstitutionModelSet > getSubstitutionModelSet(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gcode, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Gets a SubstitutionModelSet object according to options.
static void setSubstitutionModelSet(SubstitutionModelSet &modelSet, std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gcode, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Sets a SubstitutionModelSet object according to options.
static void writeTrees(const std::vector< const Tree * > &trees, const std::map< std::string, std::string > &params, const std::string &prefix="output.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, bool checkOnly=false, int warn=1)
Write a tree according to options.
static void printParameters(const SubstitutionModelSet &modelSet, OutputStream &out, int warn=1, bool withAlias=true)
Output a SubstitutionModelSet description to a file.
static std::shared_ptr< TreeLikelihoodInterface > optimizeParameters(std::shared_ptr< TreeLikelihoodInterface > tl, const ParameterList &parameters, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Optimize parameters according to options.
bool isComplete() const
checks if this HyperNode includes at least a submodel of each mixed model
Substitution models manager for Mixed Substitution Models. This class inherits from SubstitutionModel...
void addToHyperNode(size_t nM, const Vuint &vnS, int nH=-1)
static const std::string PHYML
static const std::string FAST
static const std::string BETTER
The so-called 'newick' parenthetic format.
Definition: Newick.h:58
a simple parser for reading trees from a Nexus file.
Definition: NexusIoTree.h:36
The so-called 'Nhx - New Hampshire eXtended' parenthetic format.
Definition: Nhx.h:62
General interface for tree writers.
Definition: IoTree.h:382
virtual void writeTrees(const std::vector< const Tree * > &trees, const std::string &path, bool overwrite) const =0
Write trees to a file.
static std::string OPTIMIZATION_NEWTON
static std::string OPTIMIZATION_BFGS
static std::string OPTIMIZATION_BRENT
static std::string OPTIMIZATION_GRADIENT
virtual OutputStream & endLine()=0
virtual const ParameterList & getIndependentParameters() const=0
virtual bool hasParameter(const std::string &name) const
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual void deleteParameter(const std::string &name)
virtual void reset()
virtual const Parameter & parameter(const std::string &name) const
virtual bool matchParametersValues(const ParameterList &params, std::vector< size_t > *updatedParameters=0)
virtual void deleteParameters(const std::vector< std::string > &names, bool mustExist=true)
virtual void setParameter(size_t index, const Parameter &param)
virtual size_t whichParameterHasName(const std::string &name) const
virtual std::string parameter() const
virtual const ConstraintInterface & constraint() const
virtual std::shared_ptr< const ConstraintInterface > getConstraint() const
virtual void setConstraint(std::shared_ptr< ConstraintInterface > constraint)
virtual const std::string & getName() const
virtual bool hasConstraint() const
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const=0
virtual bool matchParametersValues(const ParameterList &parameters)=0
virtual std::string getNamespace() const=0
static std::unique_ptr< FrequencySetInterface > getRootFrequencySet(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &sharedparams, const std::vector< double > &rateFreqs, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Get A FrequencySet object for root frequencies (NH models) according to options.
size_t numberOfRemainingTokens() const
const std::string & nextToken()
bool hasMoreToken() const
Substitution models manager for non-homogeneous / non-reversible models of evolution.
const std::vector< int > & getNodesWithModel(size_t i) const
Get a list of nodes id for which the given model is associated.
std::shared_ptr< const TransitionModelInterface > getModel(size_t i) const
Get one model from the set knowing its index.
void setRootFrequencies(std::shared_ptr< FrequencySetInterface > rootFreqs)
Sets a given FrequencySet for root frequencies.
void addModel(std::shared_ptr< TransitionModelInterface > model, const std::vector< int > &nodesId)
Add a new model to the set, and set relationships with nodes and params.
const FrequencySetInterface & rootFrequencySet() const
void clear()
Resets all the information contained in this object.
Interface for all transition models.
static std::unique_ptr< TreeTemplate< Node > > getRandomTree(std::vector< std::string > &leavesNames, bool rooted=true)
Draw a random tree from a list of taxa, using a Yule process.
static double convertToClockTree(Tree &tree, int nodeId, bool noneg=false)
Modify a tree's branch lengths to make a clock tree, by rebalancing branch lengths.
Definition: TreeTools.cpp:599
static void constrainedMidPointRooting(Tree &tree)
Determine the mid-point position of the root along the branch that already contains the root....
Definition: TreeTools.cpp:1153
static void computeBranchLengthsGrafen(Tree &tree, double power=1, bool init=true)
Compute branch lengths using Grafen's method.
Definition: TreeTools.cpp:584
static double getHeight(const Tree &tree, int nodeId)
Get the height of the subtree defined by node 'node', i.e. the maximum distance between leaves and th...
Definition: TreeTools.cpp:173
static void diff(std::vector< T > &v1, std::vector< T > &v2, std::vector< T > &v3)
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isEmpty(const std::string &s)
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.
double convert(const bpp::ExtendedFloat &ef)
ExtendedFloat abs(const ExtendedFloat &ef)
std::vector< unsigned int > Vuint