bpp-phyl3  3.0.0
OptimizationTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 #include <Bpp/Text/KeyvalTools.h>
7 
19 
20 // bpp-phyl
22 #include "Io/Newick.h"
23 #include "OptimizationTools.h"
24 #include "PseudoNewtonOptimizer.h"
25 #include "Tree/PhyloTreeTools.h"
26 
27 // From bpp-seq:
28 #include <Bpp/Seq/Io/Fasta.h>
29 
30 #include <memory>
31 
32 using namespace bpp;
33 using namespace std;
34 
35 /******************************************************************************/
36 
38 
40 
42  std::shared_ptr<PhyloLikelihoodInterface> lik,
43  const std::map<std::string, std::string>& params,
44  const std::string& suffix,
45  bool suffixIsOptional,
46  bool verb,
47  int warn) :
48  parameters(),
49  listener(nullptr),
50  nstep(1),
51  tolerance(0.000001),
52  nbEvalMax(1000000),
53  messenger(ApplicationTools::message),
54  profiler(ApplicationTools::message),
55  reparametrization(false),
56  useClock(0),
57  verbose(1),
58  optMethodDeriv(OPTIMIZATION_NEWTON),
59  optMethodModel(OPTIMIZATION_BRENT)
60 {
62  string optimization = ApplicationTools::getStringParameter("optimization", params, "FullD(derivatives=Newton)", suffix, suffixIsOptional, warn);
63 
64  map<string, string> optArgs;
65  KeyvalTools::parseProcedure(optimization, optMethodModel, optArgs);
66 
67  if (optMethodModel == "D-Brent")
68  // Uses Newton-Brent method or Newton-BFGS method
70  else if (optMethodModel == "D-BFGS")
72  else if (optMethodModel == "FullD")
74  else
75  throw Exception("Unknown optimization method " + optMethodModel);
76 
77  nstep = ApplicationTools::getParameter<unsigned int>("nstep", optArgs, 1, "", true, warn + 1);
78 
79 
80  // VERBOSITY
81 
82  verbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", params, 2, suffix, suffixIsOptional, warn + 1);
83 
84  //MESSAGE
85 
86  string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
87  messenger =
88  (mhPath == "none") ? nullptr :
89  (mhPath == "std") ? ApplicationTools::message :
90  make_shared<StlOutputStream>(make_unique<ofstream>(mhPath.c_str(), ios::out));
91  if (verb)
92  ApplicationTools::displayResult("Message handler", mhPath);
93 
94  // PROFILE
95  string prPath = ApplicationTools::getAFilePath("optimization.profiler", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
96  profiler =
97  (prPath == "none") ? nullptr :
98  (prPath == "std") ? ApplicationTools::message :
99  make_shared<StlOutputStream>(make_unique<ofstream>(prPath.c_str(), ios::out));
100  if (profiler)
101  profiler->setPrecision(20);
102  if (verb)
103  ApplicationTools::displayResult("Profiler", prPath);
104 
105 
106  bool scaleFirst = ApplicationTools::getBooleanParameter("optimization.scale_first", params, false, suffix, suffixIsOptional, warn + 1);
107  if (scaleFirst)
108  {
109  ApplicationTools::displayError("Sorry, optimization.scale_first not implemented yet for process.");
110  exit(-1);
111  }
112 
113  // Should I ignore some parameters?
114 
115  parameters = lik->getParameters();
116  vector<string> parNames = parameters.getParameterNames();
117 
118  if (params.find("optimization.ignore_parameter") != params.end())
119  throw Exception("optimization.ignore_parameter is deprecated, use optimization.ignore_parameters instead!");
120 
121  string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameters", params, "", suffix, suffixIsOptional, warn + 1);
122  StringTokenizer st(paramListDesc, ",");
123  while (st.hasMoreToken())
124  {
125  try
126  {
127  string param = st.nextToken();
128  if (param == "BrLen")
129  {
130  vector<string> vs = lik->getBranchLengthParameters().getParameterNames();
132  if (verb)
133  ApplicationTools::displayResult("Parameter ignored", string("Branch lengths"));
134  }
135  else if (param == "Ancient")
136  {
137  vector<string> vs = lik->getRootFrequenciesParameters().getParameterNames();
139  if (verb)
140  ApplicationTools::displayResult("Parameter ignored", string("Root frequencies"));
141  }
142  else if (param == "Model")
143  {
144  vector<string> vs = lik->getSubstitutionModelParameters().getParameterNames();
146  if (verb)
147  ApplicationTools::displayResult("Parameter ignored", string("Model"));
148  }
149  else if (param == "*")
150  {
151  parameters.reset();
152  if (verb)
153  ApplicationTools::displayResult("Parameter ignored", string("All"));
154  }
155  else if (param.find("*") != string::npos)
156  {
157  vector<string> vs = ApplicationTools::matchingParameters(param, parNames);
158  bool verbhere = verb;
159 
160  if (vs.size() >= 20)
161  {
162  if (verb)
163  {
164  ApplicationTools::displayResult("Number of parameters ignored", vs.size());
165  ApplicationTools::displayMessage(" from " + param);
166  }
167  verbhere = false;
168  }
169 
170  for (auto& it : vs)
171  {
173  if (verbhere)
174  ApplicationTools::displayResult("Parameter ignored", it);
175  }
176  }
177  else
178  {
180  if (verb)
181  ApplicationTools::displayResult("Parameter ignored", param);
182  }
183  }
184  catch (ParameterNotFoundException& pnfe)
185  {
186  ApplicationTools::displayWarning("Parameter '" + pnfe.parameter() + "' not found, and so can't be ignored!");
187  }
188  }
189 
190  // Should I constrain some parameters?
191  vector<string> parToEstNames = parameters.getParameterNames();
192 
193  if (params.find("optimization.constrain_parameter") != params.end())
194  throw Exception("optimization.constrain_parameter is deprecated, use optimization.constrain_parameters instead!");
195 
196  paramListDesc = ApplicationTools::getStringParameter("optimization.constrain_parameters", params, "", suffix, suffixIsOptional, warn + 1);
197 
198  string constraint = "";
199  string pc, param = "";
200 
201  StringTokenizer st2(paramListDesc, ",");
202  while (st2.hasMoreToken())
203  {
204  try
205  {
206  pc = st2.nextToken();
207  string::size_type index = pc.find("=");
208  if (index == string::npos)
209  throw Exception("PhylogeneticsApplicationTools::optimizeParamaters. Bad constrain syntax, should contain `=' symbol: " + pc);
210  param = pc.substr(0, index);
211  constraint = pc.substr(index + 1);
212  std::shared_ptr<IntervalConstraint> ic(new IntervalConstraint(constraint));
213 
214  vector<string> parNames2;
215 
216  if (param == "BrLen")
217  parNames2 = lik->getBranchLengthParameters().getParameterNames();
218  else if (param == "Ancient")
219  parNames2 = lik->getRootFrequenciesParameters().getParameterNames();
220  else if (param == "Model")
221  {
222  vector<string> vs = lik->getSubstitutionModelParameters().getParameterNames();
223  }
224  else if (param.find("*") != string::npos)
225  parNames2 = ApplicationTools::matchingParameters(param, parToEstNames);
226  else
227  parNames2.push_back(param);
228 
229 
230  for (size_t i = 0; i < parNames2.size(); i++)
231  {
232  Parameter& par = parameters.parameter(parNames2[i]);
233  if (par.hasConstraint())
234  {
235  par.setConstraint(std::shared_ptr<ConstraintInterface>(*ic & (*par.getConstraint())));
236  if (par.getConstraint()->isEmpty())
237  throw Exception("Empty interval for parameter " + parNames[i] + par.getConstraint()->getDescription());
238  }
239  else
240  par.setConstraint(ic);
241 
242  if (verb)
243  ApplicationTools::displayResult("Parameter constrained " + par.getName(), par.getConstraint()->getDescription());
244  }
245  }
246  catch (ParameterNotFoundException& pnfe)
247  {
248  ApplicationTools::displayWarning("Parameter '" + pnfe.parameter() + "' not found, and so can't be constrained!");
249  }
250  catch (ConstraintException& pnfe)
251  {
252  throw Exception("Parameter '" + param + "' does not fit the constraint " + constraint);
253  }
254  }
255 
256  nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, warn + 1);
257  if (verb)
259 
260  tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", params, .000001, suffix, suffixIsOptional, warn + 1);
261  if (verb)
263 
264  // Backing up or restoring?
265  backupFile = ApplicationTools::getAFilePath("optimization.backup.file", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
266  if (backupFile != "none")
267  {
268  ApplicationTools::displayResult("Parameters will be backup to", backupFile);
269  listener.reset(new BackupListener(backupFile));
271  {
272  ApplicationTools::displayMessage("A backup file was found! Try to restore parameters from previous run...");
273  ifstream bck(backupFile.c_str(), ios::in);
274  vector<string> lines = FileTools::putStreamIntoVectorOfStrings(bck);
275  double fval = TextTools::toDouble(lines[0].substr(5));
276  ParameterList pl = lik->getParameters();
277  for (size_t l = 1; l < lines.size(); ++l)
278  {
279  if (!TextTools::isEmpty(lines[l]))
280  {
281  StringTokenizer stp(lines[l], "=");
282  if (stp.numberOfRemainingTokens() != 2)
283  {
284  cerr << "Corrupted backup file!!!" << endl;
285  cerr << "at line " << l << ": " << lines[l] << endl;
286  }
287  string pname = stp.nextToken();
288  string pvalue = stp.nextToken();
289  if (pl.hasParameter(pname))
290  {
291  size_t p = pl.whichParameterHasName(pname);
292  pl.setParameter(p, AutoParameter(pl[p]));
293  pl[p].setValue(TextTools::toDouble(pvalue));
294  }
295  else
296  ApplicationTools::displayWarning("Unknown parameter in backup file : " + pname);
297  }
298  }
299  bck.close();
300  lik->setParameters(pl);
301  if (convert(abs(lik->getValue() - fval)) > 0.000001)
302  ApplicationTools::displayMessage("Changed likelihood from backup file.");
303  ApplicationTools::displayResult("Restoring log-likelihood", -lik->getValue());
304  }
305  }
306 
307  string order = ApplicationTools::getStringParameter("derivatives", optArgs, "Newton", "", true, warn + 1);
308  if (order == "Gradient")
309  {
311  }
312  else if (order == "Newton")
313  {
315  }
316  else if (order == "BFGS")
317  {
319  }
320  else
321  throw Exception("Unknown derivatives algorithm: '" + order + "'.");
322 
323  if (verb)
324  ApplicationTools::displayResult("Optimization method", optMethodModel);
325  if (verb)
326  ApplicationTools::displayResult("Algorithm used for derivable parameters", order);
327 
328  // See if we should reparametrize:
329  reparametrization = ApplicationTools::getBooleanParameter("optimization.reparametrization", params, false, suffix, suffixIsOptional, warn + 1);
330  if (verb)
331  ApplicationTools::displayResult("Reparametrization", (reparametrization ? "yes" : "no"));
332 
333  // See if we should use a molecular clock constraint:
334  string clock = ApplicationTools::getStringParameter("optimization.clock", params, "None", suffix, suffixIsOptional, warn + 1);
335  if (clock != "None" && clock != "Global")
336  throw Exception("Molecular clock option not recognized, should be one of 'Global' or 'None'.");
337  useClock = (clock == "Global");
338  if (verb)
339  ApplicationTools::displayResult("Molecular clock", clock);
340 
341 
342 }
343 
344 /******************************************************************************/
345 
346 std::string OptimizationTools::OPTIMIZATION_NEWTON = "newton";
347 std::string OptimizationTools::OPTIMIZATION_GRADIENT = "gradient";
348 std::string OptimizationTools::OPTIMIZATION_BRENT = "Brent";
349 std::string OptimizationTools::OPTIMIZATION_BFGS = "BFGS";
350 
351 /******************************************************************************/
352 
354  std::shared_ptr<PhyloLikelihoodInterface> lik,
355  const OptimizationOptions& optopt)
356 {
357  shared_ptr<SecondOrderDerivable> f = lik;
358  ParameterList pl = optopt.parameters;
359 
360  // Shall we reparametrize the function to remove constraints?
361  if (optopt.reparametrization)
362  {
363  f = make_shared<ReparametrizationDerivableSecondOrderWrapper>(f, optopt.parameters);
364 
365  // Reset parameters to remove constraints:
366  pl = f->getParameters().createSubList(optopt.parameters.getParameterNames());
367  }
368 
369  // ///////////////
370  // Build optimizer:
371 
372  // Branch lengths
373 
374  auto desc = make_unique<MetaOptimizerInfos>();
375  unique_ptr<MetaOptimizer> poptimizer;
376 
378  desc->addOptimizer("Branch length parameters", make_shared<ConjugateGradientMultiDimensions>(f), lik->getBranchLengthParameters().getParameterNames(), 2, MetaOptimizerInfos::IT_TYPE_FULL);
379  else if (optopt.optMethodDeriv == OPTIMIZATION_NEWTON)
380  desc->addOptimizer("Branch length parameters", make_shared<PseudoNewtonOptimizer>(f), lik->getBranchLengthParameters().getParameterNames(), 2, MetaOptimizerInfos::IT_TYPE_FULL);
381  else if (optopt.optMethodDeriv == OPTIMIZATION_BFGS)
382  desc->addOptimizer("Branch length parameters", make_shared<BfgsMultiDimensions>(f), lik->getBranchLengthParameters().getParameterNames(), 2, MetaOptimizerInfos::IT_TYPE_FULL);
383  else
384  throw Exception("OptimizationTools::optimizeNumericalParameters. Unknown derivative optimization method: " + optopt.optMethodDeriv);
385 
386  // Other parameters
387 
388  if (optopt.optMethodModel == OPTIMIZATION_BRENT)
389  {
390  ParameterList plTmp = lik->getSubstitutionModelParameters();
391  plTmp.addParameters(lik->getRootFrequenciesParameters());
392  ParameterList plsm = optopt.parameters.getCommonParametersWith(plTmp);
393 
394  desc->addOptimizer("Substitution model parameters", make_shared<SimpleMultiDimensions>(f), plsm.getParameterNames(), 0, MetaOptimizerInfos::IT_TYPE_STEP);
395 
396  ParameterList plrd = optopt.parameters.getCommonParametersWith(lik->getRateDistributionParameters());
397  desc->addOptimizer("Rate distribution parameters", make_shared<SimpleMultiDimensions>(f), plrd.getParameterNames(), 0, MetaOptimizerInfos::IT_TYPE_STEP);
398 
399  poptimizer = make_unique<MetaOptimizer>(f, std::move(desc), optopt.nstep);
400  }
401  else if (optopt.optMethodModel == OPTIMIZATION_BFGS)
402  {
403  vector<string> vNameDer;
404  auto fnum = make_shared<ThreePointsNumericalDerivative>(f);
405 
406  ParameterList plsm = optopt.parameters.getCommonParametersWith(lik->getSubstitutionModelParameters());
407  vNameDer = plsm.getParameterNames();
408 
409  ParameterList plrd = optopt.parameters.getCommonParametersWith(lik->getRateDistributionParameters());
410 
411  vector<string> vNameDer2 = plrd.getParameterNames();
412 
413  vNameDer.insert(vNameDer.begin(), vNameDer2.begin(), vNameDer2.end());
414  fnum->setParametersToDerivate(vNameDer);
415 
416  desc->addOptimizer("Rate & model distribution parameters", make_shared<BfgsMultiDimensions>(fnum), vNameDer, 1, MetaOptimizerInfos::IT_TYPE_FULL);
417  poptimizer = make_unique<MetaOptimizer>(fnum, std::move(desc), optopt.nstep);
418  }
419  else
420  throw Exception("OptimizationTools::optimizeNumericalParameters. Unknown optimization method: " + optopt.optMethodModel);
421 
422  poptimizer->setVerbose(optopt.verbose);
423  poptimizer->setProfiler(optopt.profiler);
424  poptimizer->setMessageHandler(optopt.messenger);
425  poptimizer->setMaximumNumberOfEvaluations(optopt.nbEvalMax);
426  poptimizer->getStopCondition()->setTolerance(optopt.tolerance);
427 
428  // Optimize TreeLikelihood function:
429  poptimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
430  auto nanListener = make_shared<NaNListener>(poptimizer.get(), lik.get());
431  poptimizer->addOptimizationListener(nanListener);
432  if (optopt.listener)
433  poptimizer->addOptimizationListener(optopt.listener);
434 
435  poptimizer->init(pl);
436  poptimizer->optimize();
437 
438 // optopt.parameters.setAllParametersValues(poptimizer->getParameters());
439 
440  if (optopt.verbose > 0)
442 
443  // We're done.
444  uint nb = poptimizer->getNumberOfEvaluations();
445  return nb;
446 }
447 
448 
449 /************************************************************/
450 
452  std::shared_ptr<PhyloLikelihoodInterface> lik,
453  const OptimizationOptions& optopt)
454 {
455  shared_ptr<SecondOrderDerivable> f = lik;
456  ParameterList pl = optopt.parameters;
457 
458  // Shall we use a molecular clock constraint on branch lengths?
459  // unique_ptr<GlobalClockTreeLikelihoodFunctionWrapper> fclock;
460  // if (useClock)
461  // {
462  // fclock.reset(new GlobalClockTreeLikelihoodFunctionWrapper(lik));
463  // f = fclock.get();
464  // if (verbose > 0)
465  // ApplicationTools::displayResult("Log-likelihood after adding clock", -lik->getLogLikelihood());
466 
467  // // Reset parameters to use new branch lengths. WARNING! 'old' branch parameters do not exist anymore and have been replaced by heights
468  // pl = fclock->getParameters().getCommonParametersWith(parameters);
469  // pl.addParameters(fclock->getHeightParameters());
470  // }
471 
472  // Shall we reparametrize the function to remove constraints?
473  shared_ptr<SecondOrderDerivable> frep;
474  if (optopt.reparametrization)
475  {
477  f = frep;
478 
479  // Reset parameters to remove constraints:
480  pl = f->getParameters().createSubList(pl.getParameterNames());
481  }
482 
483  // Build optimizer:
484  unique_ptr<OptimizerInterface> optimizer;
485  shared_ptr<AbstractNumericalDerivative> fnum;
486 
488  {
489  fnum = make_shared<TwoPointsNumericalDerivative>(dynamic_pointer_cast<FirstOrderDerivable>(f));
490  fnum->setInterval(0.0000001);
491  optimizer = make_unique<ConjugateGradientMultiDimensions>(fnum);
492  }
493  else if (optopt.optMethodDeriv == OPTIMIZATION_NEWTON)
494  {
495  fnum = make_shared<ThreePointsNumericalDerivative>(f);
496  fnum->setInterval(0.0001);
497  optimizer = make_unique<PseudoNewtonOptimizer>(fnum);
498  }
499  else if (optopt.optMethodDeriv == OPTIMIZATION_BFGS)
500  {
501  fnum = make_shared<TwoPointsNumericalDerivative>(dynamic_pointer_cast<FirstOrderDerivable>(f));
502  fnum->setInterval(0.0001);
503  optimizer = make_unique<BfgsMultiDimensions>(fnum);
504  }
505  else
506  throw Exception("OptimizationTools::optimizeNumericalParameters2. Unknown optimization method: " + optopt.optMethodDeriv);
507 
508  // Numerical derivatives:
509  // Variables not derivatived in Likelihood DF but in numerical way
510  ParameterList tmp = lik->getParameters();
511 
512  // if (useClock)
513  // tmp.addParameters(fclock->getHeightParameters());
514 
515  fnum->setParametersToDerivate(tmp.getParameterNames());
516 
517  optimizer->setVerbose(optopt.verbose);
518  optimizer->setProfiler(optopt.profiler);
519  optimizer->setMessageHandler(optopt.messenger);
520  optimizer->setMaximumNumberOfEvaluations(optopt.nbEvalMax);
521  optimizer->getStopCondition()->setTolerance(optopt.tolerance);
522 
523  // Optimize TreeLikelihood function:
524  optimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
525  auto nanListener = make_shared<NaNListener>(optimizer.get(), lik.get());
526  optimizer->addOptimizationListener(nanListener);
527  if (optopt.listener)
528  optimizer->addOptimizationListener(optopt.listener);
529 
530  optimizer->init(pl);
531  optimizer->optimize();
532 
533  if (optopt.verbose > 0)
535 
536  // We're done.
537  return optimizer->getNumberOfEvaluations();
538 }
539 
540 /************************************************************/
541 
543  std::shared_ptr<SingleProcessPhyloLikelihood> lik,
544  const OptimizationOptions& optopt)
545 {
546  shared_ptr<SecondOrderDerivable> f = lik;
547  ParameterList pl = optopt.parameters;
548  if (optopt.reparametrization)
549  {
550  // Shall we reparametrize the function to remove constraints?
551  f = make_shared<ReparametrizationDerivableSecondOrderWrapper>(f, optopt.parameters);
552 
553  // Reset parameters to remove constraints:
554  pl = f->getParameters().createSubList(optopt.parameters.getParameterNames());
555  }
556 
557  // Build optimizer:
558  shared_ptr<AbstractNumericalDerivative> fnum;
559  unique_ptr<OptimizerInterface> optimizer;
560 
562  {
563  lik->likelihoodCalculationSingleProcess().setNumericalDerivateConfiguration(0.00001, NumericalDerivativeType::ThreePoints);
564 
565  fnum = make_shared<TwoPointsNumericalDerivative>(dynamic_pointer_cast<FirstOrderDerivable>(f));
566  fnum->setInterval(0.0000001);
567  optimizer.reset(new ConjugateGradientMultiDimensions(fnum));
568  }
569  else if (optopt.optMethodDeriv == OPTIMIZATION_NEWTON)
570  {
571  lik->likelihoodCalculationSingleProcess().setNumericalDerivateConfiguration(0.0001, NumericalDerivativeType::FivePoints);
572  fnum = make_shared<ThreePointsNumericalDerivative>(f);
573  fnum->setInterval(0.0001);
574  optimizer.reset(new PseudoNewtonOptimizer(fnum));
575  }
576  else if (optopt.optMethodDeriv == OPTIMIZATION_BFGS)
577  {
578  lik->likelihoodCalculationSingleProcess().setNumericalDerivateConfiguration(0.0001, NumericalDerivativeType::ThreePoints);
579  fnum = make_shared<TwoPointsNumericalDerivative>(dynamic_pointer_cast<FirstOrderDerivable>(f));
580  fnum->setInterval(0.0001);
581  optimizer.reset(new BfgsMultiDimensions(fnum));
582  }
583  else
584  throw Exception("OptimizationTools::optimizeNumericalParameters2. Unknown optimization method: " + optopt.optMethodDeriv);
585 
586 
587  // Variables not derivatived in Likelihood DF but in numerical way
588  ParameterList tmp = f->getParameters();
589 
590  fnum->setParametersToDerivate(tmp.getParameterNames());
591 
592  optimizer->setVerbose(optopt.verbose);
593  optimizer->setProfiler(optopt.profiler);
594  optimizer->setMessageHandler(optopt.messenger);
595  optimizer->setMaximumNumberOfEvaluations(optopt.nbEvalMax);
596  optimizer->getStopCondition()->setTolerance(optopt.tolerance);
597 
598  // Optimize TreeLikelihood function:
599  optimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
600  auto nanListener = make_shared<NaNListener>(optimizer.get(), lik.get());
601  optimizer->addOptimizationListener(nanListener);
602  if (optopt.listener)
603  optimizer->addOptimizationListener(optopt.listener);
604 
605  optimizer->init(pl);
606  optimizer->optimize();
607 
608  if (optopt.verbose > 0)
610 
611  // We're done.
612  return optimizer->getNumberOfEvaluations();
613 }
614 
615 
616 /******************************************************************************/
617 
618 std::string OptimizationTools::DISTANCEMETHOD_INIT = "init";
619 std::string OptimizationTools::DISTANCEMETHOD_PAIRWISE = "pairwise";
620 std::string OptimizationTools::DISTANCEMETHOD_ITERATIONS = "iterations";
621 
622 /******************************************************************************/
623 
624 unique_ptr<DistanceMatrix> OptimizationTools::estimateDistanceMatrix(
625  DistanceEstimation& estimationMethod,
626  const ParameterList& parametersToIgnore,
627  const std::string& param,
628  unsigned int verbose)
629 {
630  if (param != DISTANCEMETHOD_PAIRWISE && param != DISTANCEMETHOD_INIT)
631  throw Exception("OptimizationTools::estimateDistanceMatrix. Invalid option param=" + param + ".");
632  estimationMethod.resetAdditionalParameters();
633  estimationMethod.setVerbose(verbose);
634  if (param == DISTANCEMETHOD_PAIRWISE)
635  {
636  ParameterList tmp = estimationMethod.process().getSubstitutionModelParameters(true);
637  tmp.addParameters(estimationMethod.process().getRateDistributionParameters(true));
638  tmp.addParameters(estimationMethod.process().getRootFrequenciesParameters(true));
639  tmp.deleteParameters(parametersToIgnore.getParameterNames());
640  estimationMethod.setAdditionalParameters(tmp);
641  }
642  // Compute matrice:
643  if (verbose > 0)
644  ApplicationTools::displayTask("Estimating distance matrix", true);
645  estimationMethod.computeMatrix();
646  auto matrix = estimationMethod.getMatrix();
647 
648  if (verbose > 0)
650 
651  return matrix;
652 }
653 
654 /******************************************************************************/
655 
656 unique_ptr<TreeTemplate<Node>> OptimizationTools::buildDistanceTree(
657  DistanceEstimation& estimationMethod,
658  AgglomerativeDistanceMethodInterface& reconstructionMethod,
659  const std::string& param,
660  OptimizationOptions& optopt)
661 {
662  estimationMethod.resetAdditionalParameters();
663  estimationMethod.setVerbose(optopt.verbose);
664 
665  if (param == DISTANCEMETHOD_PAIRWISE)
666  {
667  ParameterList tmp = estimationMethod.process().getSubstitutionModelParameters(true);
668  tmp.addParameters(estimationMethod.process().getRateDistributionParameters(true));
669  tmp.addParameters(estimationMethod.process().getRootFrequenciesParameters(true));
670  tmp = tmp.getCommonParametersWith(optopt.parameters);
671  estimationMethod.setAdditionalParameters(tmp);
672  }
673 
674  unique_ptr<TreeTemplate<Node>> tree = nullptr;
675 
676  bool test = true;
677 
678  auto process = std::shared_ptr<SubstitutionProcessInterface>(estimationMethod.process().clone());
679  auto autoProc = dynamic_pointer_cast<AutonomousSubstitutionProcessInterface>(process);
680  auto procMb = dynamic_pointer_cast<SubstitutionProcessCollectionMember>(process);
681  if (!autoProc && !procMb)
682  throw Exception("OptimizationTools::buildDistanceTree : unknown process type. Ask developpers.");
683 
684  // Vector of successive trees, to return the best one
685  std::vector<unique_ptr<TreeTemplate<Node>>> vTree;
686  std::vector<double> vLik;
687 
688  size_t nstep=0;
689  while (test && nstep<100)
690  {
691  // Compute matrice:
692  if (optopt.verbose > 0)
693  ApplicationTools::displayTask("Estimating distance matrix", true);
694 
695  estimationMethod.computeMatrix();
696  auto matrix = estimationMethod.getMatrix();
697 
698  if (estimationMethod.getVerbose() > 0)
700 
701  // Compute tree:
702  if (matrix->size() == 2)
703  {
704  // Special case, there is only one possible tree:
705  Node* n1 = new Node(0);
706  Node* n2 = new Node(1, matrix->getName(0));
707  n2->setDistanceToFather((*matrix)(0, 0) / 2.);
708  Node* n3 = new Node(2, matrix->getName(1));
709  n3->setDistanceToFather((*matrix)(0, 0) / 2.);
710  n1->addSon(n2);
711  n1->addSon(n3);
712  return unique_ptr<TreeTemplate<Node>>(new TreeTemplate<Node>(n1));
713  }
714 
715  if (optopt.verbose > 0)
716  ApplicationTools::displayTask("Building tree");
717 
718  reconstructionMethod.setDistanceMatrix(*matrix);
719  reconstructionMethod.computeTree();
720 
721  tree = make_unique<TreeTemplate<Node>>(reconstructionMethod.tree());
722 
723  vTree.push_back(std::move(tree));
724 
725  if (estimationMethod.getVerbose() > 0)
727 
728  size_t nbTree = vTree.size();
729  const auto& ltree = vTree[nbTree-1];
730 
731  if (vTree.size()>1)
732  {
733  for (size_t iT = 0; iT < nbTree - 1; iT++)
734  {
735  const auto& pTree = vTree[iT];
736  int rf = TreeTools::robinsonFouldsDistance(*pTree, *ltree);
737  // if (optopt.verbose > 0)
738  ApplicationTools::displayResult("Topo. distance with iteration " + TextTools::toString(iT + 1), TextTools::toString(rf));
739  test &= (rf != 0);
740  if (!test)
741  break;
742  }
743  }
744 
745  if ((param != DISTANCEMETHOD_ITERATIONS) || !test)
746  break; // Ends here.
747 
748  // Now, re-estimate parameters:
749  Context context;
750 
751  auto phyloTree = make_shared<ParametrizablePhyloTree>(*PhyloTreeTools::buildFromTreeTemplate(*ltree));
752  if (autoProc)
753  autoProc->setPhyloTree(*phyloTree);
754  else
755  if (procMb)
756  {
757  auto& coll = procMb->collection();
758  size_t maxTNb = procMb->getTreeNumber();
759  coll.replaceTree(phyloTree, maxTNb);
760  }
761 
762  auto lik = make_shared<LikelihoodCalculationSingleProcess>(context, estimationMethod.getData(), process);
763  auto tl = make_shared<SingleProcessPhyloLikelihood>(context, lik);
764 
765  vLik.push_back(tl->getValue());
766 
767  // hide opt verbose
768  optopt.verbose= estimationMethod.getVerbose()>0?uint(estimationMethod.getVerbose()-1):0;
769 
770  optimizeNumericalParameters(tl, optopt);
771  process->matchParametersValues(tl->getParameters());
772 
773  estimationMethod.matchParametersValues(process->getParameters());
774 
775  auto trtemp = std::make_shared<ParametrizablePhyloTree>(*tl->tree());
776  const PhyloTree trt2(*trtemp);
777  tree.reset(TreeTemplateTools::buildFromPhyloTree(trt2).release());
778 
779  if (optopt.verbose > 0)
780  {
781  auto tmp = process->getSubstitutionModelParameters(true);
782  for (unsigned int i = 0; i < tmp.size(); ++i)
783  {
784  ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
785  }
786  tmp = process->getRootFrequenciesParameters(true);
787  for (unsigned int i = 0; i < tmp.size(); ++i)
788  {
789  ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
790  }
791  tmp = process->getRateDistributionParameters(true);
792  for (unsigned int i = 0; i < tmp.size(); ++i)
793  {
794  ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
795  }
796  }
797  nstep++;
798  }
799 
800  size_t posM = static_cast<size_t>(std::distance(vLik.begin(), std::min_element(vLik.begin(), vLik.end())));
801 
802  return std::move(vTree[posM]);
803 }
804 
805 /******************************************************************************/
Interface for agglomerative distance methods.
static void displayMessage(const std::string &text)
static std::vector< std::string > matchingParameters(const std::string &pattern, const std::map< std::string, std::string > &params)
static void displayTask(const std::string &text, bool eof=false)
static std::shared_ptr< OutputStream > message
static void displayError(const std::string &text)
static void displayTaskDone()
static bool getBooleanParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, bool defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
static std::string getAFilePath(const std::string &parameter, const std::map< std::string, std::string > &params, bool isRequired=true, bool mustExist=true, const std::string &suffix="", bool suffixIsOptional=false, const std::string &defaultPath="none", int warn=0)
static std::string CONSTRAINTS_AUTO
Context for dataflow node construction.
Definition: DataFlow.h:527
std::unique_ptr< DistanceMatrix > getMatrix() const
Get the distance matrix.
void resetAdditionalParameters()
Reset all additional parameters.
const SubstitutionProcessInterface & process() const
void computeMatrix()
Perform the distance computation.
bool matchParametersValues(const ParameterList &parameters)
void setVerbose(size_t verbose)
std::shared_ptr< const AlignmentDataInterface > getData() const
void setAdditionalParameters(const ParameterList &parameters)
Specify a list of parameters to be estimated.
virtual void setDistanceMatrix(const DistanceMatrix &matrix)=0
Set the distance matrix to use.
virtual void computeTree()=0
Perform the clustering.
virtual const Tree & tree() const =0
static bool fileExists(const std::string &filename)
static std::vector< std::string > putStreamIntoVectorOfStrings(std::istream &input)
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
static std::string IT_TYPE_STEP
static std::string IT_TYPE_FULL
The phylogenetic node class.
Definition: Node.h:59
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:266
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:374
std::shared_ptr< OutputStream > profiler
std::shared_ptr< OutputStream > messenger
std::shared_ptr< OptimizationListener > listener
static std::string DISTANCEMETHOD_PAIRWISE
static std::unique_ptr< DistanceMatrix > estimateDistanceMatrix(DistanceEstimation &estimationMethod, const ParameterList &parametersToIgnore, const std::string &param=DISTANCEMETHOD_INIT, unsigned int verbose=0)
Estimate a distance matrix using maximum likelihood.
static std::string OPTIMIZATION_NEWTON
static unsigned int optimizeNumericalParameters2(std::shared_ptr< PhyloLikelihoodInterface > lik, const OptimizationOptions &optopt)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static std::string OPTIMIZATION_BFGS
static unsigned int optimizeNumericalParameters(std::shared_ptr< PhyloLikelihoodInterface > lik, const OptimizationOptions &optopt)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static std::string DISTANCEMETHOD_ITERATIONS
static std::string DISTANCEMETHOD_INIT
static std::unique_ptr< TreeTemplate< Node > > buildDistanceTree(DistanceEstimation &estimationMethod, AgglomerativeDistanceMethodInterface &reconstructionMethod, const std::string &param, OptimizationOptions &optopt)
Build a tree using a distance method.
static std::string OPTIMIZATION_BRENT
static std::string OPTIMIZATION_GRADIENT
virtual bool hasParameter(const std::string &name) const
virtual void addParameters(const ParameterList &params)
virtual ParameterList getCommonParametersWith(const ParameterList &params) const
virtual std::vector< std::string > getParameterNames() const
virtual void deleteParameter(const std::string &name)
virtual void reset()
virtual const Parameter & parameter(const std::string &name) const
virtual void deleteParameters(const std::vector< std::string > &names, bool mustExist=true)
virtual void setParameter(size_t index, const Parameter &param)
virtual size_t whichParameterHasName(const std::string &name) const
virtual ParameterList createSubList(const std::vector< std::string > &names) const
virtual std::string parameter() const
virtual std::shared_ptr< const ConstraintInterface > getConstraint() const
virtual void setConstraint(std::shared_ptr< ConstraintInterface > constraint)
virtual const std::string & getName() const
virtual bool hasConstraint() const
static std::shared_ptr< PhyloTree > buildFromTreeTemplate(const TreeTemplate< Node > &treetemp)
This Optimizer implements Newton's algorithm for finding a minimum of a function. This is in fact a m...
size_t numberOfRemainingTokens() const
const std::string & nextToken()
bool hasMoreToken() const
virtual ParameterList getSubstitutionModelParameters(bool independent) const =0
Methods to retrieve the parameters of specific objects.
virtual ParameterList getRootFrequenciesParameters(bool independent) const =0
virtual SubstitutionProcessInterface * clone() const =0
virtual ParameterList getRateDistributionParameters(bool independent) const =0
static std::unique_ptr< TreeTemplate< Node > > buildFromPhyloTree(const PhyloTree &treetemp)
The phylogenetic tree class.
Definition: TreeTemplate.h:59
static int robinsonFouldsDistance(const Tree &tr1, const Tree &tr2, bool checkNames=true, int *missing_in_tr2=NULL, int *missing_in_tr1=NULL)
Calculates the Robinson-Foulds topological distance between two trees.
Definition: TreeTools.cpp:842
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isEmpty(const std::string &s)
std::string toString(T t)
Defines the basic types of data flow nodes.
double convert(const bpp::ExtendedFloat &ef)
ExtendedFloat abs(const ExtendedFloat &ef)