bpp-phyl3 3.0.0
OptimizationTools.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
7
19
20// bpp-phyl
22#include "Io/Newick.h"
23#include "OptimizationTools.h"
25#include "Tree/PhyloTreeTools.h"
26
27// From bpp-seq:
28#include <Bpp/Seq/Io/Fasta.h>
29
30#include <memory>
31
32using namespace bpp;
33using namespace std;
34
35/******************************************************************************/
36
37OptimizationTools::OptimizationTools() {}
38
40
42 std::shared_ptr<PhyloLikelihoodInterface> lik,
43 const std::map<std::string, std::string>& params,
44 const std::string& suffix,
45 bool suffixIsOptional,
46 bool verb,
47 int warn) :
48 parameters(),
49 listener(nullptr),
50 nstep(1),
51 tolerance(0.000001),
52 nbEvalMax(1000000),
53 messenger(ApplicationTools::message),
54 profiler(ApplicationTools::message),
55 reparametrization(false),
56 useClock(0),
57 verbose(1),
58 optMethodDeriv(OPTIMIZATION_NEWTON),
59 optMethodModel(OPTIMIZATION_BRENT)
60{
62 string optimization = ApplicationTools::getStringParameter("optimization", params, "FullD(derivatives=Newton)", suffix, suffixIsOptional, warn);
63
64 map<string, string> optArgs;
65 KeyvalTools::parseProcedure(optimization, optMethodModel, optArgs);
66
67 if (optMethodModel == "D-Brent")
68 // Uses Newton-Brent method or Newton-BFGS method
70 else if (optMethodModel == "D-BFGS")
72 else if (optMethodModel == "FullD")
74 else
75 throw Exception("Unknown optimization method " + optMethodModel);
76
77 nstep = ApplicationTools::getParameter<unsigned int>("nstep", optArgs, 1, "", true, warn + 1);
78
79
80 // VERBOSITY
81
82 verbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", params, 2, suffix, suffixIsOptional, warn + 1);
83
84 // MESSAGE
85
86 string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
87 messenger =
88 (mhPath == "none") ? nullptr :
89 (mhPath == "std") ? ApplicationTools::message :
90 make_shared<StlOutputStream>(make_unique<ofstream>(mhPath.c_str(), ios::out));
91 if (verb)
92 ApplicationTools::displayResult("Message handler", mhPath);
93
94 // PROFILE
95 string prPath = ApplicationTools::getAFilePath("optimization.profiler", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
96 profiler =
97 (prPath == "none") ? nullptr :
98 (prPath == "std") ? ApplicationTools::message :
99 make_shared<StlOutputStream>(make_unique<ofstream>(prPath.c_str(), ios::out));
100 if (profiler)
101 profiler->setPrecision(20);
102 if (verb)
103 ApplicationTools::displayResult("Profiler", prPath);
104
105
106 bool scaleFirst = ApplicationTools::getBooleanParameter("optimization.scale_first", params, false, suffix, suffixIsOptional, warn + 1);
107 if (scaleFirst)
108 {
109 ApplicationTools::displayError("Sorry, optimization.scale_first not implemented yet for process.");
110 exit(-1);
111 }
112
113 // Should I ignore some parameters?
114
115 parameters = lik->getParameters();
116 vector<string> parNames = parameters.getParameterNames();
117
118 if (params.find("optimization.ignore_parameter") != params.end())
119 throw Exception("optimization.ignore_parameter is deprecated, use optimization.ignore_parameters instead!");
120
121 string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameters", params, "", suffix, suffixIsOptional, warn + 1);
122 StringTokenizer st(paramListDesc, ",");
123 while (st.hasMoreToken())
124 {
125 try
126 {
127 string param = st.nextToken();
128 if (param == "BrLen")
129 {
130 vector<string> vs = lik->getBranchLengthParameters().getParameterNames();
132 if (verb)
133 ApplicationTools::displayResult("Parameter ignored", string("Branch lengths"));
134 }
135 else if (param == "Ancient")
136 {
137 vector<string> vs = lik->getRootFrequenciesParameters().getParameterNames();
139 if (verb)
140 ApplicationTools::displayResult("Parameter ignored", string("Root frequencies"));
141 }
142 else if (param == "Model")
143 {
144 vector<string> vs = lik->getSubstitutionModelParameters().getParameterNames();
146 if (verb)
147 ApplicationTools::displayResult("Parameter ignored", string("Model"));
148 }
149 else if (param == "*")
150 {
152 if (verb)
153 ApplicationTools::displayResult("Parameter ignored", string("All"));
154 }
155 else if (param.find("*") != string::npos)
156 {
157 vector<string> vs = ApplicationTools::matchingParameters(param, parNames);
158 bool verbhere = verb;
159
160 if (vs.size() >= 20)
161 {
162 if (verb)
163 {
164 ApplicationTools::displayResult("Number of parameters ignored", vs.size());
165 ApplicationTools::displayMessage(" from " + param);
166 }
167 verbhere = false;
168 }
169
170 for (auto& it : vs)
171 {
173 if (verbhere)
174 ApplicationTools::displayResult("Parameter ignored", it);
175 }
176 }
177 else
178 {
180 if (verb)
181 ApplicationTools::displayResult("Parameter ignored", param);
182 }
183 }
184 catch (ParameterNotFoundException& pnfe)
185 {
186 ApplicationTools::displayWarning("Parameter '" + pnfe.parameter() + "' not found, and so can't be ignored!");
187 }
188 }
189
190 // Should I constrain some parameters?
191 vector<string> parToEstNames = parameters.getParameterNames();
192
193 if (params.find("optimization.constrain_parameter") != params.end())
194 throw Exception("optimization.constrain_parameter is deprecated, use optimization.constrain_parameters instead!");
195
196 paramListDesc = ApplicationTools::getStringParameter("optimization.constrain_parameters", params, "", suffix, suffixIsOptional, warn + 1);
197
198 string constraint = "";
199 string pc, param = "";
200
201 StringTokenizer st2(paramListDesc, ",");
202 while (st2.hasMoreToken())
203 {
204 try
205 {
206 pc = st2.nextToken();
207 string::size_type index = pc.find("=");
208 if (index == string::npos)
209 throw Exception("PhylogeneticsApplicationTools::optimizeParamaters. Bad constrain syntax, should contain `=' symbol: " + pc);
210 param = pc.substr(0, index);
211 constraint = pc.substr(index + 1);
212 std::shared_ptr<IntervalConstraint> ic(new IntervalConstraint(constraint));
213
214 vector<string> parNames2;
215
216 if (param == "BrLen")
217 parNames2 = lik->getBranchLengthParameters().getParameterNames();
218 else if (param == "Ancient")
219 parNames2 = lik->getRootFrequenciesParameters().getParameterNames();
220 else if (param == "Model")
221 {
222 vector<string> vs = lik->getSubstitutionModelParameters().getParameterNames();
223 }
224 else if (param.find("*") != string::npos)
225 parNames2 = ApplicationTools::matchingParameters(param, parToEstNames);
226 else
227 parNames2.push_back(param);
228
229
230 for (size_t i = 0; i < parNames2.size(); i++)
231 {
232 Parameter& par = parameters.parameter(parNames2[i]);
233 if (par.hasConstraint())
234 {
235 par.setConstraint(std::shared_ptr<ConstraintInterface>(*ic & (*par.getConstraint())));
236 if (par.getConstraint()->isEmpty())
237 throw Exception("Empty interval for parameter " + parNames[i] + par.getConstraint()->getDescription());
238 }
239 else
240 par.setConstraint(ic);
241
242 if (verb)
243 ApplicationTools::displayResult("Parameter constrained " + par.getName(), par.getConstraint()->getDescription());
244 }
245 }
246 catch (ParameterNotFoundException& pnfe)
247 {
248 ApplicationTools::displayWarning("Parameter '" + pnfe.parameter() + "' not found, and so can't be constrained!");
249 }
250 catch (ConstraintException& pnfe)
251 {
252 throw Exception("Parameter '" + param + "' does not fit the constraint " + constraint);
253 }
254 }
255
256 nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, warn + 1);
257 if (verb)
259
260 tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", params, .000001, suffix, suffixIsOptional, warn + 1);
261 if (verb)
263
264 // Backing up or restoring?
265 backupFile = ApplicationTools::getAFilePath("optimization.backup.file", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
266 if (backupFile != "none")
267 {
268 ApplicationTools::displayResult("Parameters will be backup to", backupFile);
271 {
272 ApplicationTools::displayMessage("A backup file was found! Try to restore parameters from previous run...");
273 ifstream bck(backupFile.c_str(), ios::in);
274 vector<string> lines = FileTools::putStreamIntoVectorOfStrings(bck);
275 double fval = TextTools::toDouble(lines[0].substr(5));
276 ParameterList pl = lik->getParameters();
277 for (size_t l = 1; l < lines.size(); ++l)
278 {
279 if (!TextTools::isEmpty(lines[l]))
280 {
281 StringTokenizer stp(lines[l], "=");
282 if (stp.numberOfRemainingTokens() != 2)
283 {
284 cerr << "Corrupted backup file!!!" << endl;
285 cerr << "at line " << l << ": " << lines[l] << endl;
286 }
287 string pname = stp.nextToken();
288 string pvalue = stp.nextToken();
289 if (pl.hasParameter(pname))
290 {
291 size_t p = pl.whichParameterHasName(pname);
292 pl.setParameter(p, AutoParameter(pl[p]));
293 pl[p].setValue(TextTools::toDouble(pvalue));
294 }
295 else
296 ApplicationTools::displayWarning("Unknown parameter in backup file : " + pname);
297 }
298 }
299 bck.close();
300 lik->setParameters(pl);
301 if (convert(abs(lik->getValue() - fval)) > 0.000001)
302 ApplicationTools::displayMessage("Changed likelihood from backup file.");
303 ApplicationTools::displayResult("Restoring log-likelihood", -lik->getValue());
304 }
305 }
306
307 string order = ApplicationTools::getStringParameter("derivatives", optArgs, "Newton", "", true, warn + 1);
308 if (order == "Gradient")
309 {
311 }
312 else if (order == "Newton")
313 {
315 }
316 else if (order == "BFGS")
317 {
319 }
320 else
321 throw Exception("Unknown derivatives algorithm: '" + order + "'.");
322
323 if (verb)
324 ApplicationTools::displayResult("Optimization method", optMethodModel);
325 if (verb)
326 ApplicationTools::displayResult("Algorithm used for derivable parameters", order);
327
328 // See if we should reparametrize:
329 reparametrization = ApplicationTools::getBooleanParameter("optimization.reparametrization", params, false, suffix, suffixIsOptional, warn + 1);
330 if (verb)
331 ApplicationTools::displayResult("Reparametrization", (reparametrization ? "yes" : "no"));
332
333 // See if we should use a molecular clock constraint:
334 string clock = ApplicationTools::getStringParameter("optimization.clock", params, "None", suffix, suffixIsOptional, warn + 1);
335 if (clock != "None" && clock != "Global")
336 throw Exception("Molecular clock option not recognized, should be one of 'Global' or 'None'.");
337 useClock = (clock == "Global");
338 if (verb)
339 ApplicationTools::displayResult("Molecular clock", clock);
340}
341
342/******************************************************************************/
343
344std::string OptimizationTools::OPTIMIZATION_NEWTON = "newton";
345std::string OptimizationTools::OPTIMIZATION_GRADIENT = "gradient";
346std::string OptimizationTools::OPTIMIZATION_BRENT = "Brent";
347std::string OptimizationTools::OPTIMIZATION_BFGS = "BFGS";
348
349/******************************************************************************/
350
352 std::shared_ptr<PhyloLikelihoodInterface> lik,
353 const OptimizationOptions& optopt)
354{
355 shared_ptr<SecondOrderDerivable> f = lik;
356 ParameterList pl = optopt.parameters;
357
358 // Shall we reparametrize the function to remove constraints?
359 if (optopt.reparametrization)
360 {
361 f = make_shared<ReparametrizationDerivableSecondOrderWrapper>(f, optopt.parameters);
362
363 // Reset parameters to remove constraints:
364 pl = f->getParameters().createSubList(optopt.parameters.getParameterNames());
365 }
366
367 // ///////////////
368 // Build optimizer:
369
370 // Branch lengths
371
372 auto desc = make_unique<MetaOptimizerInfos>();
373 unique_ptr<MetaOptimizer> poptimizer;
374
376 desc->addOptimizer("Branch length parameters", make_shared<ConjugateGradientMultiDimensions>(f), lik->getBranchLengthParameters().getParameterNames(), 2, MetaOptimizerInfos::IT_TYPE_FULL);
377 else if (optopt.optMethodDeriv == OPTIMIZATION_NEWTON)
378 desc->addOptimizer("Branch length parameters", make_shared<PseudoNewtonOptimizer>(f), lik->getBranchLengthParameters().getParameterNames(), 2, MetaOptimizerInfos::IT_TYPE_FULL);
379 else if (optopt.optMethodDeriv == OPTIMIZATION_BFGS)
380 desc->addOptimizer("Branch length parameters", make_shared<BfgsMultiDimensions>(f), lik->getBranchLengthParameters().getParameterNames(), 2, MetaOptimizerInfos::IT_TYPE_FULL);
381 else
382 throw Exception("OptimizationTools::optimizeNumericalParameters. Unknown derivative optimization method: " + optopt.optMethodDeriv);
383
384 // Other parameters
385
387 {
388 ParameterList plTmp = lik->getSubstitutionModelParameters();
389 plTmp.addParameters(lik->getRootFrequenciesParameters());
391
392 desc->addOptimizer("Substitution model parameters", make_shared<SimpleMultiDimensions>(f), plsm.getParameterNames(), 0, MetaOptimizerInfos::IT_TYPE_STEP);
393
394 ParameterList plrd = optopt.parameters.getCommonParametersWith(lik->getRateDistributionParameters());
395 desc->addOptimizer("Rate distribution parameters", make_shared<SimpleMultiDimensions>(f), plrd.getParameterNames(), 0, MetaOptimizerInfos::IT_TYPE_STEP);
396
397 poptimizer = make_unique<MetaOptimizer>(f, std::move(desc), optopt.nstep);
398 }
399 else if (optopt.optMethodModel == OPTIMIZATION_BFGS)
400 {
401 vector<string> vNameDer;
402 auto fnum = make_shared<ThreePointsNumericalDerivative>(f);
403
404 ParameterList plsm = optopt.parameters.getCommonParametersWith(lik->getSubstitutionModelParameters());
405 vNameDer = plsm.getParameterNames();
406
407 ParameterList plrd = optopt.parameters.getCommonParametersWith(lik->getRateDistributionParameters());
408
409 vector<string> vNameDer2 = plrd.getParameterNames();
410
411 vNameDer.insert(vNameDer.begin(), vNameDer2.begin(), vNameDer2.end());
412 fnum->setParametersToDerivate(vNameDer);
413
414 desc->addOptimizer("Rate & model distribution parameters", make_shared<BfgsMultiDimensions>(fnum), vNameDer, 1, MetaOptimizerInfos::IT_TYPE_FULL);
415 poptimizer = make_unique<MetaOptimizer>(fnum, std::move(desc), optopt.nstep);
416 }
417 else
418 throw Exception("OptimizationTools::optimizeNumericalParameters. Unknown optimization method: " + optopt.optMethodModel);
419
420 poptimizer->setVerbose(optopt.verbose);
421 poptimizer->setProfiler(optopt.profiler);
422 poptimizer->setMessageHandler(optopt.messenger);
423 poptimizer->setMaximumNumberOfEvaluations(optopt.nbEvalMax);
424 poptimizer->getStopCondition()->setTolerance(optopt.tolerance);
425
426 // Optimize TreeLikelihood function:
427 poptimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
428 auto nanListener = make_shared<NaNListener>(poptimizer.get(), lik.get());
429 poptimizer->addOptimizationListener(nanListener);
430 if (optopt.listener)
431 poptimizer->addOptimizationListener(optopt.listener);
432
433 poptimizer->init(pl);
434 poptimizer->optimize();
435
436 // optopt.parameters.setAllParametersValues(poptimizer->getParameters());
437
438 if (optopt.verbose > 0)
440
441 // We're done.
442 uint nb = poptimizer->getNumberOfEvaluations();
443 return nb;
444}
445
446
447/************************************************************/
448
450 std::shared_ptr<PhyloLikelihoodInterface> lik,
451 const OptimizationOptions& optopt)
452{
453 shared_ptr<SecondOrderDerivable> f = lik;
454 ParameterList pl = optopt.parameters;
455
456 // Shall we use a molecular clock constraint on branch lengths?
457 // unique_ptr<GlobalClockTreeLikelihoodFunctionWrapper> fclock;
458 // if (useClock)
459 // {
460 // fclock.reset(new GlobalClockTreeLikelihoodFunctionWrapper(lik));
461 // f = fclock.get();
462 // if (verbose > 0)
463 // ApplicationTools::displayResult("Log-likelihood after adding clock", -lik->getLogLikelihood());
464
465 // // Reset parameters to use new branch lengths. WARNING! 'old' branch parameters do not exist anymore and have been replaced by heights
466 // pl = fclock->getParameters().getCommonParametersWith(parameters);
467 // pl.addParameters(fclock->getHeightParameters());
468 // }
469
470 // Shall we reparametrize the function to remove constraints?
471 shared_ptr<SecondOrderDerivable> frep;
472 if (optopt.reparametrization)
473 {
475 f = frep;
476
477 // Reset parameters to remove constraints:
478 pl = f->getParameters().createSubList(pl.getParameterNames());
479 }
480
481 // Build optimizer:
482 unique_ptr<OptimizerInterface> optimizer;
483 shared_ptr<AbstractNumericalDerivative> fnum;
484
486 {
487 fnum = make_shared<TwoPointsNumericalDerivative>(dynamic_pointer_cast<FirstOrderDerivable>(f));
488 fnum->setInterval(0.0000001);
489 optimizer = make_unique<ConjugateGradientMultiDimensions>(fnum);
490 }
491 else if (optopt.optMethodDeriv == OPTIMIZATION_NEWTON)
492 {
493 fnum = make_shared<ThreePointsNumericalDerivative>(f);
494 fnum->setInterval(0.0001);
495 optimizer = make_unique<PseudoNewtonOptimizer>(fnum);
496 }
497 else if (optopt.optMethodDeriv == OPTIMIZATION_BFGS)
498 {
499 fnum = make_shared<TwoPointsNumericalDerivative>(dynamic_pointer_cast<FirstOrderDerivable>(f));
500 fnum->setInterval(0.0001);
501 optimizer = make_unique<BfgsMultiDimensions>(fnum);
502 }
503 else
504 throw Exception("OptimizationTools::optimizeNumericalParameters2. Unknown optimization method: " + optopt.optMethodDeriv);
505
506 // Numerical derivatives:
507 // Variables not derivatived in Likelihood DF but in numerical way
508 ParameterList tmp = lik->getParameters();
509
510 // if (useClock)
511 // tmp.addParameters(fclock->getHeightParameters());
512
513 fnum->setParametersToDerivate(tmp.getParameterNames());
514
515 optimizer->setVerbose(optopt.verbose);
516 optimizer->setProfiler(optopt.profiler);
517 optimizer->setMessageHandler(optopt.messenger);
518 optimizer->setMaximumNumberOfEvaluations(optopt.nbEvalMax);
519 optimizer->getStopCondition()->setTolerance(optopt.tolerance);
520
521 // Optimize TreeLikelihood function:
522 optimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
523 auto nanListener = make_shared<NaNListener>(optimizer.get(), lik.get());
524 optimizer->addOptimizationListener(nanListener);
525 if (optopt.listener)
526 optimizer->addOptimizationListener(optopt.listener);
527
528 optimizer->init(pl);
529 optimizer->optimize();
530
531 if (optopt.verbose > 0)
533
534 // We're done.
535 return optimizer->getNumberOfEvaluations();
536}
537
538/************************************************************/
539
541 std::shared_ptr<SingleProcessPhyloLikelihood> lik,
542 const OptimizationOptions& optopt)
543{
544 shared_ptr<SecondOrderDerivable> f = lik;
545 ParameterList pl = optopt.parameters;
546 if (optopt.reparametrization)
547 {
548 // Shall we reparametrize the function to remove constraints?
549 f = make_shared<ReparametrizationDerivableSecondOrderWrapper>(f, optopt.parameters);
550
551 // Reset parameters to remove constraints:
552 pl = f->getParameters().createSubList(optopt.parameters.getParameterNames());
553 }
554
555 // Build optimizer:
556 shared_ptr<AbstractNumericalDerivative> fnum;
557 unique_ptr<OptimizerInterface> optimizer;
558
560 {
561 lik->likelihoodCalculationSingleProcess().setNumericalDerivateConfiguration(0.00001, NumericalDerivativeType::ThreePoints);
562
563 fnum = make_shared<TwoPointsNumericalDerivative>(dynamic_pointer_cast<FirstOrderDerivable>(f));
564 fnum->setInterval(0.0000001);
565 optimizer.reset(new ConjugateGradientMultiDimensions(fnum));
566 }
567 else if (optopt.optMethodDeriv == OPTIMIZATION_NEWTON)
568 {
569 lik->likelihoodCalculationSingleProcess().setNumericalDerivateConfiguration(0.0001, NumericalDerivativeType::FivePoints);
570 fnum = make_shared<ThreePointsNumericalDerivative>(f);
571 fnum->setInterval(0.0001);
572 optimizer.reset(new PseudoNewtonOptimizer(fnum));
573 }
574 else if (optopt.optMethodDeriv == OPTIMIZATION_BFGS)
575 {
576 lik->likelihoodCalculationSingleProcess().setNumericalDerivateConfiguration(0.0001, NumericalDerivativeType::ThreePoints);
577 fnum = make_shared<TwoPointsNumericalDerivative>(dynamic_pointer_cast<FirstOrderDerivable>(f));
578 fnum->setInterval(0.0001);
579 optimizer.reset(new BfgsMultiDimensions(fnum));
580 }
581 else
582 throw Exception("OptimizationTools::optimizeNumericalParameters2. Unknown optimization method: " + optopt.optMethodDeriv);
583
584
585 // Variables not derivatived in Likelihood DF but in numerical way
586 ParameterList tmp = f->getParameters();
587
588 fnum->setParametersToDerivate(tmp.getParameterNames());
589
590 optimizer->setVerbose(optopt.verbose);
591 optimizer->setProfiler(optopt.profiler);
592 optimizer->setMessageHandler(optopt.messenger);
593 optimizer->setMaximumNumberOfEvaluations(optopt.nbEvalMax);
594 optimizer->getStopCondition()->setTolerance(optopt.tolerance);
595
596 // Optimize TreeLikelihood function:
597 optimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
598 auto nanListener = make_shared<NaNListener>(optimizer.get(), lik.get());
599 optimizer->addOptimizationListener(nanListener);
600 if (optopt.listener)
601 optimizer->addOptimizationListener(optopt.listener);
602
603 optimizer->init(pl);
604 optimizer->optimize();
605
606 if (optopt.verbose > 0)
608
609 // We're done.
610 return optimizer->getNumberOfEvaluations();
611}
612
613
614/******************************************************************************/
615
616std::string OptimizationTools::DISTANCEMETHOD_INIT = "init";
617std::string OptimizationTools::DISTANCEMETHOD_PAIRWISE = "pairwise";
618std::string OptimizationTools::DISTANCEMETHOD_ITERATIONS = "iterations";
619
620/******************************************************************************/
621
623 DistanceEstimation& estimationMethod,
624 const ParameterList& parametersToIgnore,
625 const std::string& param,
626 unsigned int verbose)
627{
628 if (param != DISTANCEMETHOD_PAIRWISE && param != DISTANCEMETHOD_INIT)
629 throw Exception("OptimizationTools::estimateDistanceMatrix. Invalid option param=" + param + ".");
630 estimationMethod.resetAdditionalParameters();
631 estimationMethod.setVerbose(verbose);
632 if (param == DISTANCEMETHOD_PAIRWISE)
633 {
634 ParameterList tmp = estimationMethod.process().getSubstitutionModelParameters(true);
635 tmp.addParameters(estimationMethod.process().getRateDistributionParameters(true));
636 tmp.addParameters(estimationMethod.process().getRootFrequenciesParameters(true));
637 tmp.deleteParameters(parametersToIgnore.getParameterNames());
638 estimationMethod.setAdditionalParameters(tmp);
639 }
640 // Compute matrice:
641 if (verbose > 0)
642 ApplicationTools::displayTask("Estimating distance matrix", true);
643 estimationMethod.computeMatrix();
644 auto matrix = estimationMethod.getMatrix();
645
646 if (verbose > 0)
648
649 return matrix;
650}
651
652/******************************************************************************/
653
654unique_ptr<TreeTemplate<Node>> OptimizationTools::buildDistanceTree(
655 DistanceEstimation& estimationMethod,
656 AgglomerativeDistanceMethodInterface& reconstructionMethod,
657 const std::string& param,
658 OptimizationOptions& optopt)
659{
660 estimationMethod.resetAdditionalParameters();
661 estimationMethod.setVerbose(optopt.verbose);
662
663 if (param == DISTANCEMETHOD_PAIRWISE)
664 {
665 ParameterList tmp = estimationMethod.process().getSubstitutionModelParameters(true);
666 tmp.addParameters(estimationMethod.process().getRateDistributionParameters(true));
667 tmp.addParameters(estimationMethod.process().getRootFrequenciesParameters(true));
668 tmp = tmp.getCommonParametersWith(optopt.parameters);
669 estimationMethod.setAdditionalParameters(tmp);
670 }
671
672 unique_ptr<TreeTemplate<Node>> tree = nullptr;
673
674 bool test = true;
675
676 auto process = std::shared_ptr<SubstitutionProcessInterface>(estimationMethod.process().clone());
677 auto autoProc = dynamic_pointer_cast<AutonomousSubstitutionProcessInterface>(process);
678 auto procMb = dynamic_pointer_cast<SubstitutionProcessCollectionMember>(process);
679 if (!autoProc && !procMb)
680 throw Exception("OptimizationTools::buildDistanceTree : unknown process type. Ask developpers.");
681
682 // Vector of successive trees, to return the best one
683 std::vector<unique_ptr<TreeTemplate<Node>>> vTree;
684 std::vector<double> vLik;
685
686 size_t nstep = 0;
687 while (test && nstep < 100)
688 {
689 // Compute matrice:
690 if (optopt.verbose > 0)
691 ApplicationTools::displayTask("Estimating distance matrix", true);
692
693 estimationMethod.computeMatrix();
694 auto matrix = estimationMethod.getMatrix();
695
696 if (estimationMethod.getVerbose() > 0)
698
699 // Compute tree:
700 if (matrix->size() == 2)
701 {
702 // Special case, there is only one possible tree:
703 Node* n1 = new Node(0);
704 Node* n2 = new Node(1, matrix->getName(0));
705 n2->setDistanceToFather((*matrix)(0, 0) / 2.);
706 Node* n3 = new Node(2, matrix->getName(1));
707 n3->setDistanceToFather((*matrix)(0, 0) / 2.);
708 n1->addSon(n2);
709 n1->addSon(n3);
710 return unique_ptr<TreeTemplate<Node>>(new TreeTemplate<Node>(n1));
711 }
712
713 if (optopt.verbose > 0)
714 ApplicationTools::displayTask("Building tree");
715
716 reconstructionMethod.setDistanceMatrix(*matrix);
717 reconstructionMethod.computeTree();
718
719 tree = make_unique<TreeTemplate<Node>>(reconstructionMethod.tree());
720
721 vTree.push_back(std::move(tree));
722
723 if (estimationMethod.getVerbose() > 0)
725
726 size_t nbTree = vTree.size();
727 const auto& ltree = vTree[nbTree - 1];
728
729 if (vTree.size() > 1)
730 {
731 for (size_t iT = 0; iT < nbTree - 1; iT++)
732 {
733 const auto& pTree = vTree[iT];
734 int rf = TreeTools::robinsonFouldsDistance(*pTree, *ltree);
735 // if (optopt.verbose > 0)
736 ApplicationTools::displayResult("Topo. distance with iteration " + TextTools::toString(iT + 1), TextTools::toString(rf));
737 test &= (rf != 0);
738 if (!test)
739 break;
740 }
741 }
742
743 if ((param != DISTANCEMETHOD_ITERATIONS) || !test)
744 break; // Ends here.
745
746 // Now, re-estimate parameters:
747 Context context;
748
749 auto phyloTree = make_shared<ParametrizablePhyloTree>(*PhyloTreeTools::buildFromTreeTemplate(*ltree));
750 if (autoProc)
751 autoProc->setPhyloTree(*phyloTree);
752 else if (procMb)
753 {
754 auto& coll = procMb->collection();
755 size_t maxTNb = procMb->getTreeNumber();
756 coll.replaceTree(phyloTree, maxTNb);
757 }
758
759 auto lik = make_shared<LikelihoodCalculationSingleProcess>(context, estimationMethod.getData(), process);
760 auto tl = make_shared<SingleProcessPhyloLikelihood>(context, lik);
761
762 vLik.push_back(tl->getValue());
763
764 // hide opt verbose
765 optopt.verbose = estimationMethod.getVerbose() > 0 ? uint(estimationMethod.getVerbose() - 1) : 0;
766
767 optimizeNumericalParameters(tl, optopt);
768 process->matchParametersValues(tl->getParameters());
769
770 estimationMethod.matchParametersValues(process->getParameters());
771
772 auto trtemp = std::make_shared<ParametrizablePhyloTree>(*tl->tree());
773 const PhyloTree trt2(*trtemp);
774 tree.reset(TreeTemplateTools::buildFromPhyloTree(trt2).release());
775
776 if (optopt.verbose > 0)
777 {
778 auto tmp = process->getSubstitutionModelParameters(true);
779 for (unsigned int i = 0; i < tmp.size(); ++i)
780 {
781 ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
782 }
783 tmp = process->getRootFrequenciesParameters(true);
784 for (unsigned int i = 0; i < tmp.size(); ++i)
785 {
786 ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
787 }
788 tmp = process->getRateDistributionParameters(true);
789 for (unsigned int i = 0; i < tmp.size(); ++i)
790 {
791 ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
792 }
793 }
794 nstep++;
795 }
796
797 size_t posM = static_cast<size_t>(std::distance(vLik.begin(), std::min_element(vLik.begin(), vLik.end())));
798
799 return std::move(vTree[posM]);
800}
801
802/******************************************************************************/
Interface for agglomerative distance methods.
static void displayMessage(const std::string &text)
static std::vector< std::string > matchingParameters(const std::string &pattern, const std::map< std::string, std::string > &params)
static void displayTask(const std::string &text, bool eof=false)
static std::shared_ptr< OutputStream > message
static void displayError(const std::string &text)
static void displayTaskDone()
static bool getBooleanParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, bool defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
static std::string getAFilePath(const std::string &parameter, const std::map< std::string, std::string > &params, bool isRequired=true, bool mustExist=true, const std::string &suffix="", bool suffixIsOptional=false, const std::string &defaultPath="none", int warn=0)
static std::string CONSTRAINTS_AUTO
Context for dataflow node construction.
Definition: DataFlow.h:527
void resetAdditionalParameters()
Reset all additional parameters.
void computeMatrix()
Perform the distance computation.
bool matchParametersValues(const ParameterList &parameters)
std::unique_ptr< DistanceMatrix > getMatrix() const
Get the distance matrix.
std::shared_ptr< const AlignmentDataInterface > getData() const
const SubstitutionProcessInterface & process() const
void setVerbose(size_t verbose)
void setAdditionalParameters(const ParameterList &parameters)
Specify a list of parameters to be estimated.
virtual void setDistanceMatrix(const DistanceMatrix &matrix)=0
Set the distance matrix to use.
virtual void computeTree()=0
Perform the clustering.
virtual const Tree & tree() const =0
static bool fileExists(const std::string &filename)
static std::vector< std::string > putStreamIntoVectorOfStrings(std::istream &input)
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
static std::string IT_TYPE_STEP
static std::string IT_TYPE_FULL
The phylogenetic node class.
Definition: Node.h:59
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:266
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:386
std::shared_ptr< OutputStream > profiler
std::shared_ptr< OutputStream > messenger
std::shared_ptr< OptimizationListener > listener
static std::string DISTANCEMETHOD_PAIRWISE
static std::unique_ptr< DistanceMatrix > estimateDistanceMatrix(DistanceEstimation &estimationMethod, const ParameterList &parametersToIgnore, const std::string &param=DISTANCEMETHOD_INIT, unsigned int verbose=0)
Estimate a distance matrix using maximum likelihood.
static std::string OPTIMIZATION_NEWTON
static unsigned int optimizeNumericalParameters2(std::shared_ptr< PhyloLikelihoodInterface > lik, const OptimizationOptions &optopt)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static std::string OPTIMIZATION_BFGS
static unsigned int optimizeNumericalParameters(std::shared_ptr< PhyloLikelihoodInterface > lik, const OptimizationOptions &optopt)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static std::string DISTANCEMETHOD_ITERATIONS
static std::string DISTANCEMETHOD_INIT
static std::unique_ptr< TreeTemplate< Node > > buildDistanceTree(DistanceEstimation &estimationMethod, AgglomerativeDistanceMethodInterface &reconstructionMethod, const std::string &param, OptimizationOptions &optopt)
Build a tree using a distance method.
static std::string OPTIMIZATION_BRENT
static std::string OPTIMIZATION_GRADIENT
virtual bool hasParameter(const std::string &name) const
virtual void addParameters(const ParameterList &params)
virtual ParameterList getCommonParametersWith(const ParameterList &params) const
virtual std::vector< std::string > getParameterNames() const
virtual void deleteParameter(const std::string &name)
virtual void reset()
virtual const Parameter & parameter(const std::string &name) const
virtual void deleteParameters(const std::vector< std::string > &names, bool mustExist=true)
virtual void setParameter(size_t index, const Parameter &param)
virtual size_t whichParameterHasName(const std::string &name) const
virtual ParameterList createSubList(const std::vector< std::string > &names) const
virtual std::string parameter() const
virtual std::shared_ptr< const ConstraintInterface > getConstraint() const
virtual void setConstraint(std::shared_ptr< ConstraintInterface > constraint)
virtual const std::string & getName() const
virtual bool hasConstraint() const
static std::shared_ptr< PhyloTree > buildFromTreeTemplate(const TreeTemplate< Node > &treetemp)
This Optimizer implements Newton's algorithm for finding a minimum of a function. This is in fact a m...
size_t numberOfRemainingTokens() const
const std::string & nextToken()
bool hasMoreToken() const
virtual ParameterList getSubstitutionModelParameters(bool independent) const =0
Methods to retrieve the parameters of specific objects.
virtual ParameterList getRootFrequenciesParameters(bool independent) const =0
virtual SubstitutionProcessInterface * clone() const =0
virtual ParameterList getRateDistributionParameters(bool independent) const =0
static std::unique_ptr< TreeTemplate< Node > > buildFromPhyloTree(const PhyloTree &treetemp)
The phylogenetic tree class.
Definition: TreeTemplate.h:59
static int robinsonFouldsDistance(const Tree &tr1, const Tree &tr2, bool checkNames=true, int *missing_in_tr2=NULL, int *missing_in_tr1=NULL)
Calculates the Robinson-Foulds topological distance between two trees.
Definition: TreeTools.cpp:841
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isEmpty(const std::string &s)
std::string toString(T t)
Defines the basic types of data flow nodes.
double convert(const bpp::ExtendedFloat &ef)
ExtendedFloat abs(const ExtendedFloat &ef)