bpp-phyl3  3.0.0
PseudoNewtonOptimizer.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 /**************************************************************************/
6 
8 
11 #include <Bpp/Text/TextTools.h>
13 
17 
18 using namespace bpp;
19 
20 /**************************************************************************/
21 
23 {
24  return NumTools::abs<double>(
25  dynamic_cast<const PseudoNewtonOptimizer*>(optimizer_)->currentValue_ -
26  dynamic_cast<const PseudoNewtonOptimizer*>(optimizer_)->previousValue_);
27 }
28 
29 /**************************************************************************/
30 
32 {
33  return getCurrentTolerance() < tolerance_;
34 }
35 
36 /**************************************************************************/
37 
38 PseudoNewtonOptimizer::PseudoNewtonOptimizer(shared_ptr<SecondOrderDerivable> function) :
41  previousValue_(0),
42  n_(0),
43  params_(),
44  maxCorrection_(10),
45  useCG_(true)
46 {
47  setDefaultStopCondition_(make_shared<FunctionStopCondition>(this));
49 }
50 
51 /**************************************************************************/
52 
54 {
55  n_ = getParameters().size();
59 }
60 
61 /**************************************************************************/
62 
64 {
65  ParameterList* bckPoint = 0;
66  if (updateParameters())
67  bckPoint = new ParameterList(getFunction()->getParameters());
68  double newValue = 0;
69  // Compute derivative at current point:
70  std::vector<double> movements(n_);
71  ParameterList newPoint = getParameters();
72 
73  for (size_t i = 0; i < n_; i++)
74  {
75  double firstOrderDerivative = secondOrderDerivableFunction().getFirstOrderDerivative(params_[i]);
76  double secondOrderDerivative = secondOrderDerivableFunction().getSecondOrderDerivative(params_[i]);
77  if (secondOrderDerivative == 0)
78  {
79  movements[i] = 0;
80  }
81  else if (secondOrderDerivative < 0)
82  {
83  printMessage("!!! Second order derivative is negative for parameter " + params_[i] + "(" + TextTools::toString(getParameters()[i].getValue()) + "). Moving in the other direction.");
84  // movements[i] = 0; // We want to reach a minimum, not a maximum!
85  // My personal improvement:
86  movements[i] = -firstOrderDerivative / secondOrderDerivative;
87  }
88  else
89  movements[i] = firstOrderDerivative / secondOrderDerivative;
90  if (std::isnan(movements[i]))
91  {
92  printMessage("!!! Non derivable point at " + params_[i] + ". No move performed. (f=" + TextTools::toString(currentValue_) + ", d1=" + TextTools::toString(firstOrderDerivative) + ", d2=" + TextTools::toString(secondOrderDerivative) + ").");
93  movements[i] = 0; // Either first or second order derivative is infinity. This may happen when the function == inf at this point.
94  }
95  // DEBUG:
96  // cerr << "PN[" << params_[i] << "]=" << getParameters().parameter(params_[i]).getValue() << "\t" << movements[i] << "\t " << firstOrderDerivative << "\t" << secondOrderDerivative << endl;
97  newPoint[i].setValue(getParameters()[i].getValue() - movements[i]);
98  // Correct the movement in case of constraint (this is used in case of Felsenstein-Churchill correction:
99  movements[i] = getParameters()[i].getValue() - newPoint[i].getValue();
100  }
101  newValue = getFunction()->f(newPoint);
102 
103  // Check newValue:
104  unsigned int count = 0;
105  while ((count < maxCorrection_) && ((newValue > currentValue_ + getStopCondition()->getTolerance()) || std::isnan(newValue)))
106  {
107  // Restore previous point (all parameters in case of global constraint):
108  if ((count == 0) && updateParameters())
109  getFunction()->setParameters(*bckPoint);
110 
111  if (!(useCG_ && (count == 3)))
112  {
113  printMessage("!!! Function at new point is greater than at current point: " + TextTools::toString(newValue) + ">" + TextTools::toString(currentValue_) + ". Applying Felsenstein-Churchill correction: " + TextTools::toString(count));
114 
115  for (size_t i = 0; i < movements.size(); i++)
116  {
117  movements[i] = movements[i] / 2;
118  newPoint[i].setValue(getParameters()[i].getValue() - movements[i]);
119  }
120  newValue = getFunction()->f(newPoint);
121  }
122  else
123  {
124  printMessage("!!! Felsenstein-Churchill correction applied too many times.");
125  printMessage("Use conjugate gradients optimization.");
127  ConjugateGradientMultiDimensions opt(dynamic_pointer_cast<FirstOrderDerivable>(function_));
129  opt.setProfiler(getProfiler());
131  opt.setVerbose(0);
132  double tol = std::max(getStopCondition()->getCurrentTolerance() / 2., getStopCondition()->getTolerance());
133  opt.getStopCondition()->setTolerance(tol);
135  getFunction()->setParameters(getParameters());
136  opt.init(getParameters());
137  opt.optimize();
138  newPoint = opt.getParameters();
139  newValue = opt.getFunctionValue();
140 
141  if (newValue > currentValue_ + tol)
142  {
143  printMessage("!!! Conjugate gradient method failed to improve likelihood.");
144  printMessage("Back to Felsenstein-Churchill method.");
145  }
146  }
147  count++;
148  }
149 
150  if (newValue > currentValue_ + getStopCondition()->getTolerance())
151  {
152  printMessage("PseudoNewtonOptimizer::doStep. Value could not be ameliorated!");
153  newValue = currentValue_;
154  }
155  else
156  {
158  secondOrderDerivableFunction().setParameters(newPoint); // Compute derivatives for this point
159 
162  getParameters_() = newPoint;
163  }
164 
165  if (updateParameters())
166  delete bckPoint;
167  return newValue;
168 }
169 
170 /**************************************************************************/
const OptimizerInterface * optimizer_
std::shared_ptr< OptimizationStopCondition > getStopCondition() override
bool updateParameters() const override
void setProfiler(std::shared_ptr< OutputStream > profiler) override
std::shared_ptr< FunctionInterface > function_
void init(const ParameterList &params) override
void printMessage(const std::string &message)
std::string getConstraintPolicy() const override
void setConstraintPolicy(const std::string &constraintPolicy) override
std::shared_ptr< OutputStream > getMessageHandler() const override
const FunctionInterface & function() const override
std::shared_ptr< OptimizationStopCondition > getDefaultStopCondition() override
void setMessageHandler(std::shared_ptr< OutputStream > mh) override
unsigned int nbEvalMax_
double getFunctionValue() const override
void setStopCondition(std::shared_ptr< OptimizationStopCondition > stopCondition) override
std::shared_ptr< OutputStream > getProfiler() const override
ParameterList & getParameters_()
void setDefaultStopCondition_(std::shared_ptr< OptimizationStopCondition > osc)
std::shared_ptr< const FunctionInterface > getFunction() const override
const ParameterList & getParameters() const override
void setMaximumNumberOfEvaluations(unsigned int max) override
void setVerbose(unsigned int v) override
virtual double getFirstOrderDerivative(const std::string &variable) const=0
virtual void setParameters(const ParameterList &parameters)=0
size_t size() const
virtual std::vector< std::string > getParameterNames() const
This Optimizer implements Newton's algorithm for finding a minimum of a function. This is in fact a m...
PseudoNewtonOptimizer(std::shared_ptr< SecondOrderDerivable > function)
std::vector< std::string > params_
const SecondOrderDerivable & secondOrderDerivableFunction() const
void doInit(const ParameterList &params)
virtual void enableSecondOrderDerivatives(bool yn)=0
virtual double getSecondOrderDerivative(const std::string &variable) const=0
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
Defines the basic types of data flow nodes.