bpp-phyl3  3.0.0
DistanceEstimation.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../SitePatterns.h"
6 #include "../Tree/Tree.h"
7 #include "../PatternTools.h"
8 #include "../Io/Newick.h"
9 
10 #include "DistanceEstimation.h"
11 
12 // From bpp-core:
15 
16 // From bpp-seq:
17 #include <Bpp/Seq/SiteTools.h>
18 #include <Bpp/Seq/Sequence.h>
20 #include <Bpp/Seq/DistanceMatrix.h>
21 
22 // bpp-phyl
24 
25 using namespace bpp;
26 
27 // From the STL:
28 #include <vector>
29 #include <string>
30 #include <iostream>
31 #include <fstream>
32 
33 using namespace std;
34 
35 /******************************************************************************/
37 {
38  auto desc = make_unique<MetaOptimizerInfos>();
39 
40  // Br Len optimizer
41  std::vector<std::string> name;
42  auto procMb = dynamic_pointer_cast<SubstitutionProcessCollectionMember>(process_);
43  if (!procMb)
44  {
45  name.push_back("BrLen0");
46  name.push_back("BrLen1");
47  }
48  else
49  {
50  const auto& vTn = procMb->getCollection()->getTreeNumbers();
51  numProc_ = *std::max_element(vTn.begin(),vTn.end())+1;
52 
53  name.push_back("BrLen0_"+TextTools::toString(numProc_));
54  name.push_back("BrLen1_"+TextTools::toString(numProc_));
55  }
56 
57  desc->addOptimizer("Branch length", std::make_shared<PseudoNewtonOptimizer>(nullptr), name, 2, MetaOptimizerInfos::IT_TYPE_FULL);
58 
59  // Process optimizer
60  ParameterList tmp = process_->getSubstitutionModelParameters(true);
61  tmp.addParameters(process_->getRateDistributionParameters(true));
62  tmp.addParameters(process_->getRootFrequenciesParameters(true));
63  desc->addOptimizer("substitution model, root and rate distribution", std::make_shared<SimpleMultiDimensions>(nullptr), tmp.getParameterNames(), 0, MetaOptimizerInfos::IT_TYPE_STEP);
64 
65  defaultOptimizer_ = std::make_shared<MetaOptimizer>(nullptr, std::move(desc));
66  defaultOptimizer_->setMessageHandler(nullptr);
67  defaultOptimizer_->setProfiler(nullptr);
68  defaultOptimizer_->getStopCondition()->setTolerance(0.0001);
69  optimizer_ = dynamic_pointer_cast<OptimizerInterface>(defaultOptimizer_);
70 }
71 
73 {
74  Context context;
75 
76  size_t n = sites_->getNumberOfSequences();
77  vector<string> names = sites_->getSequenceNames();
78  dist_ = std::shared_ptr<DistanceMatrix>(new DistanceMatrix(names));
79  optimizer_->setVerbose(static_cast<unsigned int>(max(static_cast<int>(verbose_) - 2, 0)));
80 
81  Newick reader;
82 
83  auto autoProc = dynamic_pointer_cast<AutonomousSubstitutionProcessInterface>(process_);
84  auto procMb = dynamic_pointer_cast<SubstitutionProcessCollectionMember>(process_);
85  if (!autoProc && !procMb)
86  throw Exception("DistanceMatrix::computeMatrix : unknown process type. Ask developpers.");
87 
88  size_t treeN = 0;
89  if (procMb)
90  treeN = procMb->getTreeNumber();
91 
92  for (size_t i = 0; i < n; ++i)
93  {
94  (*dist_)(i, i) = 0;
95  if (verbose_ == 1)
96  {
97  ApplicationTools::displayGauge(i, n - 1, '=');
98  }
99  for (size_t j = i + 1; j < n; ++j)
100  {
101  if (verbose_ > 1)
102  {
103  ApplicationTools::displayGauge(j - i - 1, n - i - 2, '=');
104  }
105 
106  auto phyloTree = make_shared<bpp::ParametrizablePhyloTree>(*reader.parenthesisToPhyloTree("(" + names[j] + ":0.01," + names[i] + ":0.01);", false, "", false, false));
107 
108  if (autoProc)
109  autoProc->setPhyloTree(*phyloTree);
110  else
111  if (procMb)
112  {
113  auto& coll = procMb->collection();
114  if (!coll.hasTreeNumber(numProc_))
115  coll.addTree(phyloTree, numProc_);
116  else
117  coll.replaceTree(phyloTree, numProc_);
118  procMb->setTreeNumber(numProc_, false);
119  }
120 
121  auto lik = std::make_shared<LikelihoodCalculationSingleProcess>(context, sites_, process_);
122  auto llh = std::make_shared<SingleProcessPhyloLikelihood>(context, lik);
123 
124  // Optimization:
125  optimizer_->setFunction(llh);
126  optimizer_->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
127  ParameterList params = llh->getBranchLengthParameters();
128  params.addParameters(parameters_);
129 
130  optimizer_->init(params);
131  optimizer_->optimize();
132 
133  // Store results:
134  if (autoProc)
135  (*dist_)(i, j) = (*dist_)(j, i) = llh->getParameterValue("BrLen0") + llh->getParameterValue("BrLen1");
136  else
137  (*dist_)(i, j) = (*dist_)(j, i) = llh->getParameterValue("BrLen0_"+TextTools::toString(numProc_)) + llh->getParameterValue("BrLen1_"+TextTools::toString(numProc_));
138 
139  }
140  if (verbose_ > 1 && ApplicationTools::message)
141  ApplicationTools::message->endLine();
142  }
143 
144  // set back correct number for process, if needed
145  if (procMb) // set back correct process number for pro
146  procMb->setTreeNumber(treeN, false);
147 
148 }
149 
150 /******************************************************************************/
static std::shared_ptr< OutputStream > message
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
static std::string CONSTRAINTS_AUTO
Context for dataflow node construction.
Definition: DataFlow.h:527
void computeMatrix()
Perform the distance computation.
static std::string IT_TYPE_STEP
static std::string IT_TYPE_FULL
The so-called 'newick' parenthetic format.
Definition: Newick.h:58
std::unique_ptr< PhyloTree > parenthesisToPhyloTree(const std::string &description, bool bootstrap=false, const std::string &propertyName="", bool withId=false, bool verbose=false) const
Definition: Newick.cpp:336
virtual void addParameters(const ParameterList &params)
virtual std::vector< std::string > getParameterNames() const
std::string toString(T t)
Defines the basic types of data flow nodes.