bpp-phyl3 3.0.0
DistanceEstimation.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "../SitePatterns.h"
6#include "../Tree/Tree.h"
7#include "../PatternTools.h"
8#include "../Io/Newick.h"
9
10#include "DistanceEstimation.h"
11
12// From bpp-core:
15
16// From bpp-seq:
17#include <Bpp/Seq/SiteTools.h>
18#include <Bpp/Seq/Sequence.h>
21
22// bpp-phyl
24
25using namespace bpp;
26
27// From the STL:
28#include <vector>
29#include <string>
30#include <iostream>
31#include <fstream>
32
33using namespace std;
34
35/******************************************************************************/
36
37void DistanceEstimation::init_()
38{
39 auto desc = make_unique<MetaOptimizerInfos>();
40
41 // Br Len optimizer
42 std::vector<std::string> name;
43 auto procMb = dynamic_pointer_cast<SubstitutionProcessCollectionMember>(process_);
44 if (!procMb)
45 {
46 name.push_back("BrLen0");
47 name.push_back("BrLen1");
48 }
49 else
50 {
51 const auto& vTn = procMb->getCollection()->getTreeNumbers();
52 numProc_ = *std::max_element(vTn.begin(), vTn.end()) + 1;
53
54 name.push_back("BrLen0_" + TextTools::toString(numProc_));
55 name.push_back("BrLen1_" + TextTools::toString(numProc_));
56 }
57
58 desc->addOptimizer("Branch length", std::make_shared<PseudoNewtonOptimizer>(nullptr), name, 2, MetaOptimizerInfos::IT_TYPE_FULL);
59
60 // Process optimizer
61 ParameterList tmp = process_->getSubstitutionModelParameters(true);
62 tmp.addParameters(process_->getRateDistributionParameters(true));
63 tmp.addParameters(process_->getRootFrequenciesParameters(true));
64 desc->addOptimizer("substitution model, root and rate distribution", std::make_shared<SimpleMultiDimensions>(nullptr), tmp.getParameterNames(), 0, MetaOptimizerInfos::IT_TYPE_STEP);
65
66 defaultOptimizer_ = std::make_shared<MetaOptimizer>(nullptr, std::move(desc));
67 defaultOptimizer_->setMessageHandler(nullptr);
68 defaultOptimizer_->setProfiler(nullptr);
69 defaultOptimizer_->getStopCondition()->setTolerance(0.0001);
70 optimizer_ = dynamic_pointer_cast<OptimizerInterface>(defaultOptimizer_);
71}
72
74{
75 Context context;
76
77 size_t n = sites_->getNumberOfSequences();
78 vector<string> names = sites_->getSequenceNames();
79 dist_ = std::make_shared<DistanceMatrix>(names);
80 optimizer_->setVerbose(static_cast<unsigned int>(max(static_cast<int>(verbose_) - 2, 0)));
81
82 Newick reader;
83
84 auto autoProc = dynamic_pointer_cast<AutonomousSubstitutionProcessInterface>(process_);
85 auto procMb = dynamic_pointer_cast<SubstitutionProcessCollectionMember>(process_);
86 if (!autoProc && !procMb)
87 throw Exception("DistanceMatrix::computeMatrix : unknown process type. Ask developpers.");
88
89 size_t treeN = 0;
90 if (procMb)
91 treeN = procMb->getTreeNumber();
92
93 for (size_t i = 0; i < n; ++i)
94 {
95 (*dist_)(i, i) = 0;
96 if (verbose_ == 1)
97 {
98 ApplicationTools::displayGauge(i, n - 1, '=');
99 }
100 for (size_t j = i + 1; j < n; ++j)
101 {
102 if (verbose_ > 1)
103 {
104 ApplicationTools::displayGauge(j - i - 1, n - i - 2, '=');
105 }
106
107 auto phyloTree = make_shared<bpp::ParametrizablePhyloTree>(*reader.parenthesisToPhyloTree("(" + names[j] + ":0.01," + names[i] + ":0.01);", false, "", false, false));
108
109 if (autoProc)
110 autoProc->setPhyloTree(*phyloTree);
111 else if (procMb)
112 {
113 auto& coll = procMb->collection();
114 if (!coll.hasTreeNumber(numProc_))
115 coll.addTree(phyloTree, numProc_);
116 else
117 coll.replaceTree(phyloTree, numProc_);
118 procMb->setTreeNumber(numProc_, false);
119 }
120
121 auto lik = std::make_shared<LikelihoodCalculationSingleProcess>(context, sites_, process_);
122 auto llh = std::make_shared<SingleProcessPhyloLikelihood>(context, lik);
123
124 // Optimization:
125 optimizer_->setFunction(llh);
126 optimizer_->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
127 ParameterList params = llh->getBranchLengthParameters();
129
130 optimizer_->init(params);
131 optimizer_->optimize();
132
133 // Store results:
134 if (autoProc)
135 (*dist_)(i, j) = (*dist_)(j, i) = llh->getParameterValue("BrLen0") + llh->getParameterValue("BrLen1");
136 else
137 (*dist_)(i, j) = (*dist_)(j, i) = llh->getParameterValue("BrLen0_" + TextTools::toString(numProc_)) + llh->getParameterValue("BrLen1_" + TextTools::toString(numProc_));
138 }
140 ApplicationTools::message->endLine();
141 }
142
143 // set back correct number for process, if needed
144 if (procMb) // set back correct process number for pro
145 procMb->setTreeNumber(treeN, false);
146}
147
148/******************************************************************************/
static std::shared_ptr< OutputStream > message
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
static std::string CONSTRAINTS_AUTO
Context for dataflow node construction.
Definition: DataFlow.h:527
std::shared_ptr< DistanceMatrix > dist_
std::shared_ptr< const AlignmentDataInterface > sites_
void computeMatrix()
Perform the distance computation.
std::shared_ptr< MetaOptimizer > defaultOptimizer_
std::shared_ptr< SubstitutionProcessInterface > process_
std::shared_ptr< OptimizerInterface > optimizer_
static std::string IT_TYPE_STEP
static std::string IT_TYPE_FULL
The so-called 'newick' parenthetic format.
Definition: Newick.h:58
std::unique_ptr< PhyloTree > parenthesisToPhyloTree(const std::string &description, bool bootstrap=false, const std::string &propertyName="", bool withId=false, bool verbose=false) const
Definition: Newick.cpp:336
virtual void addParameters(const ParameterList &params)
virtual std::vector< std::string > getParameterNames() const
std::string toString(T t)
Defines the basic types of data flow nodes.