bpp-phyl3 3.0.0
BioNJ.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
6
7#include "../Tree/Tree.h"
8#include "BioNJ.h"
9
10using namespace bpp;
11
12// From the STL:
13#include <cmath>
14#include <iostream>
15
16using namespace std;
17
18double BioNJ::computeDistancesFromPair(const vector<size_t>& pair, const vector<double>& branchLengths, size_t pos)
19{
20 return positiveLengths_ ?
21 std::max(lambda_ * (matrix_(pair[0], pos) - branchLengths[0]) + (1 - lambda_) * (matrix_(pair[1], pos) - branchLengths[1]), 0.)
22 : lambda_ * (matrix_(pair[0], pos) - branchLengths[0]) + (1 - lambda_) * (matrix_(pair[1], pos) - branchLengths[1]);
23}
24
26{
27 // Initialization:
28 for (size_t i = 0; i < matrix_.size(); i++)
29 {
30 currentNodes_[i] = getLeafNode(static_cast<int>(i), matrix_.getName(i));
31 }
32 int idNextNode = static_cast<int>(matrix_.size());
33 vector<double> newDist(matrix_.size());
34 vector<double> newVar(matrix_.size());
35
36 // Build tree:
37 while (currentNodes_.size() > (rootTree_ ? 2 : 3))
38 {
39 if (verbose_)
41 vector<size_t> bestPair = getBestPair();
42 vector<double> distances = computeBranchLengthsForPair(bestPair);
43 Node* best1 = currentNodes_[bestPair[0]];
44 Node* best2 = currentNodes_[bestPair[1]];
45 // Distances may be used by getParentNodes (PGMA for instance).
46 best1->setDistanceToFather(distances[0]);
47 best2->setDistanceToFather(distances[1]);
48 Node* parent = getParentNode(idNextNode++, best1, best2);
49 // compute lambda
50 lambda_ = 0;
51 if (variance_(bestPair[0], bestPair[1]) == 0)
52 lambda_ = .5;
53 else
54 {
55 for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
56 {
57 size_t id = i->first;
58 if (id != bestPair[0] && id != bestPair[1])
59 lambda_ += (variance_(bestPair[1], id) - variance_(bestPair[0], id));
60 }
61 double div = 2 * static_cast<double>(currentNodes_.size() - 2) * variance_(bestPair[0], bestPair[1]);
62 lambda_ /= div;
63 lambda_ += .5;
64 }
65 if (lambda_ < 0.)
66 lambda_ = 0.;
67 if (lambda_ > 1.)
68 lambda_ = 1.;
69
70 for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
71 {
72 size_t id = i->first;
73 if (id != bestPair[0] && id != bestPair[1])
74 {
75 newDist[id] = computeDistancesFromPair(bestPair, distances, id);
76 newVar[id] = lambda_ * variance_(bestPair[0], id) + (1 - lambda_) * variance_(bestPair[1], id) - lambda_ * (1 - lambda_) * variance_(bestPair[0], bestPair[1]);
77 }
78 else
79 {
80 newDist[id] = 0;
81 }
82 }
83 // Actualize currentNodes_:
84 currentNodes_[bestPair[0]] = parent;
85 currentNodes_.erase(bestPair[1]);
86 for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
87 {
88 size_t id = i->first;
89 matrix_( bestPair[0], id) = matrix_(id, bestPair[0]) = newDist[id];
90 variance_(bestPair[0], id) = variance_(id, bestPair[0]) = newVar[id];
91 }
92 }
93 finalStep(idNextNode);
94}
virtual Node * getLeafNode(int id, const std::string &name)
Get a leaf node.
virtual Node * getParentNode(int id, Node *son1, Node *son2)
Get an inner node.
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
double computeDistancesFromPair(const std::vector< size_t > &pair, const std::vector< double > &branchLengths, size_t pos)
Actualizes the distance matrix according to a given pair and the corresponding branch lengths.
Definition: BioNJ.cpp:18
DistanceMatrix variance_
Definition: BioNJ.h:25
double lambda_
Definition: BioNJ.h:26
void computeTree()
Compute the tree corresponding to the distance matrix.
Definition: BioNJ.cpp:25
const std::string & getName(std::size_t i) const
std::size_t size() const
std::vector< size_t > getBestPair()
Get the best pair of nodes to agglomerate.
void finalStep(int idRoot)
Method called when there ar eonly three remaining node to agglomerate, and creates the root node of t...
std::vector< double > computeBranchLengthsForPair(const std::vector< size_t > &pair)
Compute the branch lengths for two nodes to agglomerate.
The phylogenetic node class.
Definition: Node.h:59
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:266
Defines the basic types of data flow nodes.