bpp-phyl3 3.0.0
NeighborJoining.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "../Tree/Tree.h"
6#include "NeighborJoining.h"
7
8using namespace bpp;
9
10#include <cmath>
11#include <iostream>
12
13using namespace std;
14
15std::vector<size_t> NeighborJoining::getBestPair()
16{
17 for (std::map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
18 {
19 size_t id = i->first;
20 sumDist_[id] = 0;
21 for (map<size_t, Node*>::iterator j = currentNodes_.begin(); j != currentNodes_.end(); j++)
22 {
23 size_t jd = j->first;
24 sumDist_[id] += matrix_(id, jd);
25 }
26 }
27
28 vector<size_t> bestPair(2);
29 double critMax = std::log(0.);
30 for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
31 {
32 size_t id = i->first;
33 map<size_t, Node*>::iterator j = i;
34 j++;
35 for ( ; j != currentNodes_.end(); j++)
36 {
37 size_t jd = j->first;
38 double crit = sumDist_[id] + sumDist_[jd] - static_cast<double>(currentNodes_.size() - 2) * matrix_(id, jd);
39 // cout << "\t" << id << "\t" << jd << "\t" << crit << endl;
40 if (crit > critMax)
41 {
42 critMax = crit;
43 bestPair[0] = id;
44 bestPair[1] = jd;
45 }
46 }
47 }
48
49 if (critMax == std::log(0.))
50 {
51 throw Exception("Unexpected error: no maximum criterium found.");
52 }
53 return bestPair;
54}
55
56std::vector<double> NeighborJoining::computeBranchLengthsForPair(const std::vector<size_t>& pair)
57{
58 double ratio = (sumDist_[pair[0]] - sumDist_[pair[1]]) / static_cast<double>(currentNodes_.size() - 2);
59 vector<double> d(2);
61 {
62 d[0] = std::max(.5 * (matrix_(pair[0], pair[1]) + ratio), 0.);
63 d[1] = std::max(.5 * (matrix_(pair[0], pair[1]) - ratio), 0.);
64 }
65 else
66 {
67 d[0] = .5 * (matrix_(pair[0], pair[1]) + ratio);
68 d[1] = .5 * (matrix_(pair[0], pair[1]) - ratio);
69 }
70 return d;
71}
72
73double NeighborJoining::computeDistancesFromPair(const std::vector<size_t>& pair, const std::vector<double>& branchLengths, size_t pos)
74{
75 return
77 std::max(.5 * (matrix_(pair[0], pos) - branchLengths[0] + matrix_(pair[1], pos) - branchLengths[1]), 0.)
78 : .5 * (matrix_(pair[0], pos) - branchLengths[0] + matrix_(pair[1], pos) - branchLengths[1]);
79}
80
82{
83 Node* root = new Node(idRoot);
84 map<size_t, Node* >::iterator it = currentNodes_.begin();
85 size_t i1 = it->first;
86 Node* n1 = it->second;
87 it++;
88 size_t i2 = it->first;
89 Node* n2 = it->second;
90 if (currentNodes_.size() == 2)
91 {
92 // Rooted
93 double d = matrix_(i1, i2) / 2;
94 root->addSon(n1);
95 root->addSon(n2);
98 }
99 else
100 {
101 // Unrooted
102 it++;
103 size_t i3 = it->first;
104 Node* n3 = it->second;
105 double d1 = positiveLengths_ ?
106 std::max(matrix_(i1, i2) + matrix_(i1, i3) - matrix_(i2, i3), 0.)
107 : matrix_(i1, i2) + matrix_(i1, i3) - matrix_(i2, i3);
108 double d2 = positiveLengths_ ?
109 std::max(matrix_(i2, i1) + matrix_(i2, i3) - matrix_(i1, i3), 0.)
110 : matrix_(i2, i1) + matrix_(i2, i3) - matrix_(i1, i3);
111 double d3 = positiveLengths_ ?
112 std::max(matrix_(i3, i1) + matrix_(i3, i2) - matrix_(i1, i2), 0.)
113 : matrix_(i3, i1) + matrix_(i3, i2) - matrix_(i1, i2);
114 root->addSon(n1);
115 root->addSon(n2);
116 root->addSon(n3);
117 n1->setDistanceToFather(d1 / 2.);
118 n2->setDistanceToFather(d2 / 2.);
119 n3->setDistanceToFather(d3 / 2.);
120 }
121 tree_.reset(new TreeTemplate<Node>(root));
122}
double computeDistancesFromPair(const std::vector< size_t > &pair, const std::vector< double > &branchLengths, size_t pos)
Actualizes the distance matrix according to a given pair and the corresponding branch lengths.
std::vector< double > sumDist_
void finalStep(int idRoot)
Method called when there ar eonly three remaining node to agglomerate, and creates the root node of t...
std::vector< double > computeBranchLengthsForPair(const std::vector< size_t > &pair)
Compute the branch lengths for two nodes to agglomerate.
The phylogenetic node class.
Definition: Node.h:59
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:266
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:386
The phylogenetic tree class.
Definition: TreeTemplate.h:59
Defines the basic types of data flow nodes.