bpp-phyl3 3.0.0
HierarchicalClustering.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "../Tree/NodeTemplate.h"
7
8using namespace bpp;
9using namespace std;
10
11const string HierarchicalClustering::COMPLETE = "Complete";
12const string HierarchicalClustering::SINGLE = "Single";
13const string HierarchicalClustering::AVERAGE = "Average";
14const string HierarchicalClustering::MEDIAN = "Median";
15const string HierarchicalClustering::WARD = "Ward";
16const string HierarchicalClustering::CENTROID = "Centroid";
17
18vector<size_t> HierarchicalClustering::getBestPair()
19{
20 vector<size_t> bestPair(2);
21 double distMin = -std::log(0.);
22 for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
23 {
24 size_t id = i->first;
25 map<size_t, Node*>::iterator j = i;
26 j++;
27 for ( ; j != currentNodes_.end(); j++)
28 {
29 size_t jd = j->first;
30 double dist = matrix_(id, jd);
31 if (dist < distMin)
32 {
33 distMin = dist;
34 bestPair[0] = id;
35 bestPair[1] = jd;
36 }
37 }
38 }
39 if (distMin == -std::log(0.))
40 {
41 cout << "---------------------------------------------------------------------------------" << endl;
42 for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
43 {
44 size_t id = i->first;
45 map<size_t, Node*>::iterator j = i;
46 j++;
47 for ( ; j != currentNodes_.end(); j++)
48 {
49 size_t jd = j->first;
50 double dist = matrix_(id, jd);
51 cout << dist << "\t";
52 }
53 cout << endl;
54 }
55 cout << "---------------------------------------------------------------------------------" << endl;
56
57 throw Exception("Unexpected error: no minimum found in the distance matrix.");
58 }
59
60 return bestPair;
61}
62vector<double> HierarchicalClustering::computeBranchLengthsForPair(const vector<size_t>& pair)
63{
64 vector<double> d(2);
65 double dist = matrix_(pair[0], pair[1]) / 2.;
66 d[0] = dist - dynamic_cast<NodeTemplate<ClusterInfos>*>(currentNodes_[pair[0]])->getInfos().length;
67 d[1] = dist - dynamic_cast<NodeTemplate<ClusterInfos>*>(currentNodes_[pair[1]])->getInfos().length;
68 return d;
69}
70
71double HierarchicalClustering::computeDistancesFromPair(const vector<size_t>& pair, const vector<double>& branchLengths, size_t pos)
72{
73 double w1, w2, w3, w4;
74 if (method_ == "Single")
75 {
76 w1 = .5;
77 w2 = .5;
78 w3 = 0.;
79 w4 = -.5;
80 }
81 else if (method_ == "Complete")
82 {
83 w1 = .5;
84 w2 = .5;
85 w3 = 0.;
86 w4 = .5;
87 }
88 else if (method_ == "Median")
89 {
90 w1 = .5;
91 w2 = .5;
92 w3 = -0.25;
93 w4 = 0.;
94 }
95 else if (method_ == "Average")
96 {
97 double n1 = static_cast<double>(dynamic_cast<NodeTemplate<ClusterInfos>*>(currentNodes_[pair[0]])->getInfos().numberOfLeaves);
98 double n2 = static_cast<double>(dynamic_cast<NodeTemplate<ClusterInfos>*>(currentNodes_[pair[1]])->getInfos().numberOfLeaves);
99 w1 = n1 / (n1 + n2);
100 w2 = n2 / (n1 + n2);
101 w3 = 0.;
102 w4 = 0.;
103 }
104 else if (method_ == "Ward")
105 {
106 double n1 = static_cast<double>(dynamic_cast<NodeTemplate<ClusterInfos>*>(currentNodes_[pair[0]])->getInfos().numberOfLeaves);
107 double n2 = static_cast<double>(dynamic_cast<NodeTemplate<ClusterInfos>*>(currentNodes_[pair[1]])->getInfos().numberOfLeaves);
108 double n3 = static_cast<double>(dynamic_cast<NodeTemplate<ClusterInfos>*>(currentNodes_[pos])->getInfos().numberOfLeaves);
109 w1 = (n1 + n3) / (n1 + n2 + n3);
110 w2 = (n2 + n3) / (n1 + n2 + n3);
111 w3 = -n3 / (n1 + n2 + n3);
112 w4 = 0.;
113 }
114 else if (method_ == "Centroid")
115 {
116 double n1 = static_cast<double>(dynamic_cast<NodeTemplate<ClusterInfos>*>(currentNodes_[pair[0]])->getInfos().numberOfLeaves);
117 double n2 = static_cast<double>(dynamic_cast<NodeTemplate<ClusterInfos>*>(currentNodes_[pair[1]])->getInfos().numberOfLeaves);
118 w1 = n1 / (n1 + n2);
119 w2 = n2 / (n1 + n2);
120 w3 = -n1 * n2 / pow(n1 + n2, 2.);
121 w4 = 0.;
122 }
123 else
124 throw Exception("HierarchicalClustering::computeBranchLengthsForPair. unknown method '" + method_ + "'.");
125 double d1 = matrix_(pair[0], pos);
126 double d2 = matrix_(pair[1], pos);
127 double d3 = matrix_(pair[0], pair[1]);
128 return w1 * d1 + w2 * d2 + w3 * d3 + w4 * std::abs(d1 - d2);
129}
130
132{
134 map<size_t, Node*>::iterator it = currentNodes_.begin();
135 size_t i1 = it->first;
136 Node* n1 = it->second;
137 it++;
138 size_t i2 = it->first;
139 Node* n2 = it->second;
140 double d = matrix_(i1, i2) / 2;
141 root->addSon(n1);
142 root->addSon(n2);
143 n1->setDistanceToFather(d - dynamic_cast<NodeTemplate<ClusterInfos>*>(n1)->getInfos().length);
144 n2->setDistanceToFather(d - dynamic_cast<NodeTemplate<ClusterInfos>*>(n2)->getInfos().length);
146}
147
148Node* HierarchicalClustering::getLeafNode(int id, const string& name)
149{
150 ClusterInfos infos;
151 infos.numberOfLeaves = 1;
152 infos.length = 0.;
154 leaf->setInfos(infos);
155 return leaf;
156}
157
159{
160 ClusterInfos infos;
161 infos.numberOfLeaves =
162 dynamic_cast<NodeTemplate<ClusterInfos>*>(son1)->getInfos().numberOfLeaves
163 + dynamic_cast<NodeTemplate<ClusterInfos>*>(son2)->getInfos().numberOfLeaves;
164 infos.length = dynamic_cast<NodeTemplate<ClusterInfos>*>(son1)->getInfos().length + son1->getDistanceToFather();
165 Node* parent = new NodeTemplate<ClusterInfos>(id);
166 dynamic_cast<NodeTemplate<ClusterInfos>*>(parent)->setInfos(infos);
167 parent->addSon(son1);
168 parent->addSon(son2);
169 return parent;
170}
double computeDistancesFromPair(const std::vector< size_t > &pair, const std::vector< double > &branchLengths, size_t pos)
Actualizes the distance matrix according to a given pair and the corresponding branch lengths.
std::vector< double > computeBranchLengthsForPair(const std::vector< size_t > &pair)
Compute the branch lengths for two nodes to agglomerate.
virtual Node * getLeafNode(int id, const std::string &name)
Get a leaf node.
virtual Node * getParentNode(int id, Node *son1, Node *son2)
Get an inner node.
void finalStep(int idRoot)
Method called when there ar eonly three remaining node to agglomerate, and creates the root node of t...
The NodeTemplate class.
Definition: NodeTemplate.h:39
virtual void setInfos(const NodeInfos &infos)
Set the information to be associated to this node.
Definition: NodeTemplate.h:156
The phylogenetic node class.
Definition: Node.h:59
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:266
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:386
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
Definition: Node.h:250
The phylogenetic tree class.
Definition: TreeTemplate.h:59
Defines the basic types of data flow nodes.
ExtendedFloat pow(const ExtendedFloat &ef, double exp)
ExtendedFloat abs(const ExtendedFloat &ef)