bpp-phyl3  3.0.0
BioNJ.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 
7 #include "../Tree/Tree.h"
8 #include "BioNJ.h"
9 
10 using namespace bpp;
11 
12 // From the STL:
13 #include <cmath>
14 #include <iostream>
15 
16 using namespace std;
17 
18 double BioNJ::computeDistancesFromPair(const vector<size_t>& pair, const vector<double>& branchLengths, size_t pos)
19 {
20  return positiveLengths_ ?
21  std::max(lambda_ * (matrix_(pair[0], pos) - branchLengths[0]) + (1 - lambda_) * (matrix_(pair[1], pos) - branchLengths[1]), 0.)
22  : lambda_ * (matrix_(pair[0], pos) - branchLengths[0]) + (1 - lambda_) * (matrix_(pair[1], pos) - branchLengths[1]);
23 }
24 
26 {
27  // Initialization:
28  for (size_t i = 0; i < matrix_.size(); i++)
29  {
30  currentNodes_[i] = getLeafNode(static_cast<int>(i), matrix_.getName(i));
31  }
32  int idNextNode = static_cast<int>(matrix_.size());
33  vector<double> newDist(matrix_.size());
34  vector<double> newVar(matrix_.size());
35 
36  // Build tree:
37  while (currentNodes_.size() > (rootTree_ ? 2 : 3))
38  {
39  if (verbose_)
40  ApplicationTools::displayGauge(matrix_.size() - currentNodes_.size(), matrix_.size() - (rootTree_ ? 2 : 3) - 1);
41  vector<size_t> bestPair = getBestPair();
42  vector<double> distances = computeBranchLengthsForPair(bestPair);
43  Node* best1 = currentNodes_[bestPair[0]];
44  Node* best2 = currentNodes_[bestPair[1]];
45  // Distances may be used by getParentNodes (PGMA for instance).
46  best1->setDistanceToFather(distances[0]);
47  best2->setDistanceToFather(distances[1]);
48  Node* parent = getParentNode(idNextNode++, best1, best2);
49  // compute lambda
50  lambda_ = 0;
51  if (variance_(bestPair[0], bestPair[1]) == 0)
52  lambda_ = .5;
53  else
54  {
55  for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
56  {
57  size_t id = i->first;
58  if (id != bestPair[0] && id != bestPair[1])
59  lambda_ += (variance_(bestPair[1], id) - variance_(bestPair[0], id));
60  }
61  double div = 2 * static_cast<double>(currentNodes_.size() - 2) * variance_(bestPair[0], bestPair[1]);
62  lambda_ /= div;
63  lambda_ += .5;
64  }
65  if (lambda_ < 0.)
66  lambda_ = 0.;
67  if (lambda_ > 1.)
68  lambda_ = 1.;
69 
70  for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
71  {
72  size_t id = i->first;
73  if (id != bestPair[0] && id != bestPair[1])
74  {
75  newDist[id] = computeDistancesFromPair(bestPair, distances, id);
76  newVar[id] = lambda_ * variance_(bestPair[0], id) + (1 - lambda_) * variance_(bestPair[1], id) - lambda_ * (1 - lambda_) * variance_(bestPair[0], bestPair[1]);
77  }
78  else
79  {
80  newDist[id] = 0;
81  }
82  }
83  // Actualize currentNodes_:
84  currentNodes_[bestPair[0]] = parent;
85  currentNodes_.erase(bestPair[1]);
86  for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
87  {
88  size_t id = i->first;
89  matrix_( bestPair[0], id) = matrix_(id, bestPair[0]) = newDist[id];
90  variance_(bestPair[0], id) = variance_(id, bestPair[0]) = newVar[id];
91  }
92  }
93  finalStep(idNextNode);
94 }
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
double computeDistancesFromPair(const std::vector< size_t > &pair, const std::vector< double > &branchLengths, size_t pos)
Actualizes the distance matrix according to a given pair and the corresponding branch lengths.
Definition: BioNJ.cpp:18
void computeTree()
Compute the tree corresponding to the distance matrix.
Definition: BioNJ.cpp:25
The phylogenetic node class.
Definition: Node.h:59
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:266
Defines the basic types of data flow nodes.