bpp-phyl3  3.0.0
PhyloTreeTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 #include <Bpp/BppString.h>
8 #include <Bpp/Numeric/Number.h>
12 #include <Bpp/Text/TextTools.h>
13 
14 #include "PhyloTreeTools.h"
15 
16 // From bpp-seq:
17 #include <Bpp/Seq/Alphabet/DNA.h>
19 
20 using namespace bpp;
21 
22 // From the STL:
23 #include <iostream>
24 #include <sstream>
25 
26 using namespace std;
27 
28 /******************************************************************************/
29 
30 const string PhyloTreeTools::BOOTSTRAP = "bootstrap";
31 
32 /******************************************************************************/
33 
34 std::shared_ptr<PhyloTree> PhyloTreeTools::buildFromTreeTemplate(const TreeTemplate<Node>& treetemp)
35 {
36  auto phyloT = std::make_shared<PhyloTree>(true);
37  const Node& root = *treetemp.getRootNode();
38 
39  auto rooti = std::make_shared<PhyloNode>(root.hasName() ? root.getName() : "");
40  phyloT->createNode(rooti);
41  phyloT->setRoot(rooti);
42  phyloT->setNodeIndex(rooti, (uint)root.getId());
43 
44  auto propi = root.getNodePropertyNames();
45  for (const auto& prop:propi)
46  {
47  rooti->setProperty(prop, *root.getNodeProperty(prop));
48  }
49 
50  phyloT->addSubTree(rooti, root);
51 
52  return phyloT;
53 }
54 
55 double PhyloTreeTools::getHeight(const PhyloTree& tree, const std::shared_ptr<PhyloNode> node)
56 {
57  double d = 0;
58 
59  vector<shared_ptr<PhyloBranch>> edges = tree.getOutgoingEdges(node);
60  for (size_t i = 0; i < edges.size(); i++)
61  {
62  double dist = 0;
63  if (edges[i]->hasLength())
64  dist = edges[i]->getLength();
65  else
66  throw PhyloBranchPException("Branch without length.", edges[i].get());
67 
68  double c = getHeight(tree, get<1>(tree.getNodes(edges[i]))) + dist;
69  if (c > d)
70  d = c;
71  }
72 
73  return d;
74 }
75 
76 
77 size_t PhyloTreeTools::initBranchLengthsGrafen(PhyloTree& tree, std::shared_ptr<PhyloNode> node)
78 {
79  vector<shared_ptr<PhyloNode>> sons = tree.getSons(node);
80  vector<size_t> h(sons.size());
81  for (size_t i = 0; i < sons.size(); i++)
82  {
83  h[i] = initBranchLengthsGrafen(tree, sons[i]);
84  }
85  size_t thish = sons.size() == 0 ? 0 : VectorTools::sum<size_t>(h) + sons.size() - 1;
86  for (size_t i = 0; i < sons.size(); i++)
87  {
88  tree.getEdgeToFather(sons[i])->setLength((double)(thish - h[i]));
89  }
90  return thish;
91 }
92 
93 
95 {
96  initBranchLengthsGrafen(tree, tree.getRoot());
97 }
98 
100  PhyloTree& tree,
101  std::shared_ptr<PhyloNode> node,
102  double power,
103  double total,
104  double& height,
105  double& heightRaised)
106 {
107  vector<shared_ptr<PhyloNode>> sons = tree.getSons(node);
108  vector<double> hr(sons.size());
109  height = 0;
110  for (size_t i = 0; i < sons.size(); i++)
111  {
112  shared_ptr<PhyloBranch> branch = tree.getEdgeToFather(sons[i]);
113 
114  if (branch->hasLength())
115  {
116  double h;
117  computeBranchLengthsGrafen(tree, sons[i], power, total, h, hr[i]);
118  double d = h + branch->getLength();
119  if (d > height)
120  height = d;
121  }
122  else
123  throw PhyloBranchPException ("PhyloTreeTools::computeBranchLengthsGrafen. Branch length lacking.", branch.get());
124  }
125  heightRaised = std::pow(height / total, power) * total;
126  for (size_t i = 0; i < sons.size(); i++)
127  {
128  tree.getEdgeToFather(sons[i])->setLength(heightRaised - hr[i]);
129  }
130 }
131 
132 
133 void PhyloTreeTools::computeBranchLengthsGrafen(PhyloTree& tree, double power, bool init)
134 {
135  shared_ptr<PhyloNode> root = tree.getRoot();
136  if (init)
137  {
138  initBranchLengthsGrafen(tree);
139  }
140  // Scale by total height:
141  double totalHeight = getHeight(tree, root);
142  double h, hr;
143  computeBranchLengthsGrafen(tree, root, power, totalHeight, h, hr);
144 }
145 
146 double PhyloTreeTools::convertToClockTree(PhyloTree& tree, std::shared_ptr<PhyloNode> node)
147 {
148  vector<shared_ptr<PhyloNode>> sons = tree.getSons(node);
149 
150  vector<double> h(sons.size());
151  // We compute the mean height:
152  double l = 0;
153  double maxh = -1.;
154  for (size_t i = 0; i < sons.size(); i++)
155  {
156  shared_ptr<PhyloBranch> branch = tree.getEdgeToFather(sons[i]);
157 
158  if (branch->hasLength())
159  {
160  h[i] = convertToClockTree(tree, sons[i]);
161  if (h[i] > maxh)
162  maxh = h[i];
163  l += h[i] + branch->getLength();
164  }
165  else
166  throw PhyloBranchPException ("PhyloTreeTools::convertToClockTree. Branch length lacking.", branch.get());
167  }
168  if (sons.size() > 0)
169  l /= (double)sons.size();
170  if (l < maxh)
171  l = maxh;
172  for (size_t i = 0; i < sons.size(); i++)
173  {
174  tree.getEdgeToFather(sons[i])->setLength(l - h[i]);
175  }
176  return l;
177 }
178 
179 
180 double PhyloTreeTools::convertToClockTree2(PhyloTree& tree, std::shared_ptr<PhyloNode> node)
181 {
182  vector<shared_ptr<PhyloNode>> sons = tree.getSons(node);
183  vector<double> h(sons.size());
184  // We compute the mean height:
185  double l = 0;
186  double maxh = -1.;
187  for (size_t i = 0; i < sons.size(); i++)
188  {
189  shared_ptr<PhyloBranch> branch = tree.getEdgeToFather(sons[i]);
190 
191  if (branch->hasLength())
192  {
193  h[i] = convertToClockTree2(tree, sons[i]);
194  if (h[i] > maxh)
195  maxh = h[i];
196  l += h[i] + branch->getLength();
197  }
198  else
199  throw PhyloBranchPException("PhyloTreeTools::convertToClockTree2. Branch length lacking.", branch.get());
200  }
201  if (sons.size() > 0)
202  l /= (double)sons.size();
203  for (size_t i = 0; i < sons.size(); i++)
204  {
205  tree.scaleTree(sons[i], h[i] > 0 ? l / h[i] : 0);
206  }
207  return l;
208 }
209 
210 
212 {
213  // is the tree rooted?
214  if (!tree.isRooted())
215  throw Exception("The tree has to be rooted on the branch of interest to determine the midpoint position of the root");
216 
217  vector<shared_ptr<PhyloNode>> sons = tree.getSons(tree.getRoot());
218 
219  if (sons.size() > 2)
220  throw Exception("The tree is multifurcated at the root, which is not allowed.");
221 
222  double length = 0.;
223 
224  // Length of the branch containing the root:
225  shared_ptr<PhyloBranch> branch0 = tree.getEdgeToFather(sons[0]);
226  shared_ptr<PhyloBranch> branch1 = tree.getEdgeToFather(sons[1]);
227 
228  length = branch0->getLength() + branch1->getLength();
229 
230  // The fraction of the original branch allowing to split its length and to place the root:
231  double x = bestRootPosition_(tree, sons[0], sons[1], length);
232  // The new branch lengths are then computed:
233  branch0->setLength(length * x);
234  branch1->setLength(length * (1 - x));
235 }
236 
237 
238 double PhyloTreeTools::bestRootPosition_(const PhyloTree& tree, const std::shared_ptr<PhyloNode> node1, const std::shared_ptr<PhyloNode> node2, double length)
239 {
240  double x;
241  Moments_ m1, m2;
242  double A, B; // C;
243  // The variance is expressed as a degree 2 polynomial : variance(x) = A * x * x + B * x + C
244  // The fraction x is then obtained by differentiating this equation.
245  m1 = statFromNode_(tree, node1);
246  m2 = statFromNode_(tree, node2);
247  A = 4 * m1.N * (m2.N * length) * length;
248  B = 4 * length * (m2.N * m1.sum - m1.N * m2.sum - length * m1.N * m2.N);
249 // C = (m1.N + m2.N) * (m1.squaredSum + m2.squaredSum) + m1.N * length * m2.N * length +
250 // 2 * m1.N * length * m2.sum - 2 * m2.N * length * m1.sum -
251 // (m1.sum + m2.sum) * (m1.sum + m2.sum);
252 
253  if (A < 1e-20)
254  x = 0.5;
255  else
256  x = -B / (2 * A);
257  if (x < 0)
258  x = 0;
259  else if (x > 1)
260  x = 1;
261 
262  return x;
263 }
264 
265 
266 PhyloTreeTools::Moments_ PhyloTreeTools::statFromNode_(const PhyloTree& tree, const std::shared_ptr<PhyloNode> root)
267 {
268  // This function recursively calculates both the sum of the branch lengths and the sum of the squared branch lengths down the node whose ID is rootId.
269  // If below a particular node there are N leaves, the branch between this node and its father is taken into account N times in the calculation.
270  Moments_ m;
271  static Moments_ mtmp;
272 
273  if (tree.isLeaf(root))
274  {
275  m.N = 1;
276  m.sum = 0.;
277  m.squaredSum = 0.;
278  }
279  else
280  {
281  vector<shared_ptr<PhyloNode>> sons = tree.getSons(root);
282  for (size_t i = 0; i < sons.size(); i++)
283  {
284  mtmp = statFromNode_(tree, sons[i]);
285  shared_ptr<PhyloBranch> branch = tree.getEdgeToFather(sons[i]);
286 
287  double bLength = branch->getLength();
288  m.N += mtmp.N;
289  m.sum += mtmp.sum + bLength * mtmp.N;
290  m.squaredSum += mtmp.squaredSum + 2 * bLength * mtmp.sum + mtmp.N * bLength * bLength;
291  }
292  }
293 
294  return m;
295 }
bool isLeaf(const Nref node) const
std::pair< Nref, Nref > getNodes(Eref edge) const
virtual std::shared_ptr< N > getRoot() const=0
virtual std::vector< std::shared_ptr< E > > getOutgoingEdges(const std::shared_ptr< N > node) const=0
std::vector< std::shared_ptr< N > > getSons(const std::shared_ptr< N > node) const
std::shared_ptr< E > getEdgeToFather(const std::shared_ptr< N > nodeObject) const
The phylogenetic node class.
Definition: Node.h:59
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
Definition: Node.h:203
virtual Clonable * getNodeProperty(const std::string &name)
Definition: Node.h:502
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual bool hasName() const
Tell is this node has a name.
Definition: Node.h:234
virtual std::vector< std::string > getNodePropertyNames() const
Definition: Node.h:565
General exception thrown when something is wrong with a particular branch.
static Moments_ statFromNode_(const PhyloTree &tree, const std::shared_ptr< PhyloNode > root)
static void constrainedMidPointRooting(PhyloTree &tree)
Determine the mid-point position of the root along the branch that already contains the root....
static void initBranchLengthsGrafen(PhyloTree &tree)
Grafen's method to initialize branch lengths.
static const std::string BOOTSTRAP
Bootstrap tag.
static std::shared_ptr< PhyloTree > buildFromTreeTemplate(const TreeTemplate< Node > &treetemp)
static double convertToClockTree2(PhyloTree &tree, std::shared_ptr< PhyloNode > node)
Modify a tree's branch lengths to make a clock tree, by rescaling subtrees.
static double getHeight(const PhyloTree &tree, const std::shared_ptr< PhyloNode > node)
Get the height of the subtree defined by node 'node', i.e. the maximum distance between leaves and th...
static void computeBranchLengthsGrafen(PhyloTree &tree, double power=1, bool init=true)
Compute branch lengths using Grafen's method.
static double bestRootPosition_(const PhyloTree &tree, const std::shared_ptr< PhyloNode > node1, const std::shared_ptr< PhyloNode > node2, double length)
static double convertToClockTree(PhyloTree &tree, std::shared_ptr< PhyloNode > node)
Modify a tree's branch lengths to make a clock tree, by rebalancing branch lengths.
void scaleTree(double factor)
Multiply all branch lengths by a given factor.
Definition: PhyloTree.cpp:77
The phylogenetic tree class.
Definition: TreeTemplate.h:59
virtual N * getRootNode()
Definition: TreeTemplate.h:389
Defines the basic types of data flow nodes.
ExtendedFloat pow(const ExtendedFloat &ef, double exp)