bpp-phyl3  3.0.0
Newick.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/BppString.h>
6 #include <Bpp/Numeric/Number.h>
9 #include <Bpp/Text/TextTools.h>
10 #include <Bpp/Text/TextTools.h>
11 
12 #include "../Tree/PhyloBranch.h"
13 #include "../Tree/PhyloNode.h"
14 #include "../Tree/PhyloTree.h"
15 #include "../Tree/Tree.h"
16 #include "../Tree/TreeTemplate.h"
17 #include "../Tree/TreeTemplateTools.h"
18 #include "Newick.h"
19 
20 using namespace bpp;
21 
22 // From the STL:
23 #include <iostream>
24 #include <fstream>
25 
26 using namespace std;
27 
28 /******************************************************************************/
29 
30 const string Newick::getFormatName() const { return "Newick"; }
31 
32 /******************************************************************************/
33 
34 const string Newick::getFormatDescription() const
35 {
36  return string("New hampshire parenthesis format. ") +
37  "See http://evolution.genetics.washington.edu/phylip/newicktree.html for more info.";
38 }
39 
40 /**********************************************************/
41 /* INPUT */
42 /**********************************************************/
43 
44 unique_ptr<TreeTemplate<Node>> Newick::readTreeTemplate(istream& in) const
45 {
46  // Checking the existence of specified file
47  if (!in)
48  {
49  throw IOException ("Newick::read: failed to read from stream");
50  }
51 
52  // We concatenate all line in file till we reach the ending semi colon:
53  string temp, description; // Initialization
54  // Main loop : for all file lines
55  while (getline(in, temp, '\n'))
56  {
57  string::size_type index = temp.find(";");
58  if (index != string::npos)
59  {
60  description += temp.substr(0, index + 1);
61  break;
62  }
63  else
64  description += temp;
65  }
66 
67  if (allowComments_)
68  description = TextTools::removeSubstrings(description, '[', ']');
69  if (TextTools::isEmpty(description))
70  throw IOException("Newick::read: no tree was found!");
71  return TreeTemplateTools::parenthesisToTree(description, useBootstrap_, bootstrapPropertyName_, false, verbose_);
72 }
73 
74 /*********************************************************************************/
75 
76 unique_ptr<PhyloTree> Newick::readPhyloTree(istream& in) const
77 {
78  // Checking the existence of specified file
79  if (!in)
80  {
81  throw IOException ("Newick::readPhyloTree: failed to read from stream");
82  }
83 
84  // We concatenate all line in file till we reach the ending semi colon:
85  string temp, description; // Initialization
86  // Main loop : for all file lines
87  while (getline(in, temp, '\n'))
88  {
89  string::size_type index = temp.find(";");
90  if (index != string::npos)
91  {
92  description += temp.substr(0, index + 1);
93  break;
94  }
95  else
96  description += temp;
97  }
98 
99  if (allowComments_)
100  description = TextTools::removeSubstrings(description, '[', ']');
101  if (TextTools::isEmpty(description))
102  throw IOException("Newick::read: no tree was found!");
103  return parenthesisToPhyloTree(description, useBootstrap_, bootstrapPropertyName_, false, verbose_);
104 }
105 
106 /******************************************************************************/
107 
108 void Newick::readTrees(istream& in, vector<unique_ptr<Tree>>& trees) const
109 {
110  // Checking the existence of specified file
111  if (!in)
112  {
113  throw IOException ("Newick::readTrees(vector): failed to read from stream");
114  }
115 
116  // Main loop : for all file lines
117  string temp, description; // Initialization
118  string::size_type index;
119  // We concatenate all line in file till we reach the ending semi colon:
120  while (getline(in, temp, '\n'))
121  {
122  index = temp.find(";");
123  if (index != string::npos)
124  {
125  description += temp.substr(0, index + 1);
126  if (allowComments_)
127  description = TextTools::removeSubstrings(description, '[', ']');
128  trees.push_back(TreeTemplateTools::parenthesisToTree(description, useBootstrap_, bootstrapPropertyName_, false, verbose_));
129  description = temp.substr(index + 1);
130  }
131  else
132  description += temp;
133  }
134  // In case the file is empty, the method will not add any neww tree to the vector.
135 }
136 
137 /******************************************************************************/
138 
139 void Newick::readPhyloTrees(istream& in, vector<unique_ptr<PhyloTree>>& trees) const
140 {
141  // Checking the existence of specified file
142  if (!in)
143  {
144  throw IOException ("Newick::readTrees(vector): failed to read from stream");
145  }
146 
147  // Main loop : for all file lines
148  string temp, description; // Initialization
149  string::size_type index;
150  // We concatenate all line in file till we reach the ending semi colon:
151  while (getline(in, temp, '\n'))
152  {
153  index = temp.find(";");
154  if (index != string::npos)
155  {
156  description += temp.substr(0, index + 1);
157  if (allowComments_)
158  description = TextTools::removeSubstrings(description, '[', ']');
159  trees.push_back(parenthesisToPhyloTree(description, useBootstrap_, bootstrapPropertyName_, false, verbose_));
160  description = temp.substr(index + 1);
161  }
162  else
163  description += temp;
164  }
165  // In case the file is empty, the method will not add any neww tree to the vector.
166 }
167 
168 /***************************************/
169 
170 IOTree::Element Newick::getElement(const string& elt) const
171 {
173  element.length = ""; // default
174  element.annotation = ""; // default
175  element.isLeaf = false; // default
176 
177  size_t colonIndex;
178  bool hasColon = false;
179  for (colonIndex = elt.size(); colonIndex > 0 && elt[colonIndex] != ')'; colonIndex--)
180  {
181  if (elt[colonIndex] == ':')
182  {
183  hasColon = true;
184  break;
185  }
186  }
187  try
188  {
189  string elt2;
190  if (hasColon)
191  {
192  // this is an element with length:
193  elt2 = elt.substr(0, colonIndex);
194  element.length = TextTools::removeSurroundingWhiteSpaces(elt.substr(colonIndex + 1));
195  }
196  else
197  {
198  // this is an element without length;
199  elt2 = elt;
200  }
201 
202  string::size_type lastP = elt2.rfind(')');
203  string::size_type firstP = elt2.find('(');
204  if (firstP == string::npos)
205  {
206  // This is a leaf:
207  element.content = elt2;
208  element.isLeaf = true;
209  }
210  else
211  {
212  // This is a node:
213  if (lastP < firstP)
214  throw IOException("Newick::getElement(). Invalid format: bad closing parenthesis in " + elt2);
215  element.content = TextTools::removeSurroundingWhiteSpaces(elt2.substr(firstP + 1, lastP - firstP - 1));
216  string bootstrap = TextTools::removeSurroundingWhiteSpaces(elt2.substr(lastP + 1));
217  // cout << "ELEMENT: BOOTSTRAP: " << bootstrap << endl;
218  if (!TextTools::isEmpty(bootstrap))
219  {
220  element.annotation = bootstrap;
221  }
222  }
223  }
224  catch (exception& e)
225  {
226  throw IOException("Bad tree description: " + elt);
227  }
228  return element;
229 }
230 
231 /************************************************************/
232 
233 shared_ptr<PhyloNode> Newick::parenthesisToNode(PhyloTree& tree, shared_ptr<PhyloNode> father, const string& description, unsigned int& nodeCounter, bool bootstrap, const string& propertyName, bool withId, bool verbose) const
234 {
235 // cout << "NODE: " << description << endl;
236  IOTree::Element elt = getElement(description);
237 
238  // New node:
239  std::shared_ptr<PhyloNode> node(new PhyloNode());
240 
241  shared_ptr<PhyloBranch> branch(father ? new PhyloBranch() : 0);
242 
243  if (father)
244  {
245  tree.createNode(father, node, branch);
246 
247  if (!TextTools::isEmpty(elt.length))
248  branch->setLength(TextTools::toDouble(elt.length));
249  }
250  else
251  tree.createNode(node);
252 
253  if (!TextTools::isEmpty(elt.annotation))
254  {
255  if (withId)
256  {
257  auto id = static_cast<PhyloTree::NodeIndex>(TextTools::toInt(elt.annotation));
258  tree.setNodeIndex(node, id);
259  if (branch)
260  tree.setEdgeIndex(branch, id);
261  }
262  else
263  {
264  if (bootstrap)
265  {
266  if (branch)
267  branch->setProperty("bootstrap", Number<double>(TextTools::toDouble(elt.annotation)));
268  // cout << "NODE: BOOTSTRAP: " << * elt.bootstrap << endl;
269  }
270  else
271  {
272  if (branch)
273  branch->setProperty(propertyName, BppString(elt.annotation));
274  }
275  }
276  }
277 
278  NestedStringTokenizer nt(elt.content, "(", ")", ",");
279  vector<string> elements;
280  while (nt.hasMoreToken())
281  {
282  elements.push_back(nt.nextToken());
283  }
284 
285  if (elt.isLeaf)
286  {
287  // This is a leaf:
288  string name = TextTools::removeSurroundingWhiteSpaces(elements[0]);
289  if (withId)
290  {
291  StringTokenizer st(name, "_", true, true);
292  ostringstream realName;
293  for (size_t i = 0; i < st.numberOfRemainingTokens() - 1; ++i)
294  {
295  if (i != 0)
296  {
297  realName << "_";
298  }
299  realName << st.getToken(i);
300  }
301  node->setName(realName.str());
302  tree.setNodeIndex(node, static_cast<PhyloTree::NodeIndex>(
304  if (branch)
305  tree.setEdgeIndex(branch, static_cast<PhyloTree::NodeIndex>(
307  }
308  else
309  node->setName(name);
310  }
311  else
312  {
313  // This is a node:
314  for (size_t i = 0; i < elements.size(); i++)
315  {
316  // cout << "NODE: SUBNODE: " << i << ", " << elements[i] << endl;
317  parenthesisToNode(tree, node, elements[i], nodeCounter, bootstrap, propertyName, withId, verbose);
318  }
319  }
320 
321  if (!withId)
322  {
323  tree.setNodeIndex(node, nodeCounter);
324  if (branch)
325  tree.setEdgeIndex(branch, nodeCounter);
326  }
327 
328  nodeCounter++;
329  if (verbose)
331  return node;
332 }
333 
334 /******************************************************************************/
335 
336 unique_ptr<PhyloTree> Newick::parenthesisToPhyloTree(const string& description, bool bootstrap, const string& propertyName, bool withId, bool verbose) const
337 {
338  string::size_type semi = description.rfind(';');
339  if (semi == string::npos)
340  throw Exception("Newick::parenthesisToTree(). Bad format: no semi-colon found.");
341  string content = description.substr(0, semi);
342  unsigned int nodeCounter = 0;
343  auto tree = make_unique<PhyloTree>();
344  shared_ptr<PhyloNode> root = parenthesisToNode(*tree, 0, content, nodeCounter, bootstrap, propertyName, withId, verbose);
345  tree->rootAt(root);
346  if (verbose)
347  {
348  (*ApplicationTools::message) << " nodes loaded.";
349  ApplicationTools::message->endLine();
350  }
351 
352  return tree;
353 }
354 
355 /**********************************************************/
356 /* OUTPUT */
357 /**********************************************************/
358 
359 void Newick::write_(const Tree& tree, ostream& out) const
360 {
361  // Checking the existence of specified file, and possibility to open it in write mode
362  if (!out)
363  {
364  throw IOException ("Newick::writeTree: failed to write to stream");
365  }
366  if (useBootstrap_)
367  {
368  out << TreeTools::treeToParenthesis(tree, writeId_);
369  }
370  else
371  {
372  out << TreeTools::treeToParenthesis(tree, false, bootstrapPropertyName_);
373  }
374 }
375 
376 void Newick::write_(const PhyloTree& tree, ostream& out) const
377 {
378  // Checking the existence of specified file, and possibility to open it in write mode
379  if (!out)
380  {
381  throw IOException ("Newick::writeTree: failed to write to stream");
382  }
383  if (useBootstrap_)
384  {
385  out << treeToParenthesis(tree, writeId_);
386  }
387  else
388  {
389  out << treeToParenthesis(tree, false, bootstrapPropertyName_);
390  }
391 }
392 
393 /******************************************************************************/
394 
395 template<class N>
396 void Newick::write_(const TreeTemplate<N>& tree, ostream& out) const
397 {
398  // Checking the existence of specified file, and possibility to open it in write mode
399  if (!out)
400  {
401  throw IOException ("Newick::writeTree: failed to write to stream");
402  }
403  if (useBootstrap_)
404  {
405  out << TreeTemplateTools::treeToParenthesis(tree, writeId_);
406  }
407  else
408  {
409  out << TreeTemplateTools::treeToParenthesis(tree, false, bootstrapPropertyName_);
410  }
411 }
412 
413 
414 /******************************************************************************/
415 
416 void Newick::write_(const vector<const Tree*>& trees, ostream& out) const
417 {
418  // Checking the existence of specified file, and possibility to open it in write mode
419  if (!out)
420  {
421  throw IOException ("Newick::write: failed to write to stream");
422  }
423  for (unsigned int i = 0; i < trees.size(); i++)
424  {
425  if (useBootstrap_)
426  {
427  out << TreeTools::treeToParenthesis(*trees[i], writeId_);
428  }
429  else
430  {
431  out << TreeTools::treeToParenthesis(*trees[i], false, bootstrapPropertyName_);
432  }
433  }
434 }
435 
436 /******************************************************************************/
437 
438 template<class N>
439 void Newick::write_(const vector<TreeTemplate<N>*>& trees, ostream& out) const
440 {
441  // Checking the existence of specified file, and possibility to open it in write mode
442  if (!out)
443  {
444  throw IOException ("Newick::write: failed to write to stream");
445  }
446  for (unsigned int i = 0; i < trees.size(); i++)
447  {
448  if (useBootstrap_)
449  {
450  out << TreeTemplateTools::treeToParenthesis(*trees[i], writeId_);
451  }
452  else
453  {
454  out << TreeTemplateTools::treeToParenthesis(*trees[i], false, bootstrapPropertyName_);
455  }
456  }
457 }
458 
459 /******************************************************************************/
460 
461 void Newick::write_(const vector<const PhyloTree*>& trees, ostream& out) const
462 {
463  // Checking the existence of specified file, and possibility to open it in write mode
464  if (!out)
465  {
466  throw IOException ("Newick::write: failed to write to stream");
467  }
468  for (unsigned int i = 0; i < trees.size(); i++)
469  {
470  if (useBootstrap_)
471  {
472  out << treeToParenthesis(*trees[i], writeId_);
473  }
474  else
475  {
476  out << treeToParenthesis(*trees[i], false, bootstrapPropertyName_);
477  }
478  }
479 }
480 
481 /******************************************************************************/
482 
483 string Newick::nodeToParenthesis(const PhyloTree& tree, const std::shared_ptr<PhyloNode> node, bool writeId) const
484 {
485  ostringstream s;
486  shared_ptr<PhyloBranch> branch = tree.hasFather(node) ? tree.getEdgeToFather(node) : 0;
487 
488  if (tree.getNumberOfSons(node) == 0)
489  {
490  s << node->getName();
491  }
492  else
493  {
494  s << "(";
495 
496  vector<shared_ptr<PhyloNode>> vSons = tree.getSons(node);
497 
498  for (vector<shared_ptr<PhyloNode>>::const_iterator it = vSons.begin(); it != vSons.end(); it++)
499  {
500  if (it != vSons.begin())
501  s << ",";
502 
503  s << nodeToParenthesis(tree, *it);
504  }
505 
506  s << ")";
507  }
508 
509  if (writeId)
510  {
511  if (tree.isLeaf(node))
512  s << "_";
513  s << tree.getNodeIndex(node);
514  }
515  else
516  {
517  if (branch && branch->hasProperty("bootstrap"))
518  s << (dynamic_cast<const Number<double>*>(branch->getProperty("bootstrap"))->getValue());
519  }
520 
521  if (branch && branch->hasLength())
522  s << ":" << branch->getLength();
523  return s.str();
524 }
525 
526 /******************************************************************************/
527 
528 string Newick::nodeToParenthesis(const PhyloTree& tree, const std::shared_ptr<PhyloNode> node, bool bootstrap, const string& propertyName) const
529 {
530  ostringstream s;
531  shared_ptr<PhyloBranch> branch = tree.hasFather(node) ? tree.getEdgeToFather(node) : 0;
532 
533  if (tree.getNumberOfSons(node) == 0)
534  {
535  s << node->getName();
536  }
537  else
538  {
539  s << "(";
540 
541  vector<shared_ptr<PhyloNode>> vSons = tree.getSons(node);
542 
543  for (vector<shared_ptr<PhyloNode>>::const_iterator it = vSons.begin(); it != vSons.end(); it++)
544  {
545  if (it != vSons.begin())
546  s << ",";
547 
548  s << nodeToParenthesis(tree, *it, bootstrap, propertyName);
549  }
550 
551  s << ")";
552 
553  if (branch)
554  {
555  if (bootstrap)
556  {
557  if (branch->hasProperty("bootstrap"))
558  s << (dynamic_cast<const Number<double>*>(branch->getProperty("bootstrap"))->getValue());
559  }
560  else
561  {
562  if (node->hasProperty(propertyName))
563  s << *(dynamic_cast<const BppString*>(node->getProperty(propertyName)));
564  }
565  }
566  }
567 
568  if (branch && branch->hasLength())
569  s << ":" << branch->getLength();
570 
571  return s.str();
572 }
573 
574 /******************************************************************************/
575 
576 string Newick::treeToParenthesis(const PhyloTree& tree, bool writeId) const
577 {
578  ostringstream s;
579  s << "(";
580 
581  shared_ptr<PhyloNode> root = tree.getRoot();
582 
583  std::vector<shared_ptr<PhyloNode>> rSons = tree.getSons(root);
584 
585  if (tree.isRooted())
586  {
587  for (size_t i = 0; i < rSons.size(); ++i)
588  {
589  if (i != 0)
590  s << ",";
591  s << nodeToParenthesis(tree, rSons[i], writeId);
592  }
593  }
594  else
595  {
596  s << root->getName();
597 
598  for (size_t i = 0; i < rSons.size(); ++i)
599  {
600  if (i != 0)
601  s << ",";
602  s << nodeToParenthesis(tree, rSons[i], writeId);
603  }
604  }
605 
606  s << ")";
607 
608  const shared_ptr<PhyloBranch> branch = tree.hasFather(root) ? tree.getEdgeToFather(root) : 0;
609 
610  if (branch && branch->hasLength())
611  s << ":" << branch->getLength();
612  s << ";" << endl;
613 
614  return s.str();
615 }
616 
617 /******************************************************************************/
618 
619 string Newick::treeToParenthesis(const PhyloTree& tree, bool bootstrap, const string& propertyName) const
620 {
621  ostringstream s;
622  s << "(";
623 
624  shared_ptr<PhyloNode> root = tree.getRoot();
625 
626  std::vector<shared_ptr<PhyloNode>> rSons = tree.getSons(root);
627 
628  if (tree.isRooted())
629  {
630  for (size_t i = 0; i < rSons.size(); ++i)
631  {
632  if (i != 0)
633  s << ",";
634  s << nodeToParenthesis(tree, rSons[i], bootstrap, propertyName);
635  }
636  }
637  else
638  {
639  s << root->getName();
640 
641  for (size_t i = 0; i < rSons.size(); ++i)
642  {
643  if (i != 0)
644  s << ",";
645  s << nodeToParenthesis(tree, rSons[i], bootstrap, propertyName);
646  }
647  }
648 
649  s << ")";
650 
651  shared_ptr<PhyloBranch> branch = tree.hasFather(root) ? tree.getEdgeToFather(root) : 0;
652 
653  if (branch)
654  {
655  if (bootstrap)
656  {
657  if (branch->hasProperty("bootstrap"))
658  s << (dynamic_cast<const Number<double>*>(branch->getProperty("bootstrap"))->getValue());
659  }
660  else
661  {
662  if (branch->hasProperty(propertyName))
663  s << *(dynamic_cast<const BppString*>(branch->getProperty(propertyName)));
664  }
665  }
666 
667  s << ";" << endl;
668  return s.str();
669 }
return element
static std::shared_ptr< OutputStream > message
static void displayUnlimitedGauge(size_t iter, const std::string &mes="")
bool isLeaf(const Nref node) const
AssociationGraphObserver< N, E >::NodeIndex NodeIndex
virtual NodeIndex setNodeIndex(const std::shared_ptr< N > nodeObject, NodeIndex index)=0
virtual std::shared_ptr< N > getRoot() const=0
virtual EdgeIndex setEdgeIndex(const std::shared_ptr< E > edgeObject, EdgeIndex index)=0
virtual NodeIndex getNodeIndex(const std::shared_ptr< N > nodeObject) const=0
virtual void createNode(std::shared_ptr< N > newNodeObject)=0
std::vector< std::shared_ptr< N > > getSons(const std::shared_ptr< N > node) const
bool hasFather(const std::shared_ptr< N > nodeObject) const
std::shared_ptr< E > getEdgeToFather(const std::shared_ptr< N > nodeObject) const
size_t getNumberOfSons(const std::shared_ptr< N > node) const
const std::string & nextToken()
virtual std::unique_ptr< TreeTemplate< Node > > readTreeTemplate(std::istream &in) const=0
virtual void readPhyloTrees(std::istream &in, std::vector< std::unique_ptr< PhyloTree >> &trees) const override=0
std::shared_ptr< PhyloNode > parenthesisToNode(PhyloTree &tree, std::shared_ptr< PhyloNode > father, const std::string &description, unsigned int &nodeCounter, bool bootstrap, const std::string &propertyName, bool withId, bool verbose) const
Definition: Newick.cpp:233
std::unique_ptr< PhyloTree > parenthesisToPhyloTree(const std::string &description, bool bootstrap=false, const std::string &propertyName="", bool withId=false, bool verbose=false) const
Definition: Newick.cpp:336
std::string nodeToParenthesis(const PhyloTree &tree, std::shared_ptr< PhyloNode > node, bool writeId=false) const
Get the Newick description of a subtree.
Definition: Newick.cpp:483
void write_(const Tree &tree, std::ostream &out) const
Definition: Newick.cpp:359
std::unique_ptr< PhyloTree > readPhyloTree(std::istream &in) const override=0
IOTree::Element getElement(const std::string &elt) const override
Definition: Newick.cpp:170
const std::string getFormatDescription() const override
Definition: Newick.cpp:34
virtual void readTrees(std::istream &in, std::vector< std::unique_ptr< Tree >> &trees) const override=0
std::string treeToParenthesis(const PhyloTree &tree, bool writeId=false) const
Get the parenthesis description of a tree.
Definition: Newick.cpp:576
const std::string getFormatName() const override
Definition: Newick.cpp:30
size_t numberOfRemainingTokens() const
const std::string & getToken(size_t pos) const
static std::unique_ptr< TreeTemplate< Node > > parenthesisToTree(const std::string &description, bool bootstrap=true, const std::string &propertyName=TreeTools::BOOTSTRAP, bool withId=false, bool verbose=true)
Parse a string in the parenthesis format and convert it to a tree.
static std::string treeToParenthesis(const TreeTemplate< Node > &tree, bool writeId=false)
Get the parenthesis description of a tree.
The phylogenetic tree class.
Definition: TreeTemplate.h:59
static std::string treeToParenthesis(const Tree &tree, bool writeId=false)
Get the parenthesis description of a tree.
Definition: TreeTools.cpp:296
Interface for phylogenetic tree objects.
Definition: Tree.h:115
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string removeSubstrings(const std::string &s, char blockBeginning, char blockEnding)
bool isEmpty(const std::string &s)
Defines the basic types of data flow nodes.
std::string annotation
Definition: IoTree.h:36
std::string length
Definition: IoTree.h:35
std::string content
Definition: IoTree.h:34