bpp-phyl3 3.0.0
Newick.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include <Bpp/BppString.h>
10#include <Bpp/Text/TextTools.h>
11
12#include "../Tree/PhyloBranch.h"
13#include "../Tree/PhyloNode.h"
14#include "../Tree/PhyloTree.h"
15#include "../Tree/Tree.h"
16#include "../Tree/TreeTemplate.h"
17#include "../Tree/TreeTemplateTools.h"
18#include "Newick.h"
19
20using namespace bpp;
21
22// From the STL:
23#include <iostream>
24#include <fstream>
25
26using namespace std;
27
28/******************************************************************************/
29
30const string Newick::getFormatName() const { return "Newick"; }
31
32/******************************************************************************/
33
34const string Newick::getFormatDescription() const
35{
36 return string("New hampshire parenthesis format. ") +
37 "See http://evolution.genetics.washington.edu/phylip/newicktree.html for more info.";
38}
39
40/**********************************************************/
41/* INPUT */
42/**********************************************************/
43
44unique_ptr<TreeTemplate<Node>> Newick::readTreeTemplate(istream& in) const
45{
46 // Checking the existence of specified file
47 if (!in)
48 {
49 throw IOException ("Newick::read: failed to read from stream");
50 }
51
52 // We concatenate all line in file till we reach the ending semi colon:
53 string temp, description; // Initialization
54 // Main loop : for all file lines
55 while (getline(in, temp, '\n'))
56 {
57 string::size_type index = temp.find(";");
58 if (index != string::npos)
59 {
60 description += temp.substr(0, index + 1);
61 break;
62 }
63 else
64 description += temp;
65 }
66
68 description = TextTools::removeSubstrings(description, '[', ']');
69 if (TextTools::isEmpty(description))
70 throw IOException("Newick::read: no tree was found!");
72}
73
74/*********************************************************************************/
75
76unique_ptr<PhyloTree> Newick::readPhyloTree(istream& in) const
77{
78 // Checking the existence of specified file
79 if (!in)
80 {
81 throw IOException ("Newick::readPhyloTree: failed to read from stream");
82 }
83
84 // We concatenate all line in file till we reach the ending semi colon:
85 string temp, description; // Initialization
86 // Main loop : for all file lines
87 while (getline(in, temp, '\n'))
88 {
89 string::size_type index = temp.find(";");
90 if (index != string::npos)
91 {
92 description += temp.substr(0, index + 1);
93 break;
94 }
95 else
96 description += temp;
97 }
98
100 description = TextTools::removeSubstrings(description, '[', ']');
101 if (TextTools::isEmpty(description))
102 throw IOException("Newick::read: no tree was found!");
104}
105
106/******************************************************************************/
107
108void Newick::readTrees(istream& in, vector<unique_ptr<Tree>>& trees) const
109{
110 // Checking the existence of specified file
111 if (!in)
112 {
113 throw IOException ("Newick::readTrees(vector): failed to read from stream");
114 }
115
116 // Main loop : for all file lines
117 string temp, description; // Initialization
118 string::size_type index;
119 // We concatenate all line in file till we reach the ending semi colon:
120 while (getline(in, temp, '\n'))
121 {
122 index = temp.find(";");
123 if (index != string::npos)
124 {
125 description += temp.substr(0, index + 1);
126 if (allowComments_)
127 description = TextTools::removeSubstrings(description, '[', ']');
129 description = temp.substr(index + 1);
130 }
131 else
132 description += temp;
133 }
134 // In case the file is empty, the method will not add any neww tree to the vector.
135}
136
137/******************************************************************************/
138
139void Newick::readPhyloTrees(istream& in, vector<unique_ptr<PhyloTree>>& trees) const
140{
141 // Checking the existence of specified file
142 if (!in)
143 {
144 throw IOException ("Newick::readTrees(vector): failed to read from stream");
145 }
146
147 // Main loop : for all file lines
148 string temp, description; // Initialization
149 string::size_type index;
150 // We concatenate all line in file till we reach the ending semi colon:
151 while (getline(in, temp, '\n'))
152 {
153 index = temp.find(";");
154 if (index != string::npos)
155 {
156 description += temp.substr(0, index + 1);
157 if (allowComments_)
158 description = TextTools::removeSubstrings(description, '[', ']');
159 trees.push_back(parenthesisToPhyloTree(description, useBootstrap_, bootstrapPropertyName_, false, verbose_));
160 description = temp.substr(index + 1);
161 }
162 else
163 description += temp;
164 }
165 // In case the file is empty, the method will not add any neww tree to the vector.
166}
167
168/***************************************/
169
170IOTree::Element Newick::getElement(const string& elt) const
171{
172 IOTree::Element element;
173 element.length = ""; // default
174 element.annotation = ""; // default
175 element.isLeaf = false; // default
176
177 size_t colonIndex;
178 bool hasColon = false;
179 for (colonIndex = elt.size(); colonIndex > 0 && elt[colonIndex] != ')'; colonIndex--)
180 {
181 if (elt[colonIndex] == ':')
182 {
183 hasColon = true;
184 break;
185 }
186 }
187 try
188 {
189 string elt2;
190 if (hasColon)
191 {
192 // this is an element with length:
193 elt2 = elt.substr(0, colonIndex);
194 element.length = TextTools::removeSurroundingWhiteSpaces(elt.substr(colonIndex + 1));
195 }
196 else
197 {
198 // this is an element without length;
199 elt2 = elt;
200 }
201
202 string::size_type lastP = elt2.rfind(')');
203 string::size_type firstP = elt2.find('(');
204 if (firstP == string::npos)
205 {
206 // This is a leaf:
207 element.content = elt2;
208 element.isLeaf = true;
209 }
210 else
211 {
212 // This is a node:
213 if (lastP < firstP)
214 throw IOException("Newick::getElement(). Invalid format: bad closing parenthesis in " + elt2);
215 element.content = TextTools::removeSurroundingWhiteSpaces(elt2.substr(firstP + 1, lastP - firstP - 1));
216 string bootstrap = TextTools::removeSurroundingWhiteSpaces(elt2.substr(lastP + 1));
217 // cout << "ELEMENT: BOOTSTRAP: " << bootstrap << endl;
218 if (!TextTools::isEmpty(bootstrap))
219 {
220 element.annotation = bootstrap;
221 }
222 }
223 }
224 catch (exception& e)
225 {
226 throw IOException("Bad tree description: " + elt);
227 }
228 return element;
229}
230
231/************************************************************/
232
233shared_ptr<PhyloNode> Newick::parenthesisToNode(PhyloTree& tree, shared_ptr<PhyloNode> father, const string& description, unsigned int& nodeCounter, bool bootstrap, const string& propertyName, bool withId, bool verbose) const
234{
235 // cout << "NODE: " << description << endl;
236 IOTree::Element elt = getElement(description);
237
238 // New node:
239 std::shared_ptr<PhyloNode> node(new PhyloNode());
240
241 shared_ptr<PhyloBranch> branch(father ? new PhyloBranch() : 0);
242
243 if (father)
244 {
245 tree.createNode(father, node, branch);
246
247 if (!TextTools::isEmpty(elt.length))
248 branch->setLength(TextTools::toDouble(elt.length));
249 }
250 else
251 tree.createNode(node);
252
254 {
255 if (withId)
256 {
257 auto id = static_cast<PhyloTree::NodeIndex>(TextTools::toInt(elt.annotation));
258 tree.setNodeIndex(node, id);
259 if (branch)
260 tree.setEdgeIndex(branch, id);
261 }
262 else
263 {
264 if (bootstrap)
265 {
266 if (branch)
267 branch->setProperty("bootstrap", Number<double>(TextTools::toDouble(elt.annotation)));
268 // cout << "NODE: BOOTSTRAP: " << * elt.bootstrap << endl;
269 }
270 else
271 {
272 if (branch)
273 branch->setProperty(propertyName, BppString(elt.annotation));
274 }
275 }
276 }
277
278 NestedStringTokenizer nt(elt.content, "(", ")", ",");
279 vector<string> elements;
280 while (nt.hasMoreToken())
281 {
282 elements.push_back(nt.nextToken());
283 }
284
285 if (elt.isLeaf)
286 {
287 // This is a leaf:
288 string name = TextTools::removeSurroundingWhiteSpaces(elements[0]);
289 if (withId)
290 {
291 StringTokenizer st(name, "_", true, true);
292 ostringstream realName;
293 for (size_t i = 0; i < st.numberOfRemainingTokens() - 1; ++i)
294 {
295 if (i != 0)
296 {
297 realName << "_";
298 }
299 realName << st.getToken(i);
300 }
301 node->setName(realName.str());
302 tree.setNodeIndex(node, static_cast<PhyloTree::NodeIndex>(
304 if (branch)
305 tree.setEdgeIndex(branch, static_cast<PhyloTree::NodeIndex>(
307 }
308 else
309 node->setName(name);
310 }
311 else
312 {
313 // This is a node:
314 for (size_t i = 0; i < elements.size(); i++)
315 {
316 // cout << "NODE: SUBNODE: " << i << ", " << elements[i] << endl;
317 parenthesisToNode(tree, node, elements[i], nodeCounter, bootstrap, propertyName, withId, verbose);
318 }
319 }
320
321 if (!withId)
322 {
323 tree.setNodeIndex(node, nodeCounter);
324 if (branch)
325 tree.setEdgeIndex(branch, nodeCounter);
326 }
327
328 nodeCounter++;
329 if (verbose)
331 return node;
332}
333
334/******************************************************************************/
335
336unique_ptr<PhyloTree> Newick::parenthesisToPhyloTree(const string& description, bool bootstrap, const string& propertyName, bool withId, bool verbose) const
337{
338 string::size_type semi = description.rfind(';');
339 if (semi == string::npos)
340 throw Exception("Newick::parenthesisToTree(). Bad format: no semi-colon found.");
341 string content = description.substr(0, semi);
342 unsigned int nodeCounter = 0;
343 auto tree = make_unique<PhyloTree>();
344 shared_ptr<PhyloNode> root = parenthesisToNode(*tree, 0, content, nodeCounter, bootstrap, propertyName, withId, verbose);
345 tree->rootAt(root);
346 if (verbose)
347 {
348 (*ApplicationTools::message) << " nodes loaded.";
349 ApplicationTools::message->endLine();
350 }
351
352 return tree;
353}
354
355/**********************************************************/
356/* OUTPUT */
357/**********************************************************/
358
359void Newick::write_(const Tree& tree, ostream& out) const
360{
361 // Checking the existence of specified file, and possibility to open it in write mode
362 if (!out)
363 {
364 throw IOException ("Newick::writeTree: failed to write to stream");
365 }
366 if (useBootstrap_)
367 {
369 }
370 else
371 {
373 }
374}
375
376void Newick::write_(const PhyloTree& tree, ostream& out) const
377{
378 // Checking the existence of specified file, and possibility to open it in write mode
379 if (!out)
380 {
381 throw IOException ("Newick::writeTree: failed to write to stream");
382 }
383 if (useBootstrap_)
384 {
385 out << treeToParenthesis(tree, writeId_);
386 }
387 else
388 {
389 out << treeToParenthesis(tree, false, bootstrapPropertyName_);
390 }
391}
392
393/******************************************************************************/
394
395template<class N>
396void Newick::write_(const TreeTemplate<N>& tree, ostream& out) const
397{
398 // Checking the existence of specified file, and possibility to open it in write mode
399 if (!out)
400 {
401 throw IOException ("Newick::writeTree: failed to write to stream");
402 }
403 if (useBootstrap_)
404 {
406 }
407 else
408 {
410 }
411}
412
413
414/******************************************************************************/
415
416void Newick::write_(const vector<const Tree*>& trees, ostream& out) const
417{
418 // Checking the existence of specified file, and possibility to open it in write mode
419 if (!out)
420 {
421 throw IOException ("Newick::write: failed to write to stream");
422 }
423 for (unsigned int i = 0; i < trees.size(); i++)
424 {
425 if (useBootstrap_)
426 {
427 out << TreeTools::treeToParenthesis(*trees[i], writeId_);
428 }
429 else
430 {
432 }
433 }
434}
435
436/******************************************************************************/
437
438template<class N>
439void Newick::write_(const vector<TreeTemplate<N>*>& trees, ostream& out) const
440{
441 // Checking the existence of specified file, and possibility to open it in write mode
442 if (!out)
443 {
444 throw IOException ("Newick::write: failed to write to stream");
445 }
446 for (unsigned int i = 0; i < trees.size(); i++)
447 {
448 if (useBootstrap_)
449 {
451 }
452 else
453 {
455 }
456 }
457}
458
459/******************************************************************************/
460
461void Newick::write_(const vector<const PhyloTree*>& trees, ostream& out) const
462{
463 // Checking the existence of specified file, and possibility to open it in write mode
464 if (!out)
465 {
466 throw IOException ("Newick::write: failed to write to stream");
467 }
468 for (unsigned int i = 0; i < trees.size(); i++)
469 {
470 if (useBootstrap_)
471 {
472 out << treeToParenthesis(*trees[i], writeId_);
473 }
474 else
475 {
476 out << treeToParenthesis(*trees[i], false, bootstrapPropertyName_);
477 }
478 }
479}
480
481/******************************************************************************/
482
483string Newick::nodeToParenthesis(const PhyloTree& tree, const std::shared_ptr<PhyloNode> node, bool writeId) const
484{
485 ostringstream s;
486 shared_ptr<PhyloBranch> branch = tree.hasFather(node) ? tree.getEdgeToFather(node) : 0;
487
488 if (tree.getNumberOfSons(node) == 0)
489 {
490 s << node->getName();
491 }
492 else
493 {
494 s << "(";
495
496 vector<shared_ptr<PhyloNode>> vSons = tree.getSons(node);
497
498 for (vector<shared_ptr<PhyloNode>>::const_iterator it = vSons.begin(); it != vSons.end(); it++)
499 {
500 if (it != vSons.begin())
501 s << ",";
502
503 s << nodeToParenthesis(tree, *it, writeId);
504 }
505
506 s << ")";
507 }
508
509 if (writeId)
510 {
511 if (tree.isLeaf(node))
512 s << "_";
513 s << tree.getNodeIndex(node);
514 }
515 else
516 {
517 if (branch && branch->hasProperty("bootstrap"))
518 s << (dynamic_cast<const Number<double>*>(branch->getProperty("bootstrap"))->getValue());
519 }
520
521 if (branch && branch->hasLength())
522 s << ":" << branch->getLength();
523 return s.str();
524}
525
526/******************************************************************************/
527
528string Newick::nodeToParenthesis(const PhyloTree& tree, const std::shared_ptr<PhyloNode> node, bool bootstrap, const string& propertyName) const
529{
530 ostringstream s;
531 shared_ptr<PhyloBranch> branch = tree.hasFather(node) ? tree.getEdgeToFather(node) : 0;
532
533 if (tree.getNumberOfSons(node) == 0)
534 {
535 s << node->getName();
536 }
537 else
538 {
539 s << "(";
540
541 vector<shared_ptr<PhyloNode>> vSons = tree.getSons(node);
542
543 for (vector<shared_ptr<PhyloNode>>::const_iterator it = vSons.begin(); it != vSons.end(); it++)
544 {
545 if (it != vSons.begin())
546 s << ",";
547
548 s << nodeToParenthesis(tree, *it, bootstrap, propertyName);
549 }
550
551 s << ")";
552
553 if (branch)
554 {
555 if (bootstrap)
556 {
557 if (branch->hasProperty("bootstrap"))
558 s << (dynamic_cast<const Number<double>*>(branch->getProperty("bootstrap"))->getValue());
559 }
560 else
561 {
562 if (node->hasProperty(propertyName))
563 s << *(dynamic_cast<const BppString*>(node->getProperty(propertyName)));
564 }
565 }
566 }
567
568 if (branch && branch->hasLength())
569 s << ":" << branch->getLength();
570
571 return s.str();
572}
573
574/******************************************************************************/
575
576string Newick::treeToParenthesis(const PhyloTree& tree, bool writeId) const
577{
578 ostringstream s;
579 s << "(";
580
581 shared_ptr<PhyloNode> root = tree.getRoot();
582
583 std::vector<shared_ptr<PhyloNode>> rSons = tree.getSons(root);
584
585 if (tree.isRooted())
586 {
587 for (size_t i = 0; i < rSons.size(); ++i)
588 {
589 if (i != 0)
590 s << ",";
591 s << nodeToParenthesis(tree, rSons[i], writeId);
592 }
593 }
594 else
595 {
596 s << root->getName();
597
598 for (size_t i = 0; i < rSons.size(); ++i)
599 {
600 if (i != 0)
601 s << ",";
602 s << nodeToParenthesis(tree, rSons[i], writeId);
603 }
604 }
605
606 s << ")";
607
608 const shared_ptr<PhyloBranch> branch = tree.hasFather(root) ? tree.getEdgeToFather(root) : 0;
609
610 if (branch && branch->hasLength())
611 s << ":" << branch->getLength();
612 s << ";" << endl;
613
614 return s.str();
615}
616
617/******************************************************************************/
618
619string Newick::treeToParenthesis(const PhyloTree& tree, bool bootstrap, const string& propertyName) const
620{
621 ostringstream s;
622 s << "(";
623
624 shared_ptr<PhyloNode> root = tree.getRoot();
625
626 std::vector<shared_ptr<PhyloNode>> rSons = tree.getSons(root);
627
628 if (tree.isRooted())
629 {
630 for (size_t i = 0; i < rSons.size(); ++i)
631 {
632 if (i != 0)
633 s << ",";
634 s << nodeToParenthesis(tree, rSons[i], bootstrap, propertyName);
635 }
636 }
637 else
638 {
639 s << root->getName();
640
641 for (size_t i = 0; i < rSons.size(); ++i)
642 {
643 if (i != 0)
644 s << ",";
645 s << nodeToParenthesis(tree, rSons[i], bootstrap, propertyName);
646 }
647 }
648
649 s << ")";
650
651 shared_ptr<PhyloBranch> branch = tree.hasFather(root) ? tree.getEdgeToFather(root) : 0;
652
653 if (branch)
654 {
655 if (bootstrap)
656 {
657 if (branch->hasProperty("bootstrap"))
658 s << (dynamic_cast<const Number<double>*>(branch->getProperty("bootstrap"))->getValue());
659 }
660 else
661 {
662 if (branch->hasProperty(propertyName))
663 s << *(dynamic_cast<const BppString*>(branch->getProperty(propertyName)));
664 }
665 }
666
667 s << ";" << endl;
668 return s.str();
669}
static std::shared_ptr< OutputStream > message
static void displayUnlimitedGauge(size_t iter, const std::string &mes="")
bool isLeaf(const Nref node) const
AssociationGraphObserver< N, E >::NodeIndex NodeIndex
virtual NodeIndex setNodeIndex(const std::shared_ptr< N > nodeObject, NodeIndex index)=0
virtual std::shared_ptr< N > getRoot() const=0
virtual EdgeIndex setEdgeIndex(const std::shared_ptr< E > edgeObject, EdgeIndex index)=0
virtual NodeIndex getNodeIndex(const std::shared_ptr< N > nodeObject) const=0
virtual void createNode(std::shared_ptr< N > newNodeObject)=0
std::vector< std::shared_ptr< N > > getSons(const std::shared_ptr< N > node) const
bool hasFather(const std::shared_ptr< N > nodeObject) const
std::shared_ptr< E > getEdgeToFather(const std::shared_ptr< N > nodeObject) const
size_t getNumberOfSons(const std::shared_ptr< N > node) const
const std::string & nextToken()
void readPhyloTrees(std::istream &in, std::vector< std::unique_ptr< PhyloTree > > &trees) const override
Read trees from a stream.
Definition: Newick.cpp:139
std::shared_ptr< PhyloNode > parenthesisToNode(PhyloTree &tree, std::shared_ptr< PhyloNode > father, const std::string &description, unsigned int &nodeCounter, bool bootstrap, const std::string &propertyName, bool withId, bool verbose) const
Definition: Newick.cpp:233
std::unique_ptr< PhyloTree > parenthesisToPhyloTree(const std::string &description, bool bootstrap=false, const std::string &propertyName="", bool withId=false, bool verbose=false) const
Definition: Newick.cpp:336
std::string nodeToParenthesis(const PhyloTree &tree, std::shared_ptr< PhyloNode > node, bool writeId=false) const
Get the Newick description of a subtree.
Definition: Newick.cpp:483
void write_(const Tree &tree, std::ostream &out) const
Definition: Newick.cpp:359
IOTree::Element getElement(const std::string &elt) const override
Definition: Newick.cpp:170
const std::string getFormatDescription() const override
Definition: Newick.cpp:34
std::unique_ptr< PhyloTree > readPhyloTree(std::istream &in) const override
Read a tree from a stream.
Definition: Newick.cpp:76
std::string bootstrapPropertyName_
Definition: Newick.h:63
std::string treeToParenthesis(const PhyloTree &tree, bool writeId=false) const
Get the parenthesis description of a tree.
Definition: Newick.cpp:576
std::unique_ptr< TreeTemplate< Node > > readTreeTemplate(std::istream &in) const override
Definition: Newick.cpp:44
bool useBootstrap_
Definition: Newick.h:62
bool verbose_
Definition: Newick.h:64
void readTrees(std::istream &in, std::vector< std::unique_ptr< Tree > > &trees) const override
Read trees from a stream.
Definition: Newick.cpp:108
bool writeId_
Definition: Newick.h:61
bool allowComments_
Definition: Newick.h:60
size_t numberOfRemainingTokens() const
const std::string & getToken(size_t pos) const
static std::unique_ptr< TreeTemplate< Node > > parenthesisToTree(const std::string &description, bool bootstrap=true, const std::string &propertyName=TreeTools::BOOTSTRAP, bool withId=false, bool verbose=true)
Parse a string in the parenthesis format and convert it to a tree.
static std::string treeToParenthesis(const TreeTemplate< Node > &tree, bool writeId=false)
Get the parenthesis description of a tree.
The phylogenetic tree class.
Definition: TreeTemplate.h:59
static std::string treeToParenthesis(const Tree &tree, bool writeId=false)
Get the parenthesis description of a tree.
Definition: TreeTools.cpp:295
Interface for phylogenetic tree objects.
Definition: Tree.h:115
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string removeSubstrings(const std::string &s, char blockBeginning, char blockEnding)
bool isEmpty(const std::string &s)
Defines the basic types of data flow nodes.
std::string annotation
Definition: IoTree.h:36
std::string length
Definition: IoTree.h:35
std::string content
Definition: IoTree.h:34