bpp-phyl3 3.0.0
ExtendedNewick.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include <Bpp/BppString.h>
10
11#include "../Tree/PhyloBranch.h"
12#include "../Tree/PhyloNode.h"
13#include "../Tree/PhyloDAG.h"
14#include "ExtendedNewick.h"
15
16using namespace bpp;
17
18// From the STL:
19#include <iostream>
20#include <fstream>
21
22using namespace std;
23
24/******************************************************************************/
25
26const string ExtendedNewick::getFormatName() const { return "ExtendedNewick"; }
27
28/******************************************************************************/
29
31{
32 return string("Extended Newick Format. ");
33}
34
35/**********************************************************/
36/* INPUT */
37/**********************************************************/
38
39
40unique_ptr<PhyloDAG> ExtendedNewick::readPhyloDAG(istream& in) const
41{
42 // Checking the existence of specified file
43 if (!in)
44 {
45 throw IOException ("ExtendedNewick::readPhyloDAG: failed to read from stream");
46 }
47
48 // We concatenate all line in file till we reach the ending semi colon:
49 string temp, description; // Initialization
50 // Main loop : for all file lines
51 while (getline(in, temp, '\n'))
52 {
53 string::size_type index = temp.find(";");
54 if (index != string::npos)
55 {
56 description += temp.substr(0, index + 1);
57 break;
58 }
59 else
60 description += temp;
61 }
62
64 description = TextTools::removeSubstrings(description, '[', ']');
65 if (TextTools::isEmpty(description))
66 throw IOException("ExtendedNewick::read: no dag was found!");
67 return parenthesisToPhyloDAG(description, verbose_);
68}
69
70
71/******************************************************************************/
72
73void ExtendedNewick::readPhyloDAGs(istream& in, vector<unique_ptr<PhyloDAG>>& dags) const
74{
75 // Checking the existence of specified file
76 if (!in)
77 {
78 throw IOException ("ExtendedNewick::readPhyloDAGs(vector): failed to read from stream");
79 }
80
81 // Main loop : for all file lines
82 string temp, description; // Initialization
83 string::size_type index;
84 // We concatenate all line in file till we reach the ending semi colon:
85 while (getline(in, temp, '\n'))
86 {
87 index = temp.find(";");
88 if (index != string::npos)
89 {
90 description += temp.substr(0, index + 1);
92 description = TextTools::removeSubstrings(description, '[', ']');
93 dags.push_back(parenthesisToPhyloDAG(description, verbose_));
94 description = temp.substr(index + 1);
95 }
96 else
97 description += temp;
98 }
99 // In case the file is empty, the method will not add any neww dag to the vector.
100}
101
102/***************************************/
103
105{
106 IODAG::Element element;
107 element.length = ""; // default
108 element.annotation = ""; // default
109 element.isLeaf = false; // default
110
111 size_t colonIndex;
112 bool hasColon = false;
113 for (colonIndex = elt.size(); colonIndex > 0 && elt[colonIndex] != ')'; colonIndex--)
114 {
115 if (elt[colonIndex] == ':')
116 {
117 hasColon = true;
118 break;
119 }
120 }
121 try
122 {
123 string elt2;
124 if (hasColon)
125 {
126 // this is an element with length:
127 elt2 = elt.substr(0, colonIndex);
128 element.length = TextTools::removeSurroundingWhiteSpaces(elt.substr(colonIndex + 1));
129 }
130 else
131 {
132 // this is an element without length;
133 elt2 = elt;
134 }
135
136 string::size_type lastP = elt2.rfind(')');
137 string::size_type firstP = elt2.find('(');
138 if (firstP == string::npos)
139 {
140 // This is a leaf:
141 element.content = elt2;
142 element.annotation = elt2;
143 element.isLeaf = true;
144 }
145 else
146 {
147 // This is a node:
148 if (lastP < firstP)
149 throw IOException("ExtendedNewick::getElement(). Invalid format: bad closing parenthesis in " + elt2);
150 element.content = TextTools::removeSurroundingWhiteSpaces(elt2.substr(firstP + 1, lastP - firstP - 1));
151 string annot = TextTools::removeSurroundingWhiteSpaces(elt2.substr(lastP + 1));
152 if (!TextTools::isEmpty(annot))
153 {
154 element.annotation = annot;
155 }
156 }
157 }
158 catch (exception& e)
159 {
160 throw IOException("Bad dag description: " + elt);
161 }
162 return element;
163}
164
165/************************************************************/
166
167shared_ptr<PhyloNode> ExtendedNewick::parenthesisToNode(PhyloDAG& dag, std::shared_ptr<PhyloNode> father, const std::string& description, unsigned int& nodeCounter, unsigned int& branchCounter, std::map<std::string, std::shared_ptr<PhyloNode>>& mapEvent, bool withId, bool verbose) const
168{
169 // cerr << "NODE: " << description << endl;
170 IODAG::Element elt = getElement(description);
171
172 // Is the node a connecting one?
173
174 string annot = elt.annotation;
175 size_t poshash = annot.find("#");
176
177 shared_ptr<PhyloNode> node;
178
179
180 // Check Event:
181 if (poshash != string::npos)
182 {
183 string evId = annot.substr(poshash + 1);
184 string label = annot.substr(0, poshash);
185
186 if (mapEvent.find(evId) != mapEvent.end())
187 node = mapEvent[evId];
188 else
189 {
190 node = std::make_shared<PhyloNode>(label);
191 if (evId[0] == 'H')
192 {
193 auto event = std::make_shared<NodeEvent>(NodeEvent::hybridizationEvent);
194 node->setProperty("event", *event);
195 }
196 mapEvent[evId] = node;
197 dag.createNode(node);
198 }
199 }
200 else
201 {
202 node = std::make_shared<PhyloNode>(annot);
203 dag.createNode(node);
204 }
205
206 shared_ptr<PhyloBranch> branch(father ? new PhyloBranch() : 0);
207
208 if (father)
209 {
210 dag.link(father, node, branch);
211
212 if (!TextTools::isEmpty(elt.length))
213 branch->setLength(TextTools::toDouble(elt.length));
214 }
215
216
217 if (annot.size() != 0)
218 {
219 if (withId)
220 {
221 auto id = static_cast<PhyloDAG::NodeIndex>(TextTools::toInt(elt.annotation));
222 dag.setNodeIndex(node, id);
223 }
224 }
225
226 NestedStringTokenizer nt(elt.content, "(", ")", ",");
227 vector<string> elements;
228 while (nt.hasMoreToken())
229 {
230 elements.push_back(nt.nextToken());
231 }
232
233 if (elt.isLeaf)
234 {
235 // This is a leaf:
236 string name = TextTools::removeSurroundingWhiteSpaces(elements[0]);
237 if (withId)
238 {
239 StringTokenizer st(name, "_", true, true);
240 ostringstream realName;
241 for (size_t i = 0; i < st.numberOfRemainingTokens() - 1; ++i)
242 {
243 if (i != 0)
244 realName << "_";
245
246 realName << st.getToken(i);
247 }
248 node->setName(realName.str());
249 dag.setNodeIndex(node, static_cast<PhyloDAG::NodeIndex>(
251 }
252 else
253 node->setName(name);
254 }
255 else
256 {
257 // This is a node:
258 for (size_t i = 0; i < elements.size(); i++)
259 {
260 parenthesisToNode(dag, node, elements[i], nodeCounter, branchCounter, mapEvent, withId, verbose);
261 }
262 }
263
264 if (!withId)
265 {
266 if (!dag.hasNodeIndex(node))
267 {
268 dag.setNodeIndex(node, nodeCounter);
269 nodeCounter++;
270 }
271
272 if (branch)
273 {
274 dag.setEdgeIndex(branch, branchCounter);
275 branchCounter++;
276 }
277 }
278
279 if (verbose)
281 return node;
282}
283
284/******************************************************************************/
285
286unique_ptr<PhyloDAG> ExtendedNewick::parenthesisToPhyloDAG(const string& description, bool withId, bool verbose) const
287{
288 string::size_type semi = description.rfind(';');
289 if (semi == string::npos)
290 throw Exception("ExtendedNewick::parenthesisToPhyloDAG(). Bad format: no semi-colon found.");
291 string content = description.substr(0, semi);
292 unsigned int nodeCounter = 0;
293 unsigned int branchCounter = 0;
294 map<std::string, shared_ptr<PhyloNode>> mapEvent;
295
296 auto dag = make_unique<PhyloDAG>();
297 shared_ptr<PhyloNode> root = parenthesisToNode(*dag, 0, content, nodeCounter, branchCounter, mapEvent, withId, verbose);
298 dag->rootAt(root);
299 if (verbose)
300 {
301 (*ApplicationTools::message) << " nodes loaded.";
302 ApplicationTools::message->endLine();
303 }
304
305 return dag;
306}
307
308/**********************************************************/
309/* OUTPUT */
310/**********************************************************/
311
312void ExtendedNewick::write_(const PhyloDAG& dag, ostream& out) const
313{
314 // Checking the existence of specified file, and possibility to open it in write mode
315 if (!out)
316 {
317 throw IOException ("ExtendedNewick::writePhyloDAG: failed to write to stream");
318 }
319 out << dagToParenthesis(dag, writeId_);
320}
321
322
323/******************************************************************************/
324
325void ExtendedNewick::write_(const vector<const PhyloDAG*>& dags, ostream& out) const
326{
327 // Checking the existence of specified file, and possibility to open it in write mode
328 if (!out)
329 {
330 throw IOException ("ExtendedNewick::write: failed to write to stream");
331 }
332 for (unsigned int i = 0; i < dags.size(); i++)
333 {
334 out << dagToParenthesis(*dags[i], writeId_);
335 }
336}
337
338/******************************************************************************/
339
340string ExtendedNewick::edgeToParenthesis(const PhyloDAG& dag, const std::shared_ptr<PhyloBranch> edge, std::vector<std::shared_ptr<PhyloNode>>& writtenNodes, bool writeId) const
341{
342 ostringstream s;
343 shared_ptr<PhyloNode> node = dag.getSon(edge);
344
345 if (std::find(writtenNodes.begin(), writtenNodes.end(), node) != writtenNodes.end())
346 {
347 s << node->getName();
348 if (edge->hasLength())
349 s << ":" << edge->getLength();
350 return s.str();
351 }
352
353 if (dag.getNumberOfSons(node) != 0)
354 {
355 s << "(";
356
357 vector<shared_ptr<PhyloBranch>> vEdges = dag.getOutgoingEdges(node);
358
359 for (vector<shared_ptr<PhyloBranch>>::const_iterator it = vEdges.begin(); it != vEdges.end(); it++)
360 {
361 if (it != vEdges.begin())
362 s << ",";
363
364 s << edgeToParenthesis(dag, *it, writtenNodes, writeId);
365 }
366 s << ")";
367 }
368 s << node->getName();
369
370 if (writeId)
371 {
372 if (dag.isLeaf(node))
373 s << "_";
374 s << dag.getNodeIndex(node);
375 }
376
377 if (edge->hasLength())
378 s << ":" << edge->getLength();
379
380 writtenNodes.push_back(node);
381
382 return s.str();
383}
384
385/******************************************************************************/
386
387string ExtendedNewick::dagToParenthesis(const PhyloDAG& dag, bool writeId) const
388{
389 ostringstream s;
390 s << "(";
391
392 shared_ptr<PhyloNode> root = dag.getRoot();
393
394 std::vector<shared_ptr<PhyloNode>> writtenNodes;
395
396 std::vector<shared_ptr<PhyloBranch>> rEdges = dag.getOutgoingEdges(root);
397
398 if (dag.isRooted())
399 {
400 for (size_t i = 0; i < rEdges.size(); ++i)
401 {
402 if (i != 0)
403 s << ",";
404 s << edgeToParenthesis(dag, rEdges[i], writtenNodes, writeId);
405 }
406 }
407 else
408 {
409 s << root->getName();
410
411 for (size_t i = 0; i < rEdges.size(); ++i)
412 {
413 if (i != 0)
414 s << ",";
415 s << edgeToParenthesis(dag, rEdges[i], writtenNodes, writeId);
416 }
417 }
418
419 s << ")";
420
421 s << ";" << endl;
422
423 return s.str();
424}
static std::shared_ptr< OutputStream > message
static void displayUnlimitedGauge(size_t iter, const std::string &mes="")
std::shared_ptr< N > getSon(const std::shared_ptr< E > edge) const
size_t getNumberOfSons(const std::shared_ptr< N > node) const
AssociationGraphObserver< N, E >::NodeIndex NodeIndex
bool isLeaf(const Nref node) const
virtual void link(std::shared_ptr< N > nodeObjectA, std::shared_ptr< N > nodeObjectB, std::shared_ptr< E > edgeObject=00)=0
virtual NodeIndex setNodeIndex(const std::shared_ptr< N > nodeObject, NodeIndex index)=0
virtual std::shared_ptr< N > getRoot() const=0
virtual bool hasNodeIndex(const std::shared_ptr< N > nodeObject) const=0
virtual EdgeIndex setEdgeIndex(const std::shared_ptr< E > edgeObject, EdgeIndex index)=0
virtual NodeIndex getNodeIndex(const std::shared_ptr< N > nodeObject) const=0
virtual std::vector< std::shared_ptr< E > > getOutgoingEdges(const std::shared_ptr< N > node) const=0
virtual void createNode(std::shared_ptr< N > newNodeObject)=0
std::shared_ptr< PhyloNode > parenthesisToNode(PhyloDAG &dag, std::shared_ptr< PhyloNode > father, const std::string &description, unsigned int &nodeCounter, unsigned int &branchCounter, std::map< std::string, std::shared_ptr< PhyloNode > > &mapEvent, bool withId, bool verbose) const
void write_(const PhyloDAG &tree, std::ostream &out) const
const std::string getFormatDescription() const override
void readPhyloDAGs(std::istream &in, std::vector< std::unique_ptr< PhyloDAG > > &dags) const override
Read dags from a stream.
Element getElement(const std::string &elt) const override
std::string edgeToParenthesis(const PhyloDAG &dag, std::shared_ptr< PhyloBranch > edge, std::vector< std::shared_ptr< PhyloNode > > &writtenNodes, bool writeId=false) const
Get the ExtendedNewick description of a subdag.
std::string dagToParenthesis(const PhyloDAG &dag, bool writeId=false) const
Get the parenthesis description of a tree.
std::unique_ptr< PhyloDAG > readPhyloDAG(std::istream &in) const override
Read a DAG from a stream.
std::unique_ptr< PhyloDAG > parenthesisToPhyloDAG(const std::string &description, bool withId, bool verbose=false) const
const std::string & nextToken()
static const NodeEvent hybridizationEvent
Definition: PhyloNode.h:51
size_t numberOfRemainingTokens() const
const std::string & getToken(size_t pos) const
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string removeSubstrings(const std::string &s, char blockBeginning, char blockEnding)
bool isEmpty(const std::string &s)
Defines the basic types of data flow nodes.
std::string length
Definition: IoDAG.h:32
std::string annotation
Definition: IoDAG.h:33
std::string content
Definition: IoDAG.h:31