bpp-phyl3  3.0.0
ExtendedNewick.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/BppString.h>
6 #include <Bpp/Numeric/Number.h>
9 #include <Bpp/Text/TextTools.h>
10 
11 #include "../Tree/PhyloBranch.h"
12 #include "../Tree/PhyloNode.h"
13 #include "../Tree/PhyloDAG.h"
14 #include "ExtendedNewick.h"
15 
16 using namespace bpp;
17 
18 // From the STL:
19 #include <iostream>
20 #include <fstream>
21 
22 using namespace std;
23 
24 /******************************************************************************/
25 
26 const string ExtendedNewick::getFormatName() const { return "ExtendedNewick"; }
27 
28 /******************************************************************************/
29 
31 {
32  return string("Extended Newick Format. ");
33 }
34 
35 /**********************************************************/
36 /* INPUT */
37 /**********************************************************/
38 
39 
40 unique_ptr<PhyloDAG> ExtendedNewick::readPhyloDAG(istream& in) const
41 {
42  // Checking the existence of specified file
43  if (!in)
44  {
45  throw IOException ("ExtendedNewick::readPhyloDAG: failed to read from stream");
46  }
47 
48  // We concatenate all line in file till we reach the ending semi colon:
49  string temp, description; // Initialization
50  // Main loop : for all file lines
51  while (getline(in, temp, '\n'))
52  {
53  string::size_type index = temp.find(";");
54  if (index != string::npos)
55  {
56  description += temp.substr(0, index + 1);
57  break;
58  }
59  else
60  description += temp;
61  }
62 
63  if (allowComments_)
64  description = TextTools::removeSubstrings(description, '[', ']');
65  if (TextTools::isEmpty(description))
66  throw IOException("ExtendedNewick::read: no dag was found!");
67  return parenthesisToPhyloDAG(description, verbose_);
68 }
69 
70 
71 /******************************************************************************/
72 
73 void ExtendedNewick::readPhyloDAGs(istream& in, vector<unique_ptr<PhyloDAG>>& dags) const
74 {
75  // Checking the existence of specified file
76  if (!in)
77  {
78  throw IOException ("ExtendedNewick::readPhyloDAGs(vector): failed to read from stream");
79  }
80 
81  // Main loop : for all file lines
82  string temp, description; // Initialization
83  string::size_type index;
84  // We concatenate all line in file till we reach the ending semi colon:
85  while (getline(in, temp, '\n'))
86  {
87  index = temp.find(";");
88  if (index != string::npos)
89  {
90  description += temp.substr(0, index + 1);
91  if (allowComments_)
92  description = TextTools::removeSubstrings(description, '[', ']');
93  dags.push_back(parenthesisToPhyloDAG(description, verbose_));
94  description = temp.substr(index + 1);
95  }
96  else
97  description += temp;
98  }
99  // In case the file is empty, the method will not add any neww dag to the vector.
100 }
101 
102 /***************************************/
103 
105 {
107  element.length = ""; // default
108  element.annotation = ""; // default
109  element.isLeaf = false; // default
110 
111  size_t colonIndex;
112  bool hasColon = false;
113  for (colonIndex = elt.size(); colonIndex > 0 && elt[colonIndex] != ')'; colonIndex--)
114  {
115  if (elt[colonIndex] == ':')
116  {
117  hasColon = true;
118  break;
119  }
120  }
121  try
122  {
123  string elt2;
124  if (hasColon)
125  {
126  // this is an element with length:
127  elt2 = elt.substr(0, colonIndex);
128  element.length = TextTools::removeSurroundingWhiteSpaces(elt.substr(colonIndex + 1));
129  }
130  else
131  {
132  // this is an element without length;
133  elt2 = elt;
134  }
135 
136  string::size_type lastP = elt2.rfind(')');
137  string::size_type firstP = elt2.find('(');
138  if (firstP == string::npos)
139  {
140  // This is a leaf:
141  element.content = elt2;
142  element.isLeaf = true;
143  }
144  else
145  {
146  // This is a node:
147  if (lastP < firstP)
148  throw IOException("ExtendedNewick::getElement(). Invalid format: bad closing parenthesis in " + elt2);
149  element.content = TextTools::removeSurroundingWhiteSpaces(elt2.substr(firstP + 1, lastP - firstP - 1));
150  string annot = TextTools::removeSurroundingWhiteSpaces(elt2.substr(lastP + 1));
151  if (!TextTools::isEmpty(annot))
152  {
153  element.annotation = annot;
154  }
155  }
156  }
157  }
158  catch (exception& e)
159  {
160  throw IOException("Bad dag description: " + elt);
161  }
162  return element;
163 }
164 
165 /************************************************************/
166 
167 shared_ptr<PhyloNode> ExtendedNewick::parenthesisToNode(PhyloDAG& dag, std::shared_ptr<PhyloNode> father, const std::string& description, unsigned int& nodeCounter, unsigned int& branchCounter, std::map<std::string, std::shared_ptr<PhyloNode> >& mapEvent, bool withId, bool verbose) const
168 {
169 // cout << "NODE: " << description << endl;
170  IODAG::Element elt = getElement(description);
171 
172  // Is the node a connecting one?
173 
174  string annot = elt.annot;
175  size_t poshash = elf.find("#");
176 
177  shared_ptr<PhyloNode> node;
178  string label;
179 
180  // Check Event:
181  if (poshash != string::pos)
182  {
183  string evId = annot.substr(poshash+1);
184  label = annot.substr(0, poshash);
185 
186  if (mapEvent.find(evId)!=mapEvent.end())
187  node=mapEvent[evId];
188  else
189  {
190  node = std::make_shared<PhyloNode>();
191  mapEvent[evId]=node;
192  dag.createNode(node);
193  }
194  }
195  else
196  label = annot;
197 
198  shared_ptr<PhyloBranch> branch(father ? new PhyloBranch() : 0);
199 
200  if (father)
201  {
202  dag.link(father, node, branch);
203 
204  if (!TextTools::isEmpty(elt.length))
205  branch->setLength(TextTools::toDouble(elt.length));
206  }
207 
208 
209  if (annot.size()!=0)
210  {
211  if (withId)
212  {
213  auto id = static_cast<PhyloDAG::NodeIndex>(TextTools::toInt(elt.annotation));
214  dag.setNodeIndex(node, id);
215  // no direct connection betwenn nodeid & branchid
216  // if (branch)
217  // dag.setEdgeIndex(branch, id);
218  }
219  }
220 
221  NestedStringTokenizer nt(elt.content, "(", ")", ",");
222  vector<string> elements;
223  while (nt.hasMoreToken())
224  {
225  elements.push_back(nt.nextToken());
226  }
227 
228  if (elt.isLeaf)
229  {
230  // This is a leaf:
231  string name = TextTools::removeSurroundingWhiteSpaces(elements[0]);
232  if (withId)
233  {
234  StringTokenizer st(name, "_", true, true);
235  ostringstream realName;
236  for (size_t i = 0; i < st.numberOfRemainingTokens() - 1; ++i)
237  {
238  if (i != 0)
239  {
240  realName << "_";
241  }
242  realName << st.getToken(i);
243  }
244  node->setName(realName.str());
245  dag.setNodeIndex(node, static_cast<PhyloDAG::NodeIndex>(
247  if (branch)
248  dag.setEdgeIndex(branch, static_cast<PhyloDAG::NodeIndex>(
250  }
251  else
252  node->setName(name);
253  }
254  else
255  {
256  // This is a node:
257  for (size_t i = 0; i < elements.size(); i++)
258  {
259  // cout << "NODE: SUBNODE: " << i << ", " << elements[i] << endl;
260  parenthesisToNode(dag, node, elements[i], nodeCounter, branchCounter, mapEvent, withId, verbose);
261  }
262  }
263 
264  if (!withId)
265  {
266  if (dag.hasIndex(noode))
267  dag.setNodeIndex(node, nodeCounter);
268  if (branch)
269  dag.setEdgeIndex(branch, branchCounter);
270  }
271 
272  nodeCounter++;
273 
274  if (verbose)
276  return node;
277 }
278 
279 /******************************************************************************/
280 
281 unique_ptr<PhyloDAG> ExtendedNewick::parenthesisToPhyloDAG(const string& description, bool withId, bool verbose) const
282 {
283  string::size_type semi = description.rfind(';');
284  if (semi == string::npos)
285  throw Exception("ExtendedNewick::parenthesisToPhyloDAG(). Bad format: no semi-colon found.");
286  string content = description.substr(0, semi);
287  unsigned int nodeCounter = 0;
288  unsigned int branchCounter = 0;
289  map<std::string, shared_ptr<PhyloNode> > mapEvent;
290 
291  auto dag = make_unique<PhyloDAG>();
292  shared_ptr<PhyloNode> root = parenthesisToNode(*dag, 0, content, nodeCounter, branchCounter, mapEvent, withId, verbose);
293  dag->rootAt(root);
294  if (verbose)
295  {
296  (*ApplicationTools::message) << " nodes loaded.";
297  ApplicationTools::message->endLine();
298  }
299 
300  return dag;
301 }
302 
303 /**********************************************************/
304 /* OUTPUT */
305 /**********************************************************/
306 
307 void ExtendedNewick::write_(const PhyloDAG& dag, ostream& out) const
308 {
309  // Checking the existence of specified file, and possibility to open it in write mode
310  if (!out)
311  {
312  throw IOException ("ExtendedNewick::writePhyloDAG: failed to write to stream");
313  }
314  out << dagToParenthesis(dag, writeId_);
315 }
316 
317 
318 /******************************************************************************/
319 
320 void ExtendedNewick::write_(const vector<const PhyloDAG*>& dags, ostream& out) const
321 {
322  // Checking the existence of specified file, and possibility to open it in write mode
323  if (!out)
324  {
325  throw IOException ("ExtendedNewick::write: failed to write to stream");
326  }
327  for (unsigned int i = 0; i < dags.size(); i++)
328  out << dagToParenthesis(*dags[i], writeId_);
329 
330 }
331 
332 /******************************************************************************/
333 
334 string ExtendedNewick::nodeToParenthesis(const PhyloDAG& dag, const std::shared_ptr<PhyloNode> node, bool writeId) const
335 {
336  ostringstream s;
337  // shared_ptr<PhyloBranch> branch = dag.hasFather(node) ? dag.getEdgeToFather(node) : 0;
338 
339  // if (dag.getNumberOfSons(node) == 0)
340  // {
341  // s << node->getName();
342  // }
343  // else
344  // {
345  // s << "(";
346 
347  // vector<shared_ptr<PhyloNode>> vSons = dag.getSons(node);
348 
349  // for (vector<shared_ptr<PhyloNode>>::const_iterator it = vSons.begin(); it != vSons.end(); it++)
350  // {
351  // if (it != vSons.begin())
352  // s << ",";
353 
354  // s << nodeToParenthesis(dag, *it);
355  // }
356 
357  // s << ")";
358  // }
359 
360  // if (writeId)
361  // {
362  // if (dag.isLeaf(node))
363  // s << "_";
364  // s << dag.getNodeIndex(node);
365  // }
366 
367  // if (branch && branch->hasLength())
368  // s << ":" << branch->getLength();
369  return s.str();
370 }
371 
372 /******************************************************************************/
373 
374 string ExtendedNewick::dagToParenthesis(const PhyloDAG& dag, bool writeId) const
375 {
376  ostringstream s;
377  s << "(";
378 
379  shared_ptr<PhyloNode> root = dag.getRoot();
380 
381  std::vector<shared_ptr<PhyloNode>> rSons = dag.getSons(root);
382 
383  if (dag.isRooted())
384  {
385  for (size_t i = 0; i < rSons.size(); ++i)
386  {
387  if (i != 0)
388  s << ",";
389  s << nodeToParenthesis(dag, rSons[i], writeId);
390  }
391  }
392  else
393  {
394  s << root->getName();
395 
396  for (size_t i = 0; i < rSons.size(); ++i)
397  {
398  if (i != 0)
399  s << ",";
400  s << nodeToParenthesis(dag, rSons[i], writeId);
401  }
402  }
403 
404  s << ")";
405 
406  // const shared_ptr<PhyloBranch> branch = dag.hasFather(root) ? dag.getEdgeToFather(root) : 0;
407 
408  // if (branch && branch->hasLength())
409  // s << ":" << branch->getLength();
410  s << ";" << endl;
411 
412  return s.str();
413 }
414 
415 /******************************************************************************/
416 
417 string ExtendedNewick::dagToParenthesis(const PhyloDAG& dag) const
418 {
419  ostringstream s;
420  s << "(";
421 
422  shared_ptr<PhyloNode> root = dag.getRoot();
423 
424  std::vector<shared_ptr<PhyloNode>> rSons = dag.getSons(root);
425 
426  if (dag.isRooted())
427  {
428  for (size_t i = 0; i < rSons.size(); ++i)
429  {
430  if (i != 0)
431  s << ",";
432  s << nodeToParenthesis(dag, rSons[i]);
433  }
434  }
435  else
436  {
437  s << root->getName();
438 
439  for (size_t i = 0; i < rSons.size(); ++i)
440  {
441  if (i != 0)
442  s << ",";
443  s << nodeToParenthesis(dag, rSons[i]);
444  }
445  }
446 
447  s << ")";
448 
449  // shared_ptr<PhyloBranch> branch = dag.hasFather(root) ? dag.getEdgeToFather(root) : 0;
450 
451  s << ";" << endl;
452  return s.str();
453 }
return element
static std::shared_ptr< OutputStream > message
static void displayUnlimitedGauge(size_t iter, const std::string &mes="")
void write_(const PhyloDAG &tree, std::ostream &out) const
const std::string getFormatDescription() const override
Element getElement(const std::string &elt) const override
std::string dagToParenthesis(const PhyloDAG &dag, bool writeId=false) const
Get the parenthesis description of a tree.
std::unique_ptr< PhyloDAG > readPhyloDAG(std::istream &in) const override=0
virtual void readPhyloDAGs(std::istream &in, std::vector< std::unique_ptr< PhyloDAG >> &dags) const override=0
std::shared_ptr< PhyloNode > parenthesisToNode(PhyloDAG &dag, std::shared_ptr< PhyloNode > father, const std::string &description, unsigned int &nodeCounter, unsigned int &branchCounter, std::map< std::string, std::shared_ptr< PhyloNode > > &mapEvent, bool withId, bool verbose) const
std::unique_ptr< PhyloDAG > parenthesisToPhyloDAG(const std::string &description, bool withId, bool verbose=false) const
std::string nodeToParenthesis(const PhyloDAG &dag, std::shared_ptr< PhyloNode > node, bool writeId=false) const
Get the ExtendedNewick description of a subdag.
const std::string getFormatName() const override
const std::string & nextToken()
size_t numberOfRemainingTokens() const
const std::string & getToken(size_t pos) const
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string removeSubstrings(const std::string &s, char blockBeginning, char blockEnding)
bool isEmpty(const std::string &s)
Defines the basic types of data flow nodes.
std::string length
Definition: IoDAG.h:32
std::string annotation
Definition: IoDAG.h:33
std::string content
Definition: IoDAG.h:31