bpp-phyl3  3.0.0
NexusIoTree.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Io/FileTools.h>
9 #include <Bpp/Text/TextTools.h>
10 
11 #include "../Tree/PhyloTree.h"
12 #include "../Tree/Tree.h"
13 #include "../Tree/TreeTemplate.h"
14 #include "../Tree/TreeTemplateTools.h"
15 #include "Newick.h"
16 #include "NexusIoTree.h"
17 
18 // From bpp-seq:
19 #include <Bpp/Seq/Io/NexusTools.h>
20 
21 using namespace bpp;
22 
23 // From the STL:
24 #include <iostream>
25 #include <fstream>
26 #include <sstream>
27 
28 using namespace std;
29 
30 /******************************************************************************/
31 
32 const string NexusIOTree::getFormatName() const { return "Nexus"; }
33 
34 /******************************************************************************/
35 
37 {
38  return string("Nexus format (trees only). ");
39 }
40 
41 /******************************************************************************/
42 /* INPUT */
43 /******************************************************************************/
44 
45 unique_ptr<TreeTemplate<Node>> NexusIOTree::readTreeTemplate(istream& in) const
46 {
47  vector<unique_ptr<Tree>> trees;
48  readTrees(in, trees);
49  if (trees.size() == 0)
50  throw IOException("NexusIOTree::readTree(). No tree found in file.");
51  unique_ptr<TreeTemplate<Node>> tree(dynamic_cast<TreeTemplate<Node>*>(trees[0].release()));
52  return tree;
53 }
54 
55 /******************************************************************************/
56 
57 void NexusIOTree::readTrees(istream& in, vector<unique_ptr<Tree>>& trees) const
58 {
59  // Checking the existence of specified file
60  if (!in)
61  {
62  throw IOException ("NexusIOTree::readTrees(). Failed to read from stream");
63  }
64 
65  // Look for the TREES block:
66  string line = "";
67  while (TextTools::toUpper(line) != "BEGIN TREES;")
68  {
69  if (in.eof())
70  throw Exception("NexusIOTree::readTrees(). No trees block was found.");
72  }
73 
74  string cmdName = "", cmdArgs = "";
75  bool cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
76  if (!cmdFound)
77  throw Exception("NexusIOTree::readTrees(). Missing tree command.");
78  cmdName = TextTools::toUpper(cmdName);
79 
80  // Look for the TRANSLATE command:
81  map<string, string> translation;
82  bool hasTranslation = false;
83  if (cmdName == "TRANSLATE")
84  {
85  // Parse translation:
86  StringTokenizer st(cmdArgs, ",");
87  while (st.hasMoreToken())
88  {
90  NestedStringTokenizer nst(tok, "'", "'", " \t");
91  if (nst.numberOfRemainingTokens() != 2)
92  throw Exception("NexusIOTree::readTrees(). Invalid translation description.");
93  string name = nst.nextToken();
94  string tln = nst.nextToken();
95  translation[name] = tln;
96  }
97  hasTranslation = true;
98  cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
99  if (!cmdFound)
100  throw Exception("NexusIOTree::readTrees(). Missing tree command.");
101  else
102  cmdName = TextTools::toUpper(cmdName);
103  }
104 
105  // Now parse the trees:
106  while (cmdFound && cmdName != "END")
107  {
108  if (cmdName != "TREE")
109  throw Exception("NexusIOTree::readTrees(). Invalid command found: " + cmdName);
110  string::size_type pos = cmdArgs.find("=");
111  if (pos == string::npos)
112  throw Exception("NexusIOTree::readTrees(). invalid format, should be tree-name=tree-description");
113  string description = cmdArgs.substr(pos + 1);
114  auto tree = TreeTemplateTools::parenthesisToTree(description + ";", true);
115 
116  // Now translate leaf names if there is a translation:
117  // (we assume that all trees share the same translation! ===> check!)
118  if (hasTranslation)
119  {
120  vector<Node*> leaves = tree->getLeaves();
121  for (size_t i = 0; i < leaves.size(); i++)
122  {
123  string name = leaves[i]->getName();
124  if (translation.find(name) == translation.end())
125  {
126  throw Exception("NexusIOTree::readTrees(). No translation was given for this leaf: " + name);
127  }
128  leaves[i]->setName(translation[name]);
129  }
130  }
131  trees.push_back(std::move(tree));
132  cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
133  if (cmdFound)
134  cmdName = TextTools::toUpper(cmdName);
135  }
136 }
137 
138 /******************************************************************************/
139 
140 unique_ptr<PhyloTree> NexusIOTree::readPhyloTree(istream& in) const
141 {
142  vector<unique_ptr<PhyloTree>> trees;
143  readPhyloTrees(in, trees);
144  if (trees.size() == 0)
145  throw IOException("NexusIOTree::readPhyloTree(). No tree found in file.");
146  return std::move(trees[0]);
147 }
148 
149 /******************************************************************************/
150 
151 void NexusIOTree::readPhyloTrees(std::istream& in, std::vector<unique_ptr<PhyloTree>>& trees) const
152 {
153  // Checking the existence of specified file
154  if (!in)
155  {
156  throw IOException ("NexusIOTree::readPhyloTrees(). Failed to read from stream");
157  }
158 
159  // Look for the TREES block:
160  string line = "";
161  while (TextTools::toUpper(line) != "BEGIN TREES;")
162  {
163  if (in.eof())
164  throw Exception("NexusIOTree::readPhyloTrees(). No trees block was found.");
166  }
167 
168  string cmdName = "", cmdArgs = "";
169  bool cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
170  if (!cmdFound)
171  throw Exception("NexusIOTree::readPhyloTrees(). Missing tree command.");
172  cmdName = TextTools::toUpper(cmdName);
173 
174  // Look for the TRANSLATE command:
175  map<string, string> translation;
176  bool hasTranslation = false;
177  if (cmdName == "TRANSLATE")
178  {
179  // Parse translation:
180  StringTokenizer st(cmdArgs, ",");
181  while (st.hasMoreToken())
182  {
184  NestedStringTokenizer nst(tok, "'", "'", " \t");
185  if (nst.numberOfRemainingTokens() != 2)
186  throw Exception("NexusIOTree::readTrees(). Invalid translation description.");
187  string name = nst.nextToken();
188  string tln = nst.nextToken();
189  translation[name] = tln;
190  }
191  hasTranslation = true;
192  cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
193  if (!cmdFound)
194  throw Exception("NexusIOTree::readPhyloTrees(). Missing tree command.");
195  else
196  cmdName = TextTools::toUpper(cmdName);
197  }
198 
199  // Now parse the trees:
200  while (cmdFound && cmdName != "END")
201  {
202  if (cmdName != "TREE")
203  throw Exception("NexusIOTree::readTrees(). Invalid command found: " + cmdName);
204  string::size_type pos = cmdArgs.find("=");
205  if (pos == string::npos)
206  throw Exception("NexusIOTree::readTrees(). invalid format, should be tree-name=tree-description");
207  string description = cmdArgs.substr(pos + 1);
208 
209  Newick treeReader;
210 
211  istringstream ss(description + ";");
212  auto tree = treeReader.readPhyloTree(ss);
213 
214  // Now translate leaf names if there is a translation:
215  // (we assume that all trees share the same translation! ===> check!)
216  if (hasTranslation)
217  {
218  vector<shared_ptr<PhyloNode>> leaves = tree->getAllLeaves();
219  for (size_t i = 0; i < leaves.size(); i++)
220  {
221  string name = leaves[i]->getName();
222  if (translation.find(name) == translation.end())
223  {
224  throw Exception("NexusIOTree::readTrees(). No translation was given for this leaf: " + name);
225  }
226  leaves[i]->setName(translation[name]);
227  }
228  }
229  trees.push_back(std::move(tree));
230  cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
231  if (cmdFound)
232  cmdName = TextTools::toUpper(cmdName);
233  }
234 }
235 
236 /******************************************************************************/
237 /* OUTPUT */
238 /******************************************************************************/
239 
240 void NexusIOTree::write_(const Tree& tree, ostream& out) const
241 {
242  vector<const Tree*> trees;
243  trees.push_back(&const_cast<Tree&>(tree));
244  writeTrees(trees, out);
245 }
246 
247 /******************************************************************************/
248 
249 void NexusIOTree::write_(const PhyloTree& tree, ostream& out) const
250 {
251  vector<const PhyloTree*> trees;
252  trees.push_back(&const_cast<PhyloTree&>(tree));
253  writePhyloTrees(trees, out);
254 }
255 
256 /******************************************************************************/
257 
258 template<class N>
259 void NexusIOTree::write_(const TreeTemplate<N>& tree, ostream& out) const
260 {
261  vector<const Tree*> trees;
262  trees.push_back(&const_cast<Tree&>(tree));
263  writeTrees(trees, out);
264 }
265 
266 /******************************************************************************/
267 
268 void NexusIOTree::write_(const vector<const Tree*>& trees, ostream& out) const
269 {
270  // Checking the existence of specified file, and possibility to open it in write mode
271  if (!out)
272  {
273  throw IOException ("NexusIOTree::write: failed to write to stream");
274  }
275 
276  out << "#NEXUS" << endl;
277  out << endl;
278  out << "BEGIN TREES;" << endl;
279 
280  // First, we retrieve all leaf names from all trees:
281  vector<string> names;
282  for (size_t i = 0; i < trees.size(); i++)
283  {
284  names = VectorTools::vectorUnion(names, trees[i]->getLeavesNames());
285  }
286  // ... and create a translation map:
287  map<string, size_t> translation;
288  size_t code = 0;
289  for (size_t i = 0; i < names.size(); i++)
290  {
291  translation[names[i]] = code++;
292  }
293 
294  // Second we translate all leaf names to their corresponding code:
295  vector<Tree*> translatedTrees(trees.size());
296  for (size_t i = 0; i < trees.size(); i++)
297  {
298  vector<int> leavesId = trees[i]->getLeavesId();
299  Tree* tree = dynamic_cast<Tree*>(trees[i]->clone());
300  for (size_t j = 0; j < leavesId.size(); j++)
301  {
302  tree->setNodeName(leavesId[j], TextTools::toString(translation[tree->getNodeName(leavesId[j])]));
303  }
304  translatedTrees[i] = tree;
305  }
306 
307  // Third we print the translation command:
308  out << " TRANSLATE";
309  size_t count = 0;
310  for (map<string, size_t>::iterator it = translation.begin(); it != translation.end(); it++)
311  {
312  out << endl << " " << it->second << "\t" << it->first;
313  count++;
314  if (count < translation.size())
315  out << ",";
316  }
317  out << ";";
318 
319  // Finally we print all tree descriptions:
320  for (size_t i = 0; i < trees.size(); i++)
321  {
322  out << endl << " TREE tree" << (i + 1) << " = " << TreeTools::treeToParenthesis(*translatedTrees[i]);
323  }
324  out << "END;" << endl;
325 
326  // Clean trees:
327  for (size_t i = 0; i < translatedTrees.size(); i++)
328  {
329  delete translatedTrees[i];
330  }
331 }
332 
333 /******************************************************************************/
334 
335 void NexusIOTree::write_(const vector<const PhyloTree*>& trees, ostream& out) const
336 {
337  // Checking the existence of specified file, and possibility to open
338  // it in write mode
339  if (!out)
340  {
341  throw IOException ("NexusIOTree::write: failed to write to stream");
342  }
343 
344  out << "#NEXUS" << endl;
345  out << endl;
346  out << "BEGIN TREES;" << endl;
347 
348  // First, we retrieve all leaf names from all trees:
349  vector<string> names;
350  for (size_t i = 0; i < trees.size(); i++)
351  {
352  names = VectorTools::vectorUnion(names, trees[i]->getAllLeavesNames());
353  }
354  // ... and create a translation map:
355  map<string, size_t> translation;
356  size_t code = 0;
357  for (size_t i = 0; i < names.size(); i++)
358  {
359  translation[names[i]] = code++;
360  }
361 
362  // Second we translate all leaf names to their corresponding code:
363  vector<PhyloTree*> translatedTrees;
364  for (size_t i = 0; i < trees.size(); i++)
365  {
366  vector<shared_ptr<PhyloNode>> leaves = trees[i]->getAllLeaves();
367 
368  PhyloTree* tree = trees[i]->clone();
369 
370  for (size_t j = 0; j < leaves.size(); j++)
371  {
372  tree->getNode(trees[i]->getNodeIndex(leaves[j]))->setName(TextTools::toString(translation[leaves[j]->getName()]));
373  }
374  translatedTrees.push_back(tree);
375  }
376 
377  // Third we print the translation command:
378  out << " TRANSLATE";
379  size_t count = 0;
380  for (map<string, size_t>::iterator it = translation.begin(); it != translation.end(); it++)
381  {
382  out << endl << " " << it->second << "\t" << it->first;
383  count++;
384  if (count < translation.size())
385  out << ",";
386  }
387  out << ";";
388 
389  Newick treeWriter;
390 
391  // Finally we print all tree descriptions:
392  for (size_t i = 0; i < trees.size(); i++)
393  {
394  out << endl << " TREE tree" << (i + 1) << " = ";
395  treeWriter.writePhyloTree(*translatedTrees[i], out);
396  }
397  out << "END;" << endl;
398 
399  // Clean trees:
400  for (size_t i = 0; i < translatedTrees.size(); i++)
401  {
402  delete translatedTrees[i];
403  }
404 }
405 
406 /******************************************************************************/
407 
408 template<class N>
409 void NexusIOTree::write_(const vector<TreeTemplate<N>*>& trees, ostream& out) const
410 {
411  // Checking the existence of specified file, and possibility to open it in write mode
412  if (!out)
413  {
414  throw IOException ("NexusIOTree::write: failed to write to stream");
415  }
416 
417  out << "#NEXUS" << endl;
418  out << endl;
419  out << "BEGIN TREES;" << endl;
420 
421  // First, we retrieve all leaf names from all trees:
422  vector<string> names;
423  for (size_t i = 0; i < trees.size(); i++)
424  {
425  names = VectorTools::vectorUnion(names, trees[i]->getLeavesNames());
426  }
427  // ... and create a translation map:
428  map<string, size_t> translation;
429  size_t code = 0;
430  for (size_t i = 0; i < names.size(); i++)
431  {
432  translation[names[i]] = code++;
433  }
434 
435  // Second we translate all leaf names to their corresponding code:
436  vector<Tree*> translatedTrees(trees.size());
437  for (size_t i = 0; i < trees.size(); i++)
438  {
439  vector<int> leavesId = trees[i]->getLeavesId();
440  Tree* tree = dynamic_cast<Tree*>(trees[i]->clone());
441  for (size_t j = 0; j < leavesId.size(); j++)
442  {
443  tree->setNodeName(leavesId[j], TextTools::toString(translation[tree->getNodeName(leavesId[j])]));
444  }
445  translatedTrees[i] = tree;
446  }
447 
448  // Third we print the translation command:
449  out << " TRANSLATE";
450  size_t count = 0;
451  for (map<string, size_t>::iterator it = translation.begin(); it != translation.end(); it++)
452  {
453  out << endl << " " << it->second << "\t" << it->first;
454  count++;
455  if (count < translation.size())
456  out << ",";
457  }
458  out << ";";
459 
460  // Finally we print all tree descriptions:
461  for (size_t i = 0; i < trees.size(); i++)
462  {
463  out << endl << " TREE tree" << (i + 1) << " = " << TreeTemplateTools::treeToParenthesis(*translatedTrees[i]);
464  }
465  out << "END;" << endl;
466 
467  // Clean trees:
468  for (size_t i = 0; i < translatedTrees.size(); i++)
469  {
470  delete translatedTrees[i];
471  }
472 }
473 
474 /******************************************************************************/
virtual std::shared_ptr< N > getNode(NodeIndex nodeIndex) const=0
static std::string getNextLine(std::istream &in)
const std::string & nextToken()
The so-called 'newick' parenthetic format.
Definition: Newick.h:58
void writePhyloTree(const PhyloTree &tree, std::ostream &out) const override
Write a tree to a stream.
Definition: Newick.h:157
std::unique_ptr< PhyloTree > readPhyloTree(std::istream &in) const override
Read a tree from a stream.
Definition: Newick.cpp:76
virtual std::unique_ptr< TreeTemplate< Node > > readTreeTemplate(std::istream &in) const=0
virtual void readPhyloTrees(std::istream &in, std::vector< std::unique_ptr< PhyloTree >> &trees) const override=0
const std::string getFormatDescription() const override
Definition: NexusIoTree.cpp:36
std::unique_ptr< PhyloTree > readPhyloTree(std::istream &in) const override=0
void write_(const Tree &tree, std::ostream &out) const
virtual void readTrees(std::istream &in, std::vector< std::unique_ptr< Tree >> &trees) const override=0
const std::string getFormatName() const override
Definition: NexusIoTree.cpp:32
static bool getNextCommand(std::istream &input, std::string &name, std::string &arguments, bool lineBrk=true)
PhyloTree * clone() const
Definition: PhyloTree.h:43
size_t numberOfRemainingTokens() const
const std::string & nextToken()
bool hasMoreToken() const
static std::unique_ptr< TreeTemplate< Node > > parenthesisToTree(const std::string &description, bool bootstrap=true, const std::string &propertyName=TreeTools::BOOTSTRAP, bool withId=false, bool verbose=true)
Parse a string in the parenthesis format and convert it to a tree.
static std::string treeToParenthesis(const TreeTemplate< Node > &tree, bool writeId=false)
Get the parenthesis description of a tree.
The phylogenetic tree class.
Definition: TreeTemplate.h:59
static std::string treeToParenthesis(const Tree &tree, bool writeId=false)
Get the parenthesis description of a tree.
Definition: TreeTools.cpp:296
Interface for phylogenetic tree objects.
Definition: Tree.h:115
virtual std::string getNodeName(int nodeId) const =0
virtual void setNodeName(int nodeId, const std::string &name)=0
static std::vector< T > vectorUnion(const std::vector< T > &vec1, const std::vector< T > &vec2)
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string toUpper(const std::string &s)
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
Defines the basic types of data flow nodes.