bpp-phyl3 3.0.0
NexusIoTree.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include <Bpp/Io/FileTools.h>
10
11#include "../Tree/PhyloTree.h"
12#include "../Tree/Tree.h"
13#include "../Tree/TreeTemplate.h"
14#include "../Tree/TreeTemplateTools.h"
15#include "Newick.h"
16#include "NexusIoTree.h"
17
18// From bpp-seq:
20
21using namespace bpp;
22
23// From the STL:
24#include <iostream>
25#include <fstream>
26#include <sstream>
27
28using namespace std;
29
30/******************************************************************************/
31
32const string NexusIOTree::getFormatName() const { return "Nexus"; }
33
34/******************************************************************************/
35
37{
38 return string("Nexus format (trees only). ");
39}
40
41/******************************************************************************/
42/* INPUT */
43/******************************************************************************/
44
45unique_ptr<TreeTemplate<Node>> NexusIOTree::readTreeTemplate(istream& in) const
46{
47 vector<unique_ptr<Tree>> trees;
48 readTrees(in, trees);
49 if (trees.size() == 0)
50 throw IOException("NexusIOTree::readTree(). No tree found in file.");
51 unique_ptr<TreeTemplate<Node>> tree(dynamic_cast<TreeTemplate<Node>*>(trees[0].release()));
52 return tree;
53}
54
55/******************************************************************************/
56
57void NexusIOTree::readTrees(istream& in, vector<unique_ptr<Tree>>& trees) const
58{
59 // Checking the existence of specified file
60 if (!in)
61 {
62 throw IOException ("NexusIOTree::readTrees(). Failed to read from stream");
63 }
64
65 // Look for the TREES block:
66 string line = "";
67 while (TextTools::toUpper(line) != "BEGIN TREES;")
68 {
69 if (in.eof())
70 throw Exception("NexusIOTree::readTrees(). No trees block was found.");
72 }
73
74 string cmdName = "", cmdArgs = "";
75 bool cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
76 if (!cmdFound)
77 throw Exception("NexusIOTree::readTrees(). Missing tree command.");
78 cmdName = TextTools::toUpper(cmdName);
79
80 // Look for the TRANSLATE command:
81 map<string, string> translation;
82 bool hasTranslation = false;
83 if (cmdName == "TRANSLATE")
84 {
85 // Parse translation:
86 StringTokenizer st(cmdArgs, ",");
87 while (st.hasMoreToken())
88 {
90 NestedStringTokenizer nst(tok, "'", "'", " \t");
91 if (nst.numberOfRemainingTokens() != 2)
92 throw Exception("NexusIOTree::readTrees(). Invalid translation description.");
93 string name = nst.nextToken();
94 string tln = nst.nextToken();
95 translation[name] = tln;
96 }
97 hasTranslation = true;
98 cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
99 if (!cmdFound)
100 throw Exception("NexusIOTree::readTrees(). Missing tree command.");
101 else
102 cmdName = TextTools::toUpper(cmdName);
103 }
104
105 // Now parse the trees:
106 while (cmdFound && cmdName != "END")
107 {
108 if (cmdName != "TREE")
109 throw Exception("NexusIOTree::readTrees(). Invalid command found: " + cmdName);
110 string::size_type pos = cmdArgs.find("=");
111 if (pos == string::npos)
112 throw Exception("NexusIOTree::readTrees(). invalid format, should be tree-name=tree-description");
113 string description = cmdArgs.substr(pos + 1);
114 auto tree = TreeTemplateTools::parenthesisToTree(description + ";", true);
115
116 // Now translate leaf names if there is a translation:
117 // (we assume that all trees share the same translation! ===> check!)
118 if (hasTranslation)
119 {
120 vector<Node*> leaves = tree->getLeaves();
121 for (size_t i = 0; i < leaves.size(); i++)
122 {
123 string name = leaves[i]->getName();
124 if (translation.find(name) == translation.end())
125 {
126 throw Exception("NexusIOTree::readTrees(). No translation was given for this leaf: " + name);
127 }
128 leaves[i]->setName(translation[name]);
129 }
130 }
131 trees.push_back(std::move(tree));
132 cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
133 if (cmdFound)
134 cmdName = TextTools::toUpper(cmdName);
135 }
136}
137
138/******************************************************************************/
139
140unique_ptr<PhyloTree> NexusIOTree::readPhyloTree(istream& in) const
141{
142 vector<unique_ptr<PhyloTree>> trees;
143 readPhyloTrees(in, trees);
144 if (trees.size() == 0)
145 throw IOException("NexusIOTree::readPhyloTree(). No tree found in file.");
146 return std::move(trees[0]);
147}
148
149/******************************************************************************/
150
151void NexusIOTree::readPhyloTrees(std::istream& in, std::vector<unique_ptr<PhyloTree>>& trees) const
152{
153 // Checking the existence of specified file
154 if (!in)
155 {
156 throw IOException ("NexusIOTree::readPhyloTrees(). Failed to read from stream");
157 }
158
159 // Look for the TREES block:
160 string line = "";
161 while (TextTools::toUpper(line) != "BEGIN TREES;")
162 {
163 if (in.eof())
164 throw Exception("NexusIOTree::readPhyloTrees(). No trees block was found.");
166 }
167
168 string cmdName = "", cmdArgs = "";
169 bool cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
170 if (!cmdFound)
171 throw Exception("NexusIOTree::readPhyloTrees(). Missing tree command.");
172 cmdName = TextTools::toUpper(cmdName);
173
174 // Look for the TRANSLATE command:
175 map<string, string> translation;
176 bool hasTranslation = false;
177 if (cmdName == "TRANSLATE")
178 {
179 // Parse translation:
180 StringTokenizer st(cmdArgs, ",");
181 while (st.hasMoreToken())
182 {
184 NestedStringTokenizer nst(tok, "'", "'", " \t");
185 if (nst.numberOfRemainingTokens() != 2)
186 throw Exception("NexusIOTree::readTrees(). Invalid translation description.");
187 string name = nst.nextToken();
188 string tln = nst.nextToken();
189 translation[name] = tln;
190 }
191 hasTranslation = true;
192 cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
193 if (!cmdFound)
194 throw Exception("NexusIOTree::readPhyloTrees(). Missing tree command.");
195 else
196 cmdName = TextTools::toUpper(cmdName);
197 }
198
199 // Now parse the trees:
200 while (cmdFound && cmdName != "END")
201 {
202 if (cmdName != "TREE")
203 throw Exception("NexusIOTree::readTrees(). Invalid command found: " + cmdName);
204 string::size_type pos = cmdArgs.find("=");
205 if (pos == string::npos)
206 throw Exception("NexusIOTree::readTrees(). invalid format, should be tree-name=tree-description");
207 string description = cmdArgs.substr(pos + 1);
208
209 Newick treeReader;
210
211 istringstream ss(description + ";");
212 auto tree = treeReader.readPhyloTree(ss);
213
214 // Now translate leaf names if there is a translation:
215 // (we assume that all trees share the same translation! ===> check!)
216 if (hasTranslation)
217 {
218 vector<shared_ptr<PhyloNode>> leaves = tree->getAllLeaves();
219 for (size_t i = 0; i < leaves.size(); i++)
220 {
221 string name = leaves[i]->getName();
222 if (translation.find(name) == translation.end())
223 {
224 throw Exception("NexusIOTree::readTrees(). No translation was given for this leaf: " + name);
225 }
226 leaves[i]->setName(translation[name]);
227 }
228 }
229 trees.push_back(std::move(tree));
230 cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
231 if (cmdFound)
232 cmdName = TextTools::toUpper(cmdName);
233 }
234}
235
236/******************************************************************************/
237/* OUTPUT */
238/******************************************************************************/
239
240void NexusIOTree::write_(const Tree& tree, ostream& out) const
241{
242 vector<const Tree*> trees;
243 trees.push_back(&const_cast<Tree&>(tree));
244 writeTrees(trees, out);
245}
246
247/******************************************************************************/
248
249void NexusIOTree::write_(const PhyloTree& tree, ostream& out) const
250{
251 vector<const PhyloTree*> trees;
252 trees.push_back(&const_cast<PhyloTree&>(tree));
253 writePhyloTrees(trees, out);
254}
255
256/******************************************************************************/
257
258template<class N>
259void NexusIOTree::write_(const TreeTemplate<N>& tree, ostream& out) const
260{
261 vector<const Tree*> trees;
262 trees.push_back(&const_cast<Tree&>(tree));
263 writeTrees(trees, out);
264}
265
266/******************************************************************************/
267
268void NexusIOTree::write_(const vector<const Tree*>& trees, ostream& out) const
269{
270 // Checking the existence of specified file, and possibility to open it in write mode
271 if (!out)
272 {
273 throw IOException ("NexusIOTree::write: failed to write to stream");
274 }
275
276 out << "#NEXUS" << endl;
277 out << endl;
278 out << "BEGIN TREES;" << endl;
279
280 // First, we retrieve all leaf names from all trees:
281 vector<string> names;
282 for (size_t i = 0; i < trees.size(); i++)
283 {
284 names = VectorTools::vectorUnion(names, trees[i]->getLeavesNames());
285 }
286 // ... and create a translation map:
287 map<string, size_t> translation;
288 size_t code = 0;
289 for (size_t i = 0; i < names.size(); i++)
290 {
291 translation[names[i]] = code++;
292 }
293
294 // Second we translate all leaf names to their corresponding code:
295 vector<Tree*> translatedTrees(trees.size());
296 for (size_t i = 0; i < trees.size(); i++)
297 {
298 vector<int> leavesId = trees[i]->getLeavesId();
299 Tree* tree = dynamic_cast<Tree*>(trees[i]->clone());
300 for (size_t j = 0; j < leavesId.size(); j++)
301 {
302 tree->setNodeName(leavesId[j], TextTools::toString(translation[tree->getNodeName(leavesId[j])]));
303 }
304 translatedTrees[i] = tree;
305 }
306
307 // Third we print the translation command:
308 out << " TRANSLATE";
309 size_t count = 0;
310 for (map<string, size_t>::iterator it = translation.begin(); it != translation.end(); it++)
311 {
312 out << endl << " " << it->second << "\t" << it->first;
313 count++;
314 if (count < translation.size())
315 out << ",";
316 }
317 out << ";";
318
319 // Finally we print all tree descriptions:
320 for (size_t i = 0; i < trees.size(); i++)
321 {
322 out << endl << " TREE tree" << (i + 1) << " = " << TreeTools::treeToParenthesis(*translatedTrees[i]);
323 }
324 out << "END;" << endl;
325
326 // Clean trees:
327 for (size_t i = 0; i < translatedTrees.size(); i++)
328 {
329 delete translatedTrees[i];
330 }
331}
332
333/******************************************************************************/
334
335void NexusIOTree::write_(const vector<const PhyloTree*>& trees, ostream& out) const
336{
337 // Checking the existence of specified file, and possibility to open
338 // it in write mode
339 if (!out)
340 {
341 throw IOException ("NexusIOTree::write: failed to write to stream");
342 }
343
344 out << "#NEXUS" << endl;
345 out << endl;
346 out << "BEGIN TREES;" << endl;
347
348 // First, we retrieve all leaf names from all trees:
349 vector<string> names;
350 for (size_t i = 0; i < trees.size(); i++)
351 {
352 names = VectorTools::vectorUnion(names, trees[i]->getAllLeavesNames());
353 }
354 // ... and create a translation map:
355 map<string, size_t> translation;
356 size_t code = 0;
357 for (size_t i = 0; i < names.size(); i++)
358 {
359 translation[names[i]] = code++;
360 }
361
362 // Second we translate all leaf names to their corresponding code:
363 vector<PhyloTree*> translatedTrees;
364 for (size_t i = 0; i < trees.size(); i++)
365 {
366 vector<shared_ptr<PhyloNode>> leaves = trees[i]->getAllLeaves();
367
368 PhyloTree* tree = trees[i]->clone();
369
370 for (size_t j = 0; j < leaves.size(); j++)
371 {
372 tree->getNode(trees[i]->getNodeIndex(leaves[j]))->setName(TextTools::toString(translation[leaves[j]->getName()]));
373 }
374 translatedTrees.push_back(tree);
375 }
376
377 // Third we print the translation command:
378 out << " TRANSLATE";
379 size_t count = 0;
380 for (map<string, size_t>::iterator it = translation.begin(); it != translation.end(); it++)
381 {
382 out << endl << " " << it->second << "\t" << it->first;
383 count++;
384 if (count < translation.size())
385 out << ",";
386 }
387 out << ";";
388
389 Newick treeWriter;
390
391 // Finally we print all tree descriptions:
392 for (size_t i = 0; i < trees.size(); i++)
393 {
394 out << endl << " TREE tree" << (i + 1) << " = ";
395 treeWriter.writePhyloTree(*translatedTrees[i], out);
396 }
397 out << "END;" << endl;
398
399 // Clean trees:
400 for (size_t i = 0; i < translatedTrees.size(); i++)
401 {
402 delete translatedTrees[i];
403 }
404}
405
406/******************************************************************************/
407
408template<class N>
409void NexusIOTree::write_(const vector<TreeTemplate<N>*>& trees, ostream& out) const
410{
411 // Checking the existence of specified file, and possibility to open it in write mode
412 if (!out)
413 {
414 throw IOException ("NexusIOTree::write: failed to write to stream");
415 }
416
417 out << "#NEXUS" << endl;
418 out << endl;
419 out << "BEGIN TREES;" << endl;
420
421 // First, we retrieve all leaf names from all trees:
422 vector<string> names;
423 for (size_t i = 0; i < trees.size(); i++)
424 {
425 names = VectorTools::vectorUnion(names, trees[i]->getLeavesNames());
426 }
427 // ... and create a translation map:
428 map<string, size_t> translation;
429 size_t code = 0;
430 for (size_t i = 0; i < names.size(); i++)
431 {
432 translation[names[i]] = code++;
433 }
434
435 // Second we translate all leaf names to their corresponding code:
436 vector<Tree*> translatedTrees(trees.size());
437 for (size_t i = 0; i < trees.size(); i++)
438 {
439 vector<int> leavesId = trees[i]->getLeavesId();
440 Tree* tree = dynamic_cast<Tree*>(trees[i]->clone());
441 for (size_t j = 0; j < leavesId.size(); j++)
442 {
443 tree->setNodeName(leavesId[j], TextTools::toString(translation[tree->getNodeName(leavesId[j])]));
444 }
445 translatedTrees[i] = tree;
446 }
447
448 // Third we print the translation command:
449 out << " TRANSLATE";
450 size_t count = 0;
451 for (map<string, size_t>::iterator it = translation.begin(); it != translation.end(); it++)
452 {
453 out << endl << " " << it->second << "\t" << it->first;
454 count++;
455 if (count < translation.size())
456 out << ",";
457 }
458 out << ";";
459
460 // Finally we print all tree descriptions:
461 for (size_t i = 0; i < trees.size(); i++)
462 {
463 out << endl << " TREE tree" << (i + 1) << " = " << TreeTemplateTools::treeToParenthesis(*translatedTrees[i]);
464 }
465 out << "END;" << endl;
466
467 // Clean trees:
468 for (size_t i = 0; i < translatedTrees.size(); i++)
469 {
470 delete translatedTrees[i];
471 }
472}
473
474/******************************************************************************/
virtual std::shared_ptr< N > getNode(NodeIndex nodeIndex) const=0
static std::string getNextLine(std::istream &in)
const std::string & nextToken()
The so-called 'newick' parenthetic format.
Definition: Newick.h:58
void writePhyloTree(const PhyloTree &tree, std::ostream &out) const override
Write a tree to a stream.
Definition: Newick.h:157
std::unique_ptr< PhyloTree > readPhyloTree(std::istream &in) const override
Read a tree from a stream.
Definition: Newick.cpp:76
void writeTrees(const std::vector< const Tree * > &trees, std::ostream &out) const override
Write trees to a stream.
Definition: NexusIoTree.h:111
std::unique_ptr< PhyloTree > readPhyloTree(std::istream &in) const override
Read a tree from a stream.
const std::string getFormatDescription() const override
Definition: NexusIoTree.cpp:36
void readPhyloTrees(std::istream &in, std::vector< std::unique_ptr< PhyloTree > > &trees) const override
Read trees from a stream.
void readTrees(std::istream &in, std::vector< std::unique_ptr< Tree > > &trees) const override
Read trees from a stream.
Definition: NexusIoTree.cpp:57
void writePhyloTrees(const std::vector< const PhyloTree * > &trees, std::ostream &out) const override
Write trees to a stream.
Definition: NexusIoTree.h:118
void write_(const Tree &tree, std::ostream &out) const
std::unique_ptr< TreeTemplate< Node > > readTreeTemplate(std::istream &in) const override
Definition: NexusIoTree.cpp:45
static bool getNextCommand(std::istream &input, std::string &name, std::string &arguments, bool lineBrk=true)
PhyloTree * clone() const
Definition: PhyloTree.h:43
size_t numberOfRemainingTokens() const
const std::string & nextToken()
bool hasMoreToken() const
static std::unique_ptr< TreeTemplate< Node > > parenthesisToTree(const std::string &description, bool bootstrap=true, const std::string &propertyName=TreeTools::BOOTSTRAP, bool withId=false, bool verbose=true)
Parse a string in the parenthesis format and convert it to a tree.
static std::string treeToParenthesis(const TreeTemplate< Node > &tree, bool writeId=false)
Get the parenthesis description of a tree.
The phylogenetic tree class.
Definition: TreeTemplate.h:59
static std::string treeToParenthesis(const Tree &tree, bool writeId=false)
Get the parenthesis description of a tree.
Definition: TreeTools.cpp:295
Interface for phylogenetic tree objects.
Definition: Tree.h:115
virtual std::string getNodeName(int nodeId) const =0
virtual void setNodeName(int nodeId, const std::string &name)=0
static std::vector< T > vectorUnion(const std::vector< T > &vec1, const std::vector< T > &vec2)
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string toUpper(const std::string &s)
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
Defines the basic types of data flow nodes.