bpp-phyl3 3.0.0
IoPairedSiteLikelihoods.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5// From the STL
6#include <vector>
7#include <numeric>
8
9// From Bio++
10#include <Bpp/Text/TextTools.h>
11#include "../Likelihood/PairedSiteLikelihoods.h"
12
14
15using namespace std;
16using namespace bpp;
17
18/*
19 * Read from a stream in Tree-puzzle, phylip-like format
20 */
21PairedSiteLikelihoods IOTreepuzzlePairedSiteLikelihoods::readPairedSiteLikelihoods(std::istream& is)
22{
23 // The first line contains the number of models and the number of sites
24 string line;
25 getline(is, line);
26 istringstream iss (line);
27 size_t nmodels, nsites;
28 iss >> nmodels;
29 iss >> nsites;
30
31 // Then each line contains a model name and site loglikelihoods under this model
32 vector<string> names;
33 vector<vector<double>> loglikelihoods;
34 loglikelihoods.reserve(nmodels);
35
36 // The exact format is determined upon the first line
37 // Name and likelihoods fields can be delimited by either a tab or two spaces
38 string delim;
39 streampos pos (is.tellg());
40 getline(is, line);
41 if (line.find("\t") != string::npos)
42 delim = "\t";
43 else if (line.find(" ") != string::npos)
44 delim = " ";
45 else
46 throw;
47 is.seekg(pos);
48
49 // Read the data
50 while (getline(is, line))
51 {
52 size_t delim_pos (line.find(delim));
53 if (delim_pos == string::npos)
54 {
55 ostringstream msg;
56 msg << "IOTreepuzzlePairedSiteLikelihoods::read: Couldn't find delimiter. The beginning of the line was : "
57 << endl << line.substr(0, 100);
58 throw Exception(msg.str());
59 }
60
61 // The name
62 names.push_back( TextTools::removeSurroundingWhiteSpaces(line.substr(0, delim_pos)) );
63
64 // The sites
65 loglikelihoods.push_back(vector<double>());
66 loglikelihoods.back().reserve(nsites);
67
68 istringstream liks_stream ( line.substr(delim_pos) );
69 double currllik;
70 while (liks_stream >> currllik)
71 loglikelihoods.back().push_back(currllik);
72
73 // Check the number of sites
74 if (loglikelihoods.back().size() != nsites)
75 {
76 ostringstream oss;
77 oss << "IOTreepuzzlePairedSiteLikelihoods::read: Model '" << names.back()
78 << "' does not have the correct number of sites. ("
79 << loglikelihoods.back().size() << ", expected: " << nsites << ")";
80 throw Exception(oss.str());
81 }
82 }
83
84 // Check the number of models
85 if (loglikelihoods.size() != nmodels)
86 throw Exception("IOTreepuzzlePairedSiteLikelihoods::read: Wrong number of models.");
87
88 PairedSiteLikelihoods psl (loglikelihoods, names);
89
90 return psl;
91}
92
93/*
94 * Read from a file in Tree-puzzle, phylip-like format
95 */
97{
98 ifstream iF (path.c_str());
100 return psl;
101}
102
103/*
104 * Write to stream in Tree-puzzle, phylip-like format
105 */
107{
108 if (psl.getLikelihoods().size() == 0)
109 throw Exception("Writing an empty PairedSiteLikelihoods object to file.");
110
111 // Header line
112 os << psl.getNumberOfModels() << " " << psl.getNumberOfSites() << endl;
113
114 // Data lines
115
116 if (delim == "\t")
117 {
118 // The delimiter is a tab
119 for (size_t i = 0; i < psl.getNumberOfModels(); ++i)
120 {
121 os << psl.getModelNames().at(i) << "\t";
122 for (vector<double>::const_iterator sitelik = psl.getLikelihoods().at(i).begin();
123 sitelik != psl.getLikelihoods().at(i).end();
124 ++sitelik)
125 {
126 if (sitelik == psl.getLikelihoods().at(i).end() - 1)
127 os << *sitelik;
128 else
129 os << *sitelik << " ";
130 }
131 os << endl;
132 }
133 }
134
135 else if (delim == " ")
136 // The delimiter is two spaces
137 {
138 // First we must get the length of the names field
139 vector<size_t> name_sizes;
140 for (vector<string>::const_iterator name = psl.getModelNames().begin();
141 name != psl.getModelNames().end();
142 ++name)
143 {
144 name_sizes.push_back(name->size());
145 }
146 size_t names_field_size = *max_element(name_sizes.begin(), name_sizes.end()) + 2;
147
148 // Data lines
149 for (size_t i = 0; i < psl.getNumberOfModels(); ++i)
150 {
151 // name field
152 string name (psl.getModelNames().at(i));
153 while (name.size() != names_field_size)
154 name.append(" ");
155 os << name;
156
157 // site-likelihoods field
158 for (vector<double>::const_iterator sitelik = psl.getLikelihoods().at(i).begin();
159 sitelik != psl.getLikelihoods().at(i).end();
160 ++sitelik)
161 {
162 if (sitelik == psl.getLikelihoods().at(i).end() - 1)
163 os << *sitelik;
164 else
165 os << *sitelik << " ";
166 }
167 os << endl;
168 }
169 }
170
171 else
172 {
173 ostringstream msg;
174 msg << "IOTreepuzzlePairedSiteLikelihoods::write: Unknown field delimiter "
175 << "\"" << delim << "\".";
176 os << msg.str() << endl;
177 throw Exception(msg.str());
178 }
179}
180
181
182/*
183 * Write to file in Tree-puzzle, phylip-like format
184 */
185void IOTreepuzzlePairedSiteLikelihoods::writePairedSiteLikelihoods(const bpp::PairedSiteLikelihoods& psl, const std::string& path, const string& delim)
186{
187 ofstream oF (path.c_str());
189}
190
191
192/*
193 * Read from a stream in Phyml format
194 */
196{
197 vector<double> loglikelihoods;
198 string str;
199
200 // check the format, with first line
201 getline(is, str);
202 string expected ("Note : P(D|M) is the probability of site D given the model M (i.e., the site likelihood)");
203 if (str.find(expected) == string::npos) // \r\n handling
204 {
205 ostringstream msg;
206 msg << "IOPhymlPairedSiteLikelihoods::read: The first line was expected to be :"
207 << expected << endl
208 << "and was :" << endl
209 << str << endl;
210 throw Exception(msg.str());
211 }
212
213 // skip header lines
214 for (int i = 0; i < 6; ++i)
215 {
216 getline(is, str);
217 }
218
219 while (getline(is, str))
220 {
221 // the site likelihood is the second field on the line
222 istringstream ss (str);
223 ss >> str;
224 double lik;
225 ss >> lik;
226 loglikelihoods.push_back(log(lik));
227 }
228
229 return loglikelihoods;
230}
231
232/*
233 * Read from a file in Phyml format
234 */
235vector<double> IOPhymlPairedSiteLikelihoods::readPairedSiteLikelihoods(const std::string& path)
236{
237 ifstream iF (path.c_str());
238 vector<double> loglikelihoods(IOPhymlPairedSiteLikelihoods::readPairedSiteLikelihoods(iF));
239 return loglikelihoods;
240}
static std::vector< double > readPairedSiteLikelihoods(std::istream &is)
Read paired-site likelihoods from a Phyml-formatted stream.
static void writePairedSiteLikelihoods(const PairedSiteLikelihoods &psl, std::ostream &os, const std::string &delim="\t")
Write paired-site likelihoods to a stream.
static PairedSiteLikelihoods readPairedSiteLikelihoods(std::istream &is)
Read paired-site likelihoods from a Treepuzzle/RAxML-formatted stream.
A container for paired-site likelihoods (likelihoods over the same sites for different models,...
size_t getNumberOfModels() const
Get the number of models in the container.
std::size_t getNumberOfSites() const
const std::vector< std::vector< double > > & getLikelihoods() const
const std::vector< std::string > & getModelNames() const
std::string removeSurroundingWhiteSpaces(const std::string &s)
Defines the basic types of data flow nodes.
double log(const ExtendedFloat &ef)