bpp-phyl3  3.0.0
IoPairedSiteLikelihoods.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 // From the STL
6 #include <vector>
7 #include <numeric>
8 
9 // From Bio++
10 #include <Bpp/Text/TextTools.h>
11 #include "../Likelihood/PairedSiteLikelihoods.h"
12 
14 
15 using namespace std;
16 using namespace bpp;
17 
18 /*
19  * Read from a stream in Tree-puzzle, phylip-like format
20  */
21 PairedSiteLikelihoods IOTreepuzzlePairedSiteLikelihoods::readPairedSiteLikelihoods(std::istream& is)
22 {
23  // The first line contains the number of models and the number of sites
24  string line;
25  getline(is, line);
26  istringstream iss (line);
27  size_t nmodels, nsites;
28  iss >> nmodels;
29  iss >> nsites;
30 
31  // Then each line contains a model name and site loglikelihoods under this model
32  vector<string> names;
33  vector<vector<double>> loglikelihoods;
34  loglikelihoods.reserve(nmodels);
35 
36  // The exact format is determined upon the first line
37  // Name and likelihoods fields can be delimited by either a tab or two spaces
38  string delim;
39  streampos pos (is.tellg());
40  getline(is, line);
41  if (line.find("\t") != string::npos)
42  delim = "\t";
43  else if (line.find(" ") != string::npos)
44  delim = " ";
45  else
46  throw;
47  is.seekg(pos);
48 
49  // Read the data
50  while (getline(is, line))
51  {
52  size_t delim_pos (line.find(delim));
53  if (delim_pos == string::npos)
54  {
55  ostringstream msg;
56  msg << "IOTreepuzzlePairedSiteLikelihoods::read: Couldn't find delimiter. The beginning of the line was : "
57  << endl << line.substr(0, 100);
58  throw Exception(msg.str());
59  }
60 
61  // The name
62  names.push_back( TextTools::removeSurroundingWhiteSpaces(line.substr(0, delim_pos)) );
63 
64  // The sites
65  loglikelihoods.push_back(vector<double>());
66  loglikelihoods.back().reserve(nsites);
67 
68  istringstream liks_stream ( line.substr(delim_pos) );
69  double currllik;
70  while (liks_stream >> currllik)
71  loglikelihoods.back().push_back(currllik);
72 
73  // Check the number of sites
74  if (loglikelihoods.back().size() != nsites)
75  {
76  ostringstream oss;
77  oss << "IOTreepuzzlePairedSiteLikelihoods::read: Model '" << names.back()
78  << "' does not have the correct number of sites. ("
79  << loglikelihoods.back().size() << ", expected: " << nsites << ")";
80  throw Exception(oss.str());
81  }
82  }
83 
84  // Check the number of models
85  if (loglikelihoods.size() != nmodels)
86  throw Exception("IOTreepuzzlePairedSiteLikelihoods::read: Wrong number of models.");
87 
88  PairedSiteLikelihoods psl (loglikelihoods, names);
89 
90  return psl;
91 }
92 
93 /*
94  * Read from a file in Tree-puzzle, phylip-like format
95  */
96 PairedSiteLikelihoods IOTreepuzzlePairedSiteLikelihoods::readPairedSiteLikelihoods(const std::string& path)
97 {
98  ifstream iF (path.c_str());
99  PairedSiteLikelihoods psl (IOTreepuzzlePairedSiteLikelihoods::readPairedSiteLikelihoods(iF));
100  return psl;
101 }
102 
103 /*
104  * Write to stream in Tree-puzzle, phylip-like format
105  */
106 void IOTreepuzzlePairedSiteLikelihoods::writePairedSiteLikelihoods(const bpp::PairedSiteLikelihoods& psl, ostream& os, const string& delim)
107 {
108  if (psl.getLikelihoods().size() == 0)
109  throw Exception("Writing an empty PairedSiteLikelihoods object to file.");
110 
111  // Header line
112  os << psl.getNumberOfModels() << " " << psl.getNumberOfSites() << endl;
113 
114  // Data lines
115 
116  if (delim == "\t")
117  {
118  // The delimiter is a tab
119  for (size_t i = 0; i < psl.getNumberOfModels(); ++i)
120  {
121  os << psl.getModelNames().at(i) << "\t";
122  for (vector<double>::const_iterator sitelik = psl.getLikelihoods().at(i).begin();
123  sitelik != psl.getLikelihoods().at(i).end();
124  ++sitelik)
125  {
126  if (sitelik == psl.getLikelihoods().at(i).end() - 1)
127  os << *sitelik;
128  else
129  os << *sitelik << " ";
130  }
131  os << endl;
132  }
133  }
134 
135  else if (delim == " ")
136  // The delimiter is two spaces
137  {
138  // First we must get the length of the names field
139  vector<size_t> name_sizes;
140  for (vector<string>::const_iterator name = psl.getModelNames().begin();
141  name != psl.getModelNames().end();
142  ++name)
143  {
144  name_sizes.push_back(name->size());
145  }
146  size_t names_field_size = *max_element(name_sizes.begin(), name_sizes.end()) + 2;
147 
148  // Data lines
149  for (size_t i = 0; i < psl.getNumberOfModels(); ++i)
150  {
151  // name field
152  string name (psl.getModelNames().at(i));
153  while (name.size() != names_field_size)
154  name.append(" ");
155  os << name;
156 
157  // site-likelihoods field
158  for (vector<double>::const_iterator sitelik = psl.getLikelihoods().at(i).begin();
159  sitelik != psl.getLikelihoods().at(i).end();
160  ++sitelik)
161  {
162  if (sitelik == psl.getLikelihoods().at(i).end() - 1)
163  os << *sitelik;
164  else
165  os << *sitelik << " ";
166  }
167  os << endl;
168  }
169  }
170 
171  else
172  {
173  ostringstream msg;
174  msg << "IOTreepuzzlePairedSiteLikelihoods::write: Unknown field delimiter "
175  << "\"" << delim << "\".";
176  os << msg.str() << endl;
177  throw Exception(msg.str());
178  }
179 }
180 
181 
182 /*
183  * Write to file in Tree-puzzle, phylip-like format
184  */
185 void IOTreepuzzlePairedSiteLikelihoods::writePairedSiteLikelihoods(const bpp::PairedSiteLikelihoods& psl, const std::string& path, const string& delim)
186 {
187  ofstream oF (path.c_str());
188  IOTreepuzzlePairedSiteLikelihoods::writePairedSiteLikelihoods(psl, oF, delim);
189 }
190 
191 
192 /*
193  * Read from a stream in Phyml format
194  */
195 vector<double> IOPhymlPairedSiteLikelihoods::readPairedSiteLikelihoods(std::istream& is)
196 {
197  vector<double> loglikelihoods;
198  string str;
199 
200  // check the format, with first line
201  getline(is, str);
202  string expected ("Note : P(D|M) is the probability of site D given the model M (i.e., the site likelihood)");
203  if (str.find(expected) == string::npos) // \r\n handling
204  {
205  ostringstream msg;
206  msg << "IOPhymlPairedSiteLikelihoods::read: The first line was expected to be :"
207  << expected << endl
208  << "and was :" << endl
209  << str << endl;
210  throw Exception(msg.str());
211  }
212 
213  // skip header lines
214  for (int i = 0; i < 6; ++i)
215  {
216  getline(is, str);
217  }
218 
219  while (getline(is, str))
220  {
221  // the site likelihood is the second field on the line
222  istringstream ss (str);
223  ss >> str;
224  double lik;
225  ss >> lik;
226  loglikelihoods.push_back(log(lik));
227  }
228 
229  return loglikelihoods;
230 }
231 
232 /*
233  * Read from a file in Phyml format
234  */
235 vector<double> IOPhymlPairedSiteLikelihoods::readPairedSiteLikelihoods(const std::string& path)
236 {
237  ifstream iF (path.c_str());
238  vector<double> loglikelihoods(IOPhymlPairedSiteLikelihoods::readPairedSiteLikelihoods(iF));
239  return loglikelihoods;
240 }
A container for paired-site likelihoods (likelihoods over the same sites for different models,...
size_t getNumberOfModels() const
Get the number of models in the container.
const std::vector< std::string > & getModelNames() const
std::size_t getNumberOfSites() const
const std::vector< std::vector< double > > & getLikelihoods() const
Defines the basic types of data flow nodes.