bpp-phyl3  3.0.0
Coala.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 #include <Bpp/Io/FileTools.h>
10 #include <Bpp/Seq/SequenceTools.h>
12 #include <Bpp/Text/TextTools.h>
13 
14 #include "Coala.h"
15 
16 using namespace bpp;
17 
18 // From the STL:
19 #include <iostream>
20 #include <fstream>
21 #include <string>
22 
23 using namespace std;
24 
25 
26 /******************************************************************************/
27 
29  shared_ptr<const ProteicAlphabet> alpha,
31  unsigned int nbAxes,
32  bool param) :
34  AbstractReversibleProteinSubstitutionModel(alpha, model.getStateMap(), "Coala."),
35  CoalaCore(nbAxes),
36  init_(true),
37  nbrOfAxes_(nbAxes),
38  exch_(model.getName()),
39  file_(),
40  param_(param)
41 {
42  setNamespace(getName() + ".");
43 
44  // Setting the exchangeability matrix
47 }
48 
49 /******************************************************************************/
50 
51 void Coala::readFromFile(string& file)
52 {
53  ifstream in(file.c_str(), ios::in);
54  // Read exchangeability matrix:
55  for (unsigned int i = 1; i < 20; ++i)
56  {
57  string line = FileTools::getNextLine(in);
58  StringTokenizer st(line);
59  for (unsigned int j = 0; j < i; ++j)
60  {
61  double s = TextTools::toDouble(st.nextToken());
62  exchangeability_(i, j) = exchangeability_(j, i) = s;
63  }
64  }
65 
66  // Now build diagonal of the exchangeability matrix:
67  for (unsigned int i = 0; i < 20; ++i)
68  {
69  double sum = 0;
70  for (unsigned int j = 0; j < 20; ++j)
71  {
72  if (j != i)
73  sum += exchangeability_(i, j);
74  }
75  exchangeability_(i, i) = -sum;
76  }
77 
78  // Closing stream:
79  in.close();
80 }
81 
82 
83 /******************************************************************************/
84 
86 {
87  // Computes the equilibrium frequencies from a set of coordinates along the principal axes of the COA.
88  if (init_)
89  init_ = false;
90  else
91  {
92  // We get the coordinates:
93  vector<double> coord;
94  for (unsigned int i = 0; i < nbrOfAxes_; ++i)
95  {
96  coord.push_back(parameter("AxPos" + TextTools::toString(i)).getValue());
97  }
98 
99  // Now, frequencies are computed from the vector of coordinates and the transpose of the principal axes matrix (P_):
100  vector<double> tmpFreqs;
101  tmpFreqs = prodMatrixVector(P_, coord);
102  for (unsigned int i = 0; i < tmpFreqs.size(); ++i)
103  {
104  tmpFreqs[i] = (tmpFreqs[i] + 1) * colWeights_[i];
105  }
106  freq_ = tmpFreqs;
107 
108  // Frequencies are not allowed to be lower than 10^-3 or higher than 0.5:
109  bool norm = false;
110  for (unsigned int i = 0; i < 20; ++i)
111  {
112  if (freq_[i] < 0.001)
113  {
114  norm = true;
115  freq_[i] = 0.001;
116  }
117  if (freq_[i] > 0.2)
118  {
119  norm = true;
120  freq_[i] = 0.2;
121  }
122  }
123  if (norm == true)
124  {
125  double s = VectorTools::sum(freq_);
126  for (size_t i = 0; i < 20; i++)
127  {
128  freq_[i] = freq_[i] / s;
129  }
130  }
131  }
132 }
133 
134 /******************************************************************************/
135 
137 {
140 }
141 
142 /******************************************************************************/
143 
144 void Coala::setFreqFromData(const SequenceDataInterface& data, double pseudoCount)
145 {
146  // Compute the COA from the observed frequencies, add the axis position parameters and update the Markov matrix
147  auto plist = computeCOA(data, pseudoCount, param_);
148  addParameters_(plist);
149  updateMatrices_();
150 }
151 
152 /******************************************************************************/
void addParameters_(const ParameterList &parameters)
void setNamespace(const std::string &prefix)
const Parameter & parameter(const std::string &name) const override
Specialisation abstract class for reversible protein substitution model.
virtual void updateMatrices_() override
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVecto...
RowMatrix< double > exchangeability_
The exchangeability matrix of the model, defined as . When the model is reversible,...
Vdouble freq_
The vector of equilibrium frequencies.
This class is the core class inherited by the Coala class. COaLA is a branch-heterogeneous amino-acid...
Definition: CoalaCore.h:32
std::vector< double > colWeights_
Definition: CoalaCore.h:38
RowMatrix< double > P_
Definition: CoalaCore.h:36
std::vector< double > prodMatrixVector(RowMatrix< double > &P, std::vector< double > &V)
Definition: CoalaCore.cpp:156
ParameterList computeCOA(const SequenceDataInterface &data, double pseudoCount=0, bool param=true)
Comoute COA from data and return the axis position parameters.
Definition: CoalaCore.cpp:40
void updateMatrices_() override
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVecto...
Definition: Coala.cpp:136
unsigned int nbrOfAxes_
Definition: Coala.h:60
void readFromFile(std::string &file)
Definition: Coala.cpp:51
void setFreqFromData(const SequenceDataInterface &data, double pseudoCount=0) override
Set equilibrium frequencies equal to the frequencies estimated from the data.
Definition: Coala.cpp:144
bool init_
Definition: Coala.h:59
std::string getName() const override
Get the name of the model.
Definition: Coala.h:76
Coala(std::shared_ptr< const ProteicAlphabet > alpha, const ProteinSubstitutionModelInterface &model, unsigned int nbAxes=0, bool param=true)
Definition: Coala.cpp:28
void computeEquilibriumFrequencies()
Definition: Coala.cpp:85
bool param_
Definition: Coala.h:63
static std::string getNextLine(std::istream &in)
Specialized interface for protein substitution model.
const std::string & nextToken()
virtual const Matrix< double > & exchangeabilityMatrix() const =0
static T sum(const std::vector< T > &v1)
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.