bpp-phyl3 3.0.0
Coala.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
6#include <Bpp/Io/FileTools.h>
12#include <Bpp/Text/TextTools.h>
13
14#include "Coala.h"
15
16using namespace bpp;
17
18// From the STL:
19#include <iostream>
20#include <fstream>
21#include <string>
22
23using namespace std;
24
25
26/******************************************************************************/
27
28Coala::Coala(
29 shared_ptr<const ProteicAlphabet> alpha,
31 unsigned int nbAxes,
32 bool param) :
34 AbstractReversibleProteinSubstitutionModel(alpha, model.getStateMap(), "Coala."),
35 CoalaCore(nbAxes),
36 init_(true),
37 nbrOfAxes_(nbAxes),
38 exch_(model.getName()),
39 file_(),
40 param_(param)
41{
42 setNamespace(getName() + ".");
43
44 // Setting the exchangeability matrix
47}
48
49/******************************************************************************/
50
51void Coala::readFromFile(string& file)
52{
53 ifstream in(file.c_str(), ios::in);
54 // Read exchangeability matrix:
55 for (unsigned int i = 1; i < 20; ++i)
56 {
57 string line = FileTools::getNextLine(in);
58 StringTokenizer st(line);
59 for (unsigned int j = 0; j < i; ++j)
60 {
61 double s = TextTools::toDouble(st.nextToken());
62 exchangeability_(i, j) = exchangeability_(j, i) = s;
63 }
64 }
65
66 // Now build diagonal of the exchangeability matrix:
67 for (unsigned int i = 0; i < 20; ++i)
68 {
69 double sum = 0;
70 for (unsigned int j = 0; j < 20; ++j)
71 {
72 if (j != i)
73 sum += exchangeability_(i, j);
74 }
75 exchangeability_(i, i) = -sum;
76 }
77
78 // Closing stream:
79 in.close();
80}
81
82
83/******************************************************************************/
84
86{
87 // Computes the equilibrium frequencies from a set of coordinates along the principal axes of the COA.
88 if (init_)
89 init_ = false;
90 else
91 {
92 // We get the coordinates:
93 vector<double> coord;
94 for (unsigned int i = 0; i < nbrOfAxes_; ++i)
95 {
96 coord.push_back(parameter("AxPos" + TextTools::toString(i)).getValue());
97 }
98
99 // Now, frequencies are computed from the vector of coordinates and the transpose of the principal axes matrix (P_):
100 vector<double> tmpFreqs;
101 tmpFreqs = prodMatrixVector(P_, coord);
102 for (unsigned int i = 0; i < tmpFreqs.size(); ++i)
103 {
104 tmpFreqs[i] = (tmpFreqs[i] + 1) * colWeights_[i];
105 }
106 freq_ = tmpFreqs;
107
108 // Frequencies are not allowed to be lower than 10^-3 or higher than 0.5:
109 bool norm = false;
110 for (unsigned int i = 0; i < 20; ++i)
111 {
112 if (freq_[i] < 0.001)
113 {
114 norm = true;
115 freq_[i] = 0.001;
116 }
117 if (freq_[i] > 0.2)
118 {
119 norm = true;
120 freq_[i] = 0.2;
121 }
122 }
123 if (norm == true)
124 {
125 double s = VectorTools::sum(freq_);
126 for (size_t i = 0; i < 20; i++)
127 {
128 freq_[i] = freq_[i] / s;
129 }
130 }
131 }
132}
133
134/******************************************************************************/
135
137{
140}
141
142/******************************************************************************/
143
144void Coala::setFreqFromData(const SequenceDataInterface& data, double pseudoCount)
145{
146 // Compute the COA from the observed frequencies, add the axis position parameters and update the Markov matrix
147 auto plist = computeCOA(data, pseudoCount, param_);
148 addParameters_(plist);
150}
151
152/******************************************************************************/
void addParameters_(const ParameterList &parameters)
void setNamespace(const std::string &prefix)
const Parameter & parameter(const std::string &name) const override
Specialisation abstract class for reversible protein substitution model.
virtual void updateMatrices_() override
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVecto...
RowMatrix< double > exchangeability_
The exchangeability matrix of the model, defined as . When the model is reversible,...
Vdouble freq_
The vector of equilibrium frequencies.
This class is the core class inherited by the Coala class. COaLA is a branch-heterogeneous amino-acid...
Definition: CoalaCore.h:32
std::vector< double > colWeights_
Definition: CoalaCore.h:38
RowMatrix< double > P_
Definition: CoalaCore.h:36
std::vector< double > prodMatrixVector(RowMatrix< double > &P, std::vector< double > &V)
Definition: CoalaCore.cpp:156
ParameterList computeCOA(const SequenceDataInterface &data, double pseudoCount=0, bool param=true)
Comoute COA from data and return the axis position parameters.
Definition: CoalaCore.cpp:40
void updateMatrices_() override
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVecto...
Definition: Coala.cpp:136
unsigned int nbrOfAxes_
Definition: Coala.h:60
void readFromFile(std::string &file)
Definition: Coala.cpp:51
void setFreqFromData(const SequenceDataInterface &data, double pseudoCount=0) override
Set equilibrium frequencies equal to the frequencies estimated from the data.
Definition: Coala.cpp:144
bool init_
Definition: Coala.h:59
std::string getName() const override
Get the name of the model.
Definition: Coala.h:76
void computeEquilibriumFrequencies()
Definition: Coala.cpp:85
bool param_
Definition: Coala.h:63
static std::string getNextLine(std::istream &in)
Specialized interface for protein substitution model.
const std::string & nextToken()
virtual const Matrix< double > & exchangeabilityMatrix() const =0
static T sum(const std::vector< T > &v1)
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.