bpp-core3  3.0.0
LowMemoryRescaledHmmLikelihood.cpp
Go to the documentation of this file.
1 //
2 // File: LowMemoryRescaledHmmLikelihood.cpp
3 // Authors:
4 // Julien Dutheil
5 // Created: 2009-12-16 10:47:00
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for phylogenetic data analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39 */
40 
41 
43 
44 // from the STL:
45 #include <iostream>
46 #include <algorithm>
47 using namespace bpp;
48 using namespace std;
49 
51  HmmStateAlphabet* hiddenAlphabet,
52  HmmTransitionMatrix* transitionMatrix,
53  HmmEmissionProbabilities* emissionProbabilities,
54  const std::string& prefix,
55  size_t maxSize) :
57  AbstractParametrizable(prefix),
58  hiddenAlphabet_(hiddenAlphabet),
59  transitionMatrix_(transitionMatrix),
60  emissionProbabilities_(emissionProbabilities),
61  likelihood1_(),
62  likelihood2_(),
63  logLik_(),
64  maxSize_(maxSize),
65  breakPoints_(),
66  nbStates_(),
67  nbSites_()
68 {
69  if (!hiddenAlphabet)
70  throw Exception("LowMemoryRescaledHmmLikelihood: null pointer passed for HmmStateAlphabet.");
71  if (!transitionMatrix)
72  throw Exception("LowMemoryRescaledHmmLikelihood: null pointer passed for HmmTransitionMatrix.");
73  if (!emissionProbabilities)
74  throw Exception("LowMemoryRescaledHmmLikelihood: null pointer passed for HmmEmissionProbabilities.");
75  if (!hiddenAlphabet_->worksWith(transitionMatrix->getHmmStateAlphabet()))
76  throw Exception("LowMemoryRescaledHmmLikelihood: HmmTransitionMatrix and HmmEmissionProbabilities should point toward the same HmmStateAlphabet object.");
77  if (!hiddenAlphabet_->worksWith(emissionProbabilities->getHmmStateAlphabet()))
78  throw Exception("LowMemoryRescaledHmmLikelihood: HmmTransitionMatrix and HmmEmissionProbabilities should point toward the same HmmStateAlphabet object.");
79  nbStates_ = hiddenAlphabet_->getNumberOfStates();
80  nbSites_ = emissionProbabilities_->getNumberOfPositions();
81 
82  // Manage parameters:
83  addParameters_(hiddenAlphabet_->getParameters());
84  addParameters_(transitionMatrix_->getParameters());
85  addParameters_(emissionProbabilities_->getParameters());
86 
87  // Init arrays:
88  likelihood1_.resize(nbStates_);
89  likelihood2_.resize(nbStates_);
90 
91  // Compute:
93 }
94 
95 void LowMemoryRescaledHmmLikelihood::setNamespace(const std::string& nameSpace)
96 {
98 
99  hiddenAlphabet_->setNamespace(nameSpace);
100  transitionMatrix_->setNamespace(nameSpace);
101  emissionProbabilities_->setNamespace(nameSpace);
102 }
103 
105 {
106  bool alphabetChanged = hiddenAlphabet_->matchParametersValues(pl);
107  bool transitionsChanged = transitionMatrix_->matchParametersValues(pl);
108  bool emissionChanged = emissionProbabilities_->matchParametersValues(pl);
109  // these lines are necessary because the transitions and emissions can depend on the alphabet.
110  // we could use a StateChangeEvent, but this would result in computing some calculations twice in some cases
111  // (when both the alphabet and other parameter changed).
112  if (alphabetChanged && !transitionsChanged)
113  transitionMatrix_->setParametersValues(transitionMatrix_->getParameters());
114  if (alphabetChanged && !emissionChanged)
115  emissionProbabilities_->setParametersValues(emissionProbabilities_->getParameters());
116 
117  computeForward_();
118 }
119 
120 /***************************************************************************************************************************/
121 
123 {
124  double x;
125  vector<double> tmp(nbStates_);
126  vector<double> lScales(min(maxSize_, nbSites_));
127  vector<double> trans(nbStates_ * nbStates_);
128 
129  // Transition probabilities:
130  for (size_t i = 0; i < nbStates_; i++)
131  {
132  size_t ii = i * nbStates_;
133  for (size_t j = 0; j < nbStates_; j++)
134  {
135  trans[ii + j] = transitionMatrix_->Pij(j, i);
136  }
137  }
138 
139  // Initialisation:
140  double scale = 0;
141  const vector<double>* emissions = &(*emissionProbabilities_)(0);
142  for (size_t j = 0; j < nbStates_; j++)
143  {
144  size_t jj = j * nbStates_;
145  x = 0;
146  for (size_t k = 0; k < nbStates_; k++)
147  {
148  x += trans[k + jj] * transitionMatrix_->getEquilibriumFrequencies()[k];
149  }
150  tmp[j] = (*emissions)[j] * x;
151  scale += tmp[j];
152  }
153  for (size_t j = 0; j < nbStates_; j++)
154  {
155  likelihood1_[j] = tmp[j] / scale;
156  }
157  lScales[0] = log(scale);
158 
159  vector<double>* previousLikelihood = &likelihood2_, * currentLikelihood = &likelihood1_, * tmpLikelihood;
160 
161  // Recursion:
162  size_t nextBrkPt = nbSites_; // next break point
163  vector<size_t>::const_iterator bpIt = breakPoints_.begin();
164  if (bpIt != breakPoints_.end())
165  nextBrkPt = *bpIt;
166 
167  double a;
168  logLik_ = 0;
169  size_t offset = 0;
170  greater<double> cmp;
171  for (size_t i = 1; i < nbSites_; i++)
172  {
173  // Swap pointers:
174  tmpLikelihood = previousLikelihood;
175  previousLikelihood = currentLikelihood;
176  currentLikelihood = tmpLikelihood;
177 
178  scale = 0;
179  emissions = &(*emissionProbabilities_)(i);
180  if (i < nextBrkPt)
181  {
182  for (size_t j = 0; j < nbStates_; j++)
183  {
184  size_t jj = j * nbStates_;
185  x = 0;
186  for (size_t k = 0; k < nbStates_; k++)
187  {
188  a = trans[jj + k] * (*previousLikelihood)[k];
189  if (a < 0)
190  {
191  // *ApplicationTools::warning << "Negative value for likelihood at " << i << ", state " << j << ": " << _likelihood[i-1][k] << ", Pij = " << _hiddenModel->Pij(k, j) << endl;
192  a = 0;
193  }
194  x += a;
195  }
196  tmp[j] = (*emissions)[j] * x;
197  if (tmp[j] < 0)
198  {
199  // *ApplicationTools::warning << "Negative emission probability at " << i << ", state " << j << ": " << _emissions[i][j] << endl;
200  tmp[j] = 0;
201  }
202  scale += tmp[j];
203  }
204  }
205  else // Reset markov chain:
206  {
207  for (size_t j = 0; j < nbStates_; j++)
208  {
209  size_t jj = j * nbStates_;
210  x = 0;
211  for (size_t k = 0; k < nbStates_; k++)
212  {
213  a = trans[jj + k] * transitionMatrix_->getEquilibriumFrequencies()[k];
214  if (a < 0)
215  {
216  // *ApplicationTools::warning << "Negative value for likelihood at " << i << ", state " << j << ": " << _likelihood[i-1][k] << ", Pij = " << _hiddenModel->Pij(k, j) << endl;
217  a = 0;
218  }
219  x += a;
220  }
221  tmp[j] = (*emissions)[j] * x;
222  if (tmp[j] < 0)
223  {
224  // *ApplicationTools::warning << "Negative emission probability at " << i << ", state " << j << ": " << _emissions[i][j] << endl;
225  tmp[j] = 0;
226  }
227  scale += tmp[j];
228  }
229  bpIt++;
230  if (bpIt != breakPoints_.end())
231  nextBrkPt = *bpIt;
232  else
233  nextBrkPt = nbSites_;
234  }
235 
236  for (size_t j = 0; j < nbStates_; j++)
237  {
238  if (scale > 0)
239  (*currentLikelihood)[j] = tmp[j] / scale;
240  else
241  (*currentLikelihood)[j] = 0;
242  }
243  lScales[i - offset] = log(scale);
244 
245  if (i - offset == maxSize_ - 1)
246  {
247  // We make partial calculations and reset the arrays:
248  double partialLogLik = 0;
249  sort(lScales.begin(), lScales.end(), cmp);
250  for (size_t j = 0; j < maxSize_; ++j)
251  {
252  partialLogLik += lScales[j];
253  }
254  logLik_ += partialLogLik;
255  offset += maxSize_;
256  }
257  }
258  sort(lScales.begin(), lScales.begin() + static_cast<ptrdiff_t>(nbSites_ - offset), cmp);
259  double partialLogLik = 0;
260  for (size_t i = 0; i < nbSites_ - offset; ++i)
261  {
262  partialLogLik += lScales[i];
263  }
264  logLik_ += partialLogLik;
265 }
266 
267 /***************************************************************************************************************************/
A partial implementation of the Parametrizable interface.
void setNamespace(const std::string &prefix)
Set the namespace for the parameter names.
virtual void addParameters_(const ParameterList &parameters)
Exception base class. Overload exception constructor (to control the exceptions mechanism)....
Definition: Exceptions.h:59
Interface for computing emission probabilities in a Hidden Markov Model.
virtual const HmmStateAlphabet * getHmmStateAlphabet() const =0
Hidden states alphabet.
Describe the transition probabilities between hidden states of a Hidden Markov Model.
virtual const HmmStateAlphabet * getHmmStateAlphabet() const =0
void setNamespace(const std::string &nameSpace)
Set the namespace for the parameter names.
std::unique_ptr< HmmStateAlphabet > hiddenAlphabet_
The alphabet describing the hidden states.
std::unique_ptr< HmmTransitionMatrix > transitionMatrix_
void fireParameterChanged(const ParameterList &pl)
Notify the class when one or several parameters have changed.
std::vector< double > likelihood1_
The likelihood array.
LowMemoryRescaledHmmLikelihood(HmmStateAlphabet *hiddenAlphabet, HmmTransitionMatrix *transitionMatrix, HmmEmissionProbabilities *emissionProbabilities, const std::string &prefix, size_t maxSize=1000000)
Build a new LowMemoryRescaledHmmLikelihood object.
std::unique_ptr< HmmEmissionProbabilities > emissionProbabilities_
The parameter list object.
Definition: ParameterList.h:65