bpp-phyl3  3.0.0
WordFrequencySet.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "WordFrequencySet.h"
6 
7 using namespace bpp;
8 
9 #include <cmath>
10 using namespace std;
11 
12 size_t AbstractWordFrequencySet::getSizeFromVector(const std::vector<std::unique_ptr<FrequencySetInterface>>& freqVector)
13 {
14  size_t s = 1;
15  size_t l = freqVector.size();
16 
17  for (size_t i = 0; i < l; i++)
18  {
19  s *= freqVector[i]->getAlphabet()->getSize();
20  }
21 
22  return s;
23 }
24 
25 AbstractWordFrequencySet::AbstractWordFrequencySet(std::shared_ptr<const StateMapInterface> stateMap, const string& prefix, const string& name) :
26  AbstractFrequencySet(stateMap, prefix, name)
27 {}
28 
30 {
31  return getWordAlphabet()->getLength();
32 }
33 
35 {}
36 
37 // ///////////////////////////////////////////////////////////////////
38 // // WordFromIndependentFrequencySet
39 
40 
42  std::shared_ptr<const WordAlphabet> pWA,
43  vector<std::unique_ptr<FrequencySetInterface>>& freqVector,
44  const string& prefix,
45  const string& name) :
47  make_shared<CanonicalStateMap>(pWA, false),
48  prefix,
49  name),
50  vFreq_(),
51  vNestedPrefix_()
52 {
53  size_t sf = getSizeFromVector(freqVector);
54  if (pWA->getSize() != sf)
55  throw Exception("WordFromIndependentFrequencySet: Size of the frequencies does not match size of the alphabet : " + TextTools::toString(sf) + " vs " + TextTools::toString(pWA->getSize()));
56 
57  size_t l = freqVector.size();
58 
59  for (size_t i = 0; i < l; ++i)
60  {
61  vFreq_.push_back(std::move(freqVector[i]));
62  vNestedPrefix_.push_back(vFreq_[i]->getNamespace());
63  vFreq_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + vNestedPrefix_[i]);
65  }
67 }
68 
70  std::shared_ptr<const CodonAlphabet> pWA,
71  vector<std::unique_ptr<FrequencySetInterface>>& freqVector,
72  const string& prefix,
73  const string& name) :
75  make_shared<CanonicalStateMap>(pWA, false),
76  prefix,
77  name),
78  vFreq_(),
79  vNestedPrefix_()
80 {
81  size_t sf = getSizeFromVector(freqVector);
82  if (pWA->getSize() != sf)
83  throw Exception("WordFromIndependentFrequencySet: Size of the frequencies does not match size of the alphabet : " + TextTools::toString(sf) + " vs " + TextTools::toString(pWA->getSize()));
84 
85  size_t l = freqVector.size();
86 
87  for (size_t i = 0; i < l; ++i)
88  {
89  vNestedPrefix_.push_back(freqVector[i]->getNamespace());
90  vFreq_.push_back(std::move(freqVector[i]));
91  vFreq_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + vNestedPrefix_[i]);
93  }
94 
96 }
97 
100  vFreq_(iwfs.vFreq_.size()),
101  vNestedPrefix_(iwfs.vNestedPrefix_)
102 {
103  for (unsigned i = 0; i < iwfs.vFreq_.size(); i++)
104  {
105  vFreq_[i].reset(iwfs.vFreq_[i]->clone());
106  }
108 }
109 
111 {}
112 
114 {
117 
118  // Clean current frequencies first:
119  vFreq_.resize(iwfs.vFreq_.size());
120  for (unsigned i = 0; i < vFreq_.size(); i++)
121  {
122  vFreq_[i].reset(iwfs.vFreq_[i]->clone());
123  }
125 
126  return *this;
127 }
128 
130 {
131  size_t l = vFreq_.size();
132 
133  bool f = 0;
134  for (size_t i = 0; i < l; i++)
135  {
136  f |= vFreq_[i]->matchParametersValues(pl);
137  }
138 
139  if (f)
141 }
142 
144 {
145  size_t l = vFreq_.size();
146  size_t s = getWordAlphabet()->getSize();
147  vector< vector<double>> f(l);
148 
149  size_t i, p, t, i2;
150 
151  for (i = 0; i < l; i++)
152  {
153  f[i] = vFreq_[i]->getFrequencies();
154  }
155 
156  for (i = 0; i < s; i++)
157  {
158  i2 = i;
159  getFreq_(i) = 1;
160  for (p = l; p > 0; p--)
161  {
162  t = vFreq_[p - 1]->getAlphabet()->getSize();
163  getFreq_(i) *= f[p - 1][i2 % t];
164  i2 /= t;
165  }
166  }
167 }
168 
// Derive the per-position letter frequencies from a vector of word
// frequencies.
//
// For each position i, the frequency of a letter is the sum of the
// frequencies of every word that carries this letter at position i; the
// resulting vector is handed to vFreq_[i]->setFrequencies().
//
// @param frequencies One value per word; its size must equal
//        getWordAlphabet()->getSize().
// @throws DimensionException if the vector size differs from the word
//         alphabet size.
// @throws Exception if the values do not sum to 1 within 0.000001.
169 void WordFromIndependentFrequencySet::setFrequencies(const vector<double>& frequencies)
170 {
171  if (frequencies.size() != getWordAlphabet()->getSize())
172  throw DimensionException("WordFromIndependentFrequencySet::setFrequencies", frequencies.size(), getWordAlphabet()->getSize());
173  double sum = 0.0;
174  size_t size = frequencies.size();
175  for (size_t i = 0; i < size; i++)
176  {
177  sum += frequencies[i];
178  }
179  if (fabs(1. - sum) > 0.000001)
180  throw Exception("WordFromIndependentFrequencySet::setFrequencies. Frequencies must equal 1 (sum = " + TextTools::toString(sum) + ").");
181 
182  size_t d, i, j, k, s, l = vFreq_.size();
183  vector<double> freq;
184 
     // d is the stride of the current position inside a word index: it starts
     // at the number of words and is divided by each position's alphabet size
     // in turn, so position 0 is the most significant digit.
185  d = size;
186  for (i = 0; i < l; i++)
187  {
188  s = vFreq_[i]->getAlphabet()->getSize();
189  freq.resize(s);
190  d /= s;
     // Reset the accumulator before summing the marginal for this position.
191  for (j = 0; j < s; j++)
192  {
193  freq[j] = 0;
194  }
     // (k / d) % s is the letter index found at position i of word k.
195  for (k = 0; k < size; k++)
196  {
197  freq[(k / d) % s] += frequencies[k];
198  }
199  vFreq_[i]->setFrequencies(freq);
200  }
201 
     // NOTE(review): this loop has no body in this listing; the embedded
     // numbering skips line 204, so a statement was probably lost in
     // extraction -- confirm against the original file.
202  for (i = 0; i < l; i++)
203  {
205  }
206 
     // NOTE(review): embedded line 207 is also absent from this listing --
     // confirm against the original file.
208 }
209 
210 
212 {
213  return vFreq_.size();
214 }
215 
// Propagate a new namespace prefix to every nested frequency set.
//
// The nested set at index i receives the namespace
// prefix + (i + 1) + "_" + vNestedPrefix_[i], i.e. positions are numbered
// from 1 and the prefix stored in vNestedPrefix_ is appended.
//
// @param prefix The new namespace prefix.
216 void WordFromIndependentFrequencySet::setNamespace(const std::string& prefix)
217 {
     // NOTE(review): embedded line 218 is absent from this listing; a
     // statement preceding the loop was probably lost in extraction --
     // confirm against the original file.
219  for (size_t i = 0; i < vFreq_.size(); i++)
220  {
221  vFreq_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + vNestedPrefix_[i]);
222  }
223 }
224 
226 {
227  string s = getName() + " : " + vFreq_[0]->getName();
228  for (size_t i = 1; i < vFreq_.size(); i++)
229  {
230  s += " * " + vFreq_[i]->getName();
231  }
232  return s;
233 }
234 
235 // ///////////////////////////////////////////////////////////////////
236 // // WordFromUniqueFrequencySet
237 
238 
240  std::shared_ptr<const WordAlphabet> pWA,
241  std::unique_ptr<FrequencySetInterface> pabsfreq,
242  const string& prefix,
243  const string& name) :
245  make_shared<CanonicalStateMap>(pWA, false),
246  prefix,
247  name),
248  pFreq_(std::move(pabsfreq)),
249  NestedPrefix_(pFreq_->getNamespace()),
250  length_(pWA->getLength())
251 {
252  size_t i;
253 
254  string st = "";
255  for (i = 0; i < length_; ++i)
256  {
257  st += TextTools::toString(i + 1);
258  }
259 
260  pFreq_->setNamespace(prefix + st + "_" + NestedPrefix_);
261  addParameters_(pFreq_->getParameters());
262 
264 }
265 
267  std::shared_ptr<const CodonAlphabet> pWA,
268  std::unique_ptr<FrequencySetInterface> pabsfreq,
269  const string& prefix,
270  const string& name) :
272  make_shared<CanonicalStateMap>(pWA, false),
273  prefix,
274  name),
275  pFreq_(std::move(pabsfreq)),
276  NestedPrefix_(pFreq_->getNamespace()),
277  length_(pWA->getLength())
278 {
279  size_t i;
280 
281  string st = "";
282  for (i = 0; i < length_; ++i)
283  {
284  st += TextTools::toString(i + 1);
285  }
286 
287  pFreq_->setNamespace(prefix + st + "_" + NestedPrefix_);
288  addParameters_(pFreq_->getParameters());
289 
291 }
292 
295  pFreq_(iwfs.pFreq_->clone()),
296  NestedPrefix_(iwfs.NestedPrefix_),
297  length_(iwfs.length_)
298 {
300 }
301 
302 
304 {
306  pFreq_.reset(iwfs.pFreq_->clone());
308  length_ = iwfs.length_;
309 
311  return *this;
312 }
313 
315 {
316  pFreq_ = 0;
317 }
318 
320 {
321  if (pFreq_->matchParametersValues(pl))
323 }
324 
326 {
327  size_t s = getWordAlphabet()->getSize();
328  vector<double> f;
329  size_t letsi = pFreq_->getAlphabet()->getSize();
330 
331  size_t i, p, i2;
332 
333  f = pFreq_->getFrequencies();
334 
335  for (i = 0; i < s; i++)
336  {
337  i2 = i;
338  getFreq_(i2) = 1;
339  for (p = length_; p > 0; p--)
340  {
341  getFreq_(i) *= f[i2 % letsi];
342  i2 /= letsi;
343  }
344  }
345 }
346 
// Derive the single shared letter frequency vector from a vector of word
// frequencies.
//
// The marginal letter frequencies are accumulated over all length_ positions
// of the word and then divided by length_, so each letter receives the
// average of its per-position marginals. The result is handed to
// pFreq_->setFrequencies().
//
// @param frequencies One value per word; its size must equal
//        getWordAlphabet()->getSize().
// @throws DimensionException if the vector size differs from the word
//         alphabet size.
// @throws Exception if the values do not sum to 1 within 0.000001.
347 void WordFromUniqueFrequencySet::setFrequencies(const vector<double>& frequencies)
348 {
349  if (frequencies.size() != getWordAlphabet()->getSize())
350  throw DimensionException("WordFromUniqueFrequencySet::setFrequencies", frequencies.size(), getWordAlphabet()->getSize());
351  double sum = 0.0;
352  size_t size = frequencies.size();
353  for (size_t i = 0; i < size; i++)
354  {
355  sum += frequencies[i];
356  }
357  if (fabs(1. - sum) > 0.000001)
358  throw Exception("WordFromUniqueFrequencySet::setFrequencies. Frequencies must equal 1 (sum = " + TextTools::toString(sum) + ").");
359 
360  size_t d, i, j, k;
361  vector<double> freq;
362 
363  size_t letsi = pFreq_->getAlphabet()->getSize();
364  freq.resize(letsi);
365 
     // A single accumulator is shared by all positions; it is zeroed once.
366  for (j = 0; j < letsi; j++)
367  {
368  freq[j] = 0;
369  }
370 
     // d is the stride of the current position inside a word index: it starts
     // at the number of words and is divided by the letter alphabet size at
     // each position, so position 0 is the most significant digit.
371  d = size;
372  for (i = 0; i < length_; i++)
373  {
374  d /= letsi;
     // (k / d) % letsi is the letter index found at position i of word k.
375  for (k = 0; k < size; k++)
376  {
377  freq[(k / d) % letsi] += frequencies[k];
378  }
379  }
     // Turn the sum over positions into an average over positions.
380  for (j = 0; j < letsi; j++)
381  {
382  freq[j] /= static_cast<double>(length_);
383  }
384 
385  pFreq_->setFrequencies(freq);
     // Feed the nested set's parameter values back into this object.
386  matchParametersValues(pFreq_->getParameters());
     // NOTE(review): embedded line 387 is absent from this listing; a final
     // statement was probably lost in extraction -- confirm against the
     // original file.
388 }
389 
390 
392 {
394  string st = "";
395  for (unsigned i = 0; i < length_; i++)
396  {
397  st += TextTools::toString(i + 1);
398  }
399  pFreq_->setNamespace(prefix + st + "_" + NestedPrefix_);
400 }
401 
402 
404 {
405  return getName() + " : " + pFreq_->getName() + " * " + TextTools::toString(length_);
406 }
Basic implementation of the FrequencySet interface.
Definition: FrequencySet.h:102
double & getFreq_(size_t i)
Definition: FrequencySet.h:179
std::string getName() const override
Definition: FrequencySet.h:165
void addParameters_(const ParameterList &parameters)
void setNamespace(const std::string &prefix)
bool matchParametersValues(const ParameterList &parameters) override
std::string getNamespace() const override
const ParameterList & getParameters() const override
size_t getLength() const override
std::shared_ptr< const CoreWordAlphabet > getWordAlphabet() const override
AbstractWordFrequencySet & operator=(const AbstractWordFrequencySet &af)
AbstractWordFrequencySet(std::shared_ptr< const StateMapInterface > stateMap, const std::string &prefix="", const std::string &name="")
size_t getSizeFromVector(const std::vector< std::unique_ptr< FrequencySetInterface >> &freqVector) override
This class implements a state map where all resolved states are modeled.
Definition: StateMap.h:168
the Frequencies in words are the product of Independent Frequencies in letters
WordFromIndependentFrequencySet(std::shared_ptr< const WordAlphabet > pWA, std::vector< std::unique_ptr< FrequencySetInterface >> &freqVector, const std::string &prefix="", const std::string &name="WordFromIndependent")
Constructor from a WordAlphabet* and a vector of different std::shared_ptr<FrequencySet>....
std::vector< std::string > vNestedPrefix_
WordFromIndependentFrequencySet & operator=(const WordFromIndependentFrequencySet &iwfs)
virtual size_t getLength() const override
Return the length of the words.
void setNamespace(const std::string &prefix) override
void fireParameterChanged(const ParameterList &pl) override
std::vector< std::unique_ptr< FrequencySetInterface > > vFreq_
virtual void setFrequencies(const std::vector< double > &frequencies) override
Independent letter frequencies from given word frequencies. The frequencies of a letter at a position...
virtual void fireParameterChanged(const ParameterList &pl) override
std::unique_ptr< FrequencySetInterface > pFreq_
WordFromUniqueFrequencySet & operator=(const WordFromUniqueFrequencySet &iwfs)
virtual void setFrequencies(const std::vector< double > &frequencies) override
letter frequencies from given word frequencies. The frequencies of a letter at a position is the sum ...
WordFromUniqueFrequencySet(std::shared_ptr< const WordAlphabet > pWA, std::unique_ptr< FrequencySetInterface > pabsfreq, const std::string &prefix="", const std::string &name="WordFromUnique")
Constructor from a WordAlphabet* and a std::shared_ptr<FrequencySet> repeated as many times as the le...
void setNamespace(const std::string &prefix) override
std::string toString(T t)
Defines the basic types of data flow nodes.