5 #include "../StateMap.h"
19 std::shared_ptr<const GeneticCode> gCode,
21 unsigned short method,
28 sFreq_(gCode->sourceAlphabet().getSize() - gCode->getNumberOfStopCodons(), method, allowNullFreqs,
"Full.")
44 std::shared_ptr<const GeneticCode> gCode,
45 const vector<double>& initFreqs,
47 unsigned short method,
54 sFreq_(gCode->sourceAlphabet().getSize() - gCode->getNumberOfStopCodons(), method, allowNullFreqs,
"Full.")
57 throw Exception(
"FullCodonFrequencySet(constructor). There must be " +
TextTools::toString(gCode->getSourceAlphabet()->getSize()) +
" frequencies.");
62 if (!
pgc_->isStop(
static_cast<int>(i)))
69 if (!gCode->isStop(
static_cast<int>(i)))
70 vd.push_back(initFreqs[i] / sum);
107 if (!
pgc_->isStop(
static_cast<int>(i)))
108 sum += frequencies[i];
114 if (!
pgc_->isStop(
static_cast<int>(i)))
115 vd.push_back(frequencies[i] / sum);
135 if (
pgc_->isStop(
static_cast<int>(j)))
150 std::shared_ptr<const GeneticCode> gencode,
151 std::unique_ptr<ProteinFrequencySetInterface> ppfs,
152 unsigned short method) :
158 ppfs_(std::move(ppfs)),
161 auto& ppa =
pgc_->proteicAlphabet();
162 auto& aaStates =
ppfs_->stateMap();
163 for (
size_t i = 0; i < aaStates.getNumberOfModelStates(); ++i)
165 int aa = aaStates.getAlphabetStateAsInt(i);
166 vector<int> vc =
pgc_->getSynonymous(aa);
167 vS_.push_back(
Simplex(vc.size(), method, 0,
""));
175 ppfs_->setNamespace(
"FullPerAA." +
ppfs_->getName() +
".");
182 std::shared_ptr<const GeneticCode> gencode,
183 unsigned short method) :
192 auto ppa =
pgc_->getProteicAlphabet();
193 auto& aaStates =
ppfs_->stateMap();
194 for (
size_t i = 0; i < aaStates.getNumberOfModelStates(); ++i)
196 int aa = aaStates.getAlphabetStateAsInt(i);
197 vector<int> vc =
pgc_->getSynonymous(aa);
198 vS_.push_back(
Simplex(vc.size(), method, 0,
""));
211 ppfs_(ffs.ppfs_->clone()),
231 for (
size_t i = 0; i <
vS_.size(); i++)
233 vS_[i].matchParametersValues(parameters);
240 auto& aaStates =
ppfs_->stateMap();
241 for (
size_t i = 0; i < aaStates.getNumberOfModelStates(); ++i)
243 int aa = aaStates.getAlphabetStateAsInt(i);
244 std::vector<int> vc =
pgc_->getSynonymous(aa);
245 for (
size_t j = 0; j < vc.size(); j++)
249 static_cast<double>(vc.size()) *
ppfs_->getFrequencies()[i] *
vS_[i].prob(j);
263 auto& aaStates =
ppfs_->stateMap();
264 for (
size_t i = 0; i < aaStates.getNumberOfModelStates(); ++i)
266 int aa = aaStates.getAlphabetStateAsInt(i);
267 std::vector<int> vc =
pgc_->getSynonymous(aa);
271 for (
size_t j = 0; j < vc.size(); j++)
273 size_t index =
pgc_->getSourceAlphabet()->getStateIndex(vc[j]);
274 vp.push_back(frequencies[index - 1]);
275 s += frequencies[index - 1];
279 vS_[i].setFrequencies(vp);
283 bigS += s /
static_cast<double>(vc.size());
284 vaa.push_back(s /
static_cast<double>(vc.size()));
288 ppfs_->setFrequencies(vaa);
295 auto ppa =
pgc_->getProteicAlphabet();
298 ppfs_->setNamespace(prefix +
ppfs_->getName() +
".");
299 for (
size_t i = 0; i <
vS_.size(); ++i)
301 vS_[i].setNamespace(prefix + ppa->getAbbr(
static_cast<int>(i)) +
"_");
310 std::shared_ptr<const GeneticCode> gCode,
311 const vector<double>& initFreqs,
312 const string& name) :
323 std::shared_ptr<const GeneticCode> gCode,
324 const string& name) :
331 size_t size = gCode->sourceAlphabet().getSize() - gCode->getNumberOfStopCodons();
333 for (
size_t i = 0; i < gCode->sourceAlphabet().getSize(); i++)
335 getFreq_(i) = (gCode->isStop(
static_cast<int>(i))) ? 0 : 1. /
static_cast<double>(size);
342 if (frequencies.size() != ca->getSize())
343 throw DimensionException(
"FixedFrequencySet::setFrequencies", frequencies.size(), ca->getSize());
346 for (
size_t i = 0; i < frequencies.size(); ++i)
348 if (!(
pgc_->isStop(
static_cast<int>(i))))
349 sum += frequencies[i];
352 for (
size_t i = 0; i < ca->getSize(); ++i)
354 getFreq_(i) = (
pgc_->isStop(
static_cast<int>(i))) ? 0 : frequencies[i] / sum;
363 std::shared_ptr<const GeneticCode> gCode,
364 const std::string& path,
376 if (frequencies.size() != ca->getSize())
377 throw DimensionException(
"UserFrequencySet::setFrequencies", frequencies.size(), ca->getSize());
380 for (
size_t i = 0; i < frequencies.size(); ++i)
382 if (!(
pgc_->isStop(
static_cast<int>(i))))
383 sum += frequencies[i];
386 for (
size_t i = 0; i < ca->getSize(); ++i)
388 getFreq_(i) = (
pgc_->isStop(
static_cast<int>(i))) ? 0 : frequencies[i] / sum;
398 std::shared_ptr<const GeneticCode> gCode,
399 vector<std::unique_ptr<FrequencySetInterface>>& freqvector,
401 const string& mgmtStopCodon) :
403 gCode->getCodonAlphabet(),
411 if (mgmtStopCodon ==
"uniform")
413 else if (mgmtStopCodon ==
"linear")
418 vector<int> vspcod = gCode->getStopCodonsAsInt();
419 for (
size_t ispcod = 0; ispcod < vspcod.size(); ispcod++)
422 int nspcod = vspcod[ispcod];
423 for (
size_t ph = 0; ph < 3; ph++)
425 size_t nspcod0 =
static_cast<size_t>(nspcod) -
pow *
static_cast<size_t>(
getCodonAlphabet()->getNPosition(nspcod, 2 - ph));
426 for (
size_t dec = 0; dec < 4; dec++)
428 size_t vois = nspcod0 +
pow * dec;
429 if (!
pgc_->isStop(
static_cast<int>(vois)))
430 mStopNeigh_[nspcod].push_back(
static_cast<int>(vois));
445 mStopNeigh_(iwfs.mStopNeigh_),
446 mgmtStopCodon_(iwfs.mgmtStopCodon_),
472 for (
size_t i = 0; i < s; i++)
477 std::map<int, Vint>::iterator mStopNeigh_it(
mStopNeigh_.begin());
480 int stNb = mStopNeigh_it->first;
481 Vint vneigh = mStopNeigh_it->second;
483 for (
size_t vn = 0; vn < vneigh.size(); vn++)
487 double x =
getFreq_(
static_cast<size_t>(stNb)) / sneifreq;
488 for (
size_t vn = 0; vn < vneigh.size(); vn++)
492 getFreq_(
static_cast<size_t>(stNb)) = 0;
496 for (
size_t i = 0; i < s; i++)
504 for (
size_t i = 0; i < s; i++)
506 if (!
pgc_->isStop(
static_cast<int>(i)))
510 for (
size_t i = 0; i < s; i++)
512 if (
pgc_->isStop(
static_cast<int>(i)))
525 std::shared_ptr<const GeneticCode> gCode,
526 std::unique_ptr<FrequencySetInterface> pfreq,
528 const string& mgmtStopCodon) :
534 if (mgmtStopCodon ==
"uniform")
536 else if (mgmtStopCodon ==
"linear")
541 vector<int> vspcod = gCode->getStopCodonsAsInt();
542 for (
size_t ispcod = 0; ispcod < vspcod.size(); ispcod++)
545 int nspcod = vspcod[ispcod];
546 for (
int ph = 0; ph < 3; ph++)
548 size_t nspcod0 =
static_cast<size_t>(nspcod) -
pow *
static_cast<size_t>(
getCodonAlphabet()->getNPosition(nspcod,
static_cast<unsigned int>(2 - ph)));
549 for (
size_t dec = 0; dec < 4; dec++)
551 size_t vois = nspcod0 +
pow * dec;
552 if (!
pgc_->isStop(
static_cast<int>(vois)))
553 mStopNeigh_[nspcod].push_back(
static_cast<int>(vois));
570 mStopNeigh_(iwfs.mStopNeigh_),
571 mgmtStopCodon_(iwfs.mgmtStopCodon_),
600 int stNb = mStopNeigh_it.first;
601 Vint vneigh = mStopNeigh_it.second;
603 for (
size_t vn = 0; vn < vneigh.size(); vn++)
607 double x =
getFreq_(
static_cast<size_t>(stNb)) / sneifreq;
608 for (
size_t vn = 0; vn < vneigh.size(); vn++)
612 getFreq_(
static_cast<size_t>(stNb)) = 0;
615 for (
size_t i = 0; i < s; i++)
623 for (
size_t i = 0; i < s; i++)
625 if (!
pgc_->isStop(
static_cast<int>(i)))
629 for (
unsigned int i = 0; i < s; i++)
631 if (
pgc_->isStop(
static_cast<int>(i)))
643 std::shared_ptr<const GeneticCode> gCode,
644 const string& mgmtStopCodon,
645 unsigned short method)
647 unique_ptr<CodonFrequencySetInterface> codonFreqs;
653 else if (option ==
F1X4)
655 codonFreqs.reset(
new CodonFromUniqueFrequencySet(gCode, make_unique<FullNucleotideFrequencySet>(gCode->codonAlphabet().getNucleicAlphabet()),
"F1X4", mgmtStopCodon));
657 else if (option ==
F3X4)
659 vector<unique_ptr<FrequencySetInterface>> v_AFS(3);
660 v_AFS[0] = make_unique<FullNucleotideFrequencySet>(gCode->codonAlphabet().getNucleicAlphabet());
661 v_AFS[1] = make_unique<FullNucleotideFrequencySet>(gCode->codonAlphabet().getNucleicAlphabet());
662 v_AFS[2] = make_unique<FullNucleotideFrequencySet>(gCode->codonAlphabet().getNucleicAlphabet());
663 codonFreqs = make_unique<CodonFromIndependentFrequencySet>(gCode, v_AFS,
"F3X4", mgmtStopCodon);
665 else if (option ==
F61)
668 throw Exception(
"FrequencySet::getFrequencySetForCodons(). Invalid codon frequency set argument.");
Basic implementation of the FrequencySet interface.
std::shared_ptr< const Alphabet > getAlphabet() const override
double & getFreq_(size_t i)
AbstractFrequencySet & operator=(const AbstractFrequencySet &af)
const StateMapInterface & stateMap() const override
void addParameters_(const ParameterList &parameters)
void setNamespace(const std::string &prefix)
void setParametersValues(const ParameterList &parameters) override
bool matchParametersValues(const ParameterList &parameters) override
const ParameterList & getParameters() const override
std::shared_ptr< const CoreWordAlphabet > getWordAlphabet() const override
This class implements a state map where all resolved states are modeled.
static std::unique_ptr< CodonFrequencySetInterface > getFrequencySetForCodons(short option, std::shared_ptr< const GeneticCode > gCode, const std::string &mgmtStopCodon="quadratic", unsigned short method=1)
A helper function that provides frequency sets for codon models according to the PAML option.
the Frequencies in codons are the product of Independent Frequencies in letters with the frequencies ...
std::map< int, Vint > mStopNeigh_
std::shared_ptr< const CodonAlphabet > getCodonAlphabet() const override
void updateFrequencies() override
Update the frequencies given the parameters.
unsigned short mgmtStopCodon_
std::shared_ptr< const GeneticCode > pgc_
CodonFromIndependentFrequencySet(std::shared_ptr< const GeneticCode > gCode, std::vector< std::unique_ptr< FrequencySetInterface >> &freqvector, const std::string &name="Codon", const std::string &mgmtStopCodon="quadratic")
Constructor from a CodonAlphabet* and a vector of different std::shared_ptr<FrequencySet>....
CodonFromIndependentFrequencySet & operator=(const CodonFromIndependentFrequencySet &iwfs)
the Frequencies in codons are the product of the frequencies for a unique FrequencySet in letters,...
void updateFrequencies() override
Update the frequencies given the parameters.
CodonFromUniqueFrequencySet(std::shared_ptr< const GeneticCode > gCode, std::unique_ptr< FrequencySetInterface > pfreq, const std::string &name="Codon", const std::string &mgmtStopCodon="quadratic")
Constructor from a CodonAlphabet* and a std::shared_ptr<FrequencySet> repeated three times.
unsigned short mgmtStopCodon_
std::shared_ptr< const GeneticCode > pgc_
std::map< int, Vint > mStopNeigh_
CodonFromUniqueFrequencySet & operator=(const CodonFromUniqueFrequencySet &iwfs)
std::shared_ptr< const CodonAlphabet > getCodonAlphabet() const override
FrequencySet useful for homogeneous and stationary models, codon implementation.
void setFrequencies(const std::vector< double > &frequencies) override
the given frequencies are normalized such that the sum of the frequencies on the non-stop codons equ...
std::shared_ptr< const CodonAlphabet > getCodonAlphabet() const override
std::shared_ptr< const GeneticCode > pgc_
FixedCodonFrequencySet(std::shared_ptr< const GeneticCode > gCode, const std::vector< double > &initFreqs, const std::string &name="Fixed")
FrequencySet useful for homogeneous and stationary models, protein implementation.
A generic FrequencySet for Full Codon alphabets.
FullCodonFrequencySet & operator=(const FullCodonFrequencySet &fcfs)
FullCodonFrequencySet(std::shared_ptr< const GeneticCode > gCode, bool allowNullFreqs=false, unsigned short method=1, const std::string &name="Full")
Construction with uniform frequencies on the letters of the alphabet. The stop codon frequencies are ...
Simplex sFreq_
Simplex to handle the probabilities and the parameters.
std::shared_ptr< const GeneticCode > pgc_
void setFrequencies(const std::vector< double > &frequencies) override
the given frequencies are normalized such that the sum of the frequencies on the non-stop codons equa...
void fireParameterChanged(const ParameterList &parameters) override
void setNamespace(const std::string &nameSpace) override
std::shared_ptr< const CodonAlphabet > getCodonAlphabet() const override
FrequencySet integrating ProteinFrequencySet inside CodonFrequencySet. In this case,...
std::unique_ptr< ProteinFrequencySetInterface > ppfs_
std::shared_ptr< const GeneticCode > pgc_
std::shared_ptr< const CodonAlphabet > getCodonAlphabet() const override
void setFrequencies(const std::vector< double > &frequencies) override
the given frequencies are normalized such that the sum of the frequencies on the non-stop codons equ...
void setNamespace(const std::string &prefix) override
std::vector< Simplex > vS_
vector of the simplexes, one for each AA
FullPerAACodonFrequencySet(std::shared_ptr< const GeneticCode > gencode, std::unique_ptr< ProteinFrequencySetInterface > ppfs, unsigned short method=1)
Create a new FullPerAACodonFrequencySet object.
FullPerAACodonFrequencySet & operator=(const FullPerAACodonFrequencySet &ffs)
void updateFrequencies_()
void fireParameterChanged(const ParameterList &parameters) override
double prob(size_t i) const
void setFrequencies(const std::vector< double > &)
UserCodonFrequencySet(std::shared_ptr< const GeneticCode > gCode, const std::string &path, size_t nCol=1)
std::shared_ptr< const GeneticCode > pgc_
std::shared_ptr< const CodonAlphabet > getCodonAlphabet() const override
void setFrequencies(const std::vector< double > &frequencies) override
the given frequencies are normalized such that the sum of the frequencies on the non-stop codons equ...
FrequencySet to be read in a file. More specifically, a frequency set is read in a column of a given ...
the Frequencies in words are the product of Independent Frequencies in letters
virtual void updateFrequencies()
WordFromIndependentFrequencySet & operator=(const WordFromIndependentFrequencySet &iwfs)
virtual void updateFrequencies()
WordFromUniqueFrequencySet & operator=(const WordFromUniqueFrequencySet &iwfs)
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
ExtendedFloat pow(const ExtendedFloat &ef, double exp)