5#include "../Model/FrequencySet/CodonFrequencySet.h"
6#include "../Model/FrequencySet/MvaFrequencySet.h"
7#include "../Model/FrequencySet/NucleotideFrequencySet.h"
8#include "../Model/FrequencySet/ProteinFrequencySet.h"
35unsigned char BppOFrequencySetFormat::DNA = 1;
36unsigned char BppOFrequencySetFormat::RNA = 2;
37unsigned char BppOFrequencySetFormat::NUCLEOTIDE = 1 | 2;
38unsigned char BppOFrequencySetFormat::PROTEIN = 4;
39unsigned char BppOFrequencySetFormat::CODON = 8;
40unsigned char BppOFrequencySetFormat::WORD = 16;
41unsigned char BppOFrequencySetFormat::ALL = 1 | 2 | 4 | 8 | 16;
44std::unique_ptr<FrequencySetInterface> BppOFrequencySetFormat::readFrequencySet(
45 std::shared_ptr<const Alphabet> alphabet,
46 const std::string& freqDescription,
47 const std::map<
size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
53 map<string, string> args;
55 unique_ptr<FrequencySetInterface> pFS;
57 if (args.find(
"data") != args.end())
58 nData = TextTools::to<size_t>(args[
"data"]);
60 if (nData && mData.find(nData) == mData.end())
63 if (freqName ==
"Fixed")
68 pFS = make_unique<FixedNucleotideFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet));
70 throw Exception(
"Nucleotide alphabet not supported.");
75 pFS = make_unique<FixedProteinFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet));
77 throw Exception(
"Protein alphabet not supported.");
84 throw Exception(
"BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
89 throw Exception(
"Codon alphabet not supported.");
94 pFS = make_unique<FixedFrequencySet>(make_shared<const CanonicalStateMap>(alphabet,
false));
97 else if (freqName ==
"Full")
99 unsigned short method = 1;
100 if (args.find(
"method") != args.end())
102 if (args[
"method"] ==
"local")
104 else if (args[
"method"] ==
"binary")
111 pFS = make_unique<FullNucleotideFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet));
113 throw Exception(
"Nucleotide alphabet not supported.");
118 pFS = make_unique<FullProteinFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet),
false, method);
120 throw Exception(
"Protein alphabet not supported.");
127 throw Exception(
"BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
132 throw Exception(
"Codon alphabet not supported.");
138 pFS = make_unique<FullFrequencySet>(make_shared<CanonicalStateMap>(alphabet,
false),
false, method);
141 else if (freqName ==
"Empirical")
143 string fname = args[
"file"];
145 throw Exception(
"'file' argument missing for user-defined frequencies.");
148 if (args.find(
"col") != args.end())
154 pFS = make_unique<UserNucleotideFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet), fname, nCol);
156 throw Exception(
"Nucleotide alphabet not supported.");
161 pFS = make_unique<UserProteinFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet), fname, nCol);
163 throw Exception(
"Protein alphabet not supported.");
170 throw Exception(
"BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
171 pFS = make_unique<UserCodonFrequencySet>(
geneticCode_, fname, nCol);
175 throw Exception(
"Codon alphabet not supported.");
180 pFS = make_unique<UserFrequencySet>(make_shared<const CanonicalStateMap>(alphabet,
false), fname, nCol);
183 else if (freqName ==
"GC")
186 throw Exception(
"Error, invalid frequencies " + freqName +
" with non-nucleic alphabet.");
188 pFS = make_unique<GCFrequencySet>(dynamic_pointer_cast<const NucleicAlphabet>(alphabet));
190 throw Exception(
"Nucleotide alphabet not supported.");
194 else if (freqName ==
"Word")
197 throw Exception(
"Word alphabet not supported.");
199 throw Exception(
"BppOFrequencySetFormat::readFrequencySet(...).\n\t Bad alphabet type "
200 + alphabet->getAlphabetType() +
" for frequencies set " + freqName +
".");
202 auto pWA = dynamic_pointer_cast<const WordAlphabet>(alphabet);
205 throw Exception(
"BppOFrequencySetFormat::read : Word freq name is from WordAlphabet.");
207 if (args.find(
"frequency") != args.end())
209 string sAFS = args[
"frequency"];
211 unsigned int nbfreq = pWA->getLength();
213 for (
unsigned i = 0; i < nbfreq; ++i)
219 auto pFS2 = nestedReader.
readFrequencySet(pWA->getNAlphabet(0), sAFS, mData, nData,
false);
221 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
225 pFS = make_unique<WordFromUniqueFrequencySet>(pWA, std::move(pFS2));
229 if (args.find(
"frequency1") == args.end())
230 throw Exception(
"BppOFrequencySetFormat::read. Missing argument 'frequency' or 'frequency1' for frequencies set 'Word'.");
231 vector<string> v_sAFS;
232 vector<unique_ptr<FrequencySetInterface>> v_AFS;
233 unsigned int nbfreq = 1;
240 if (v_sAFS.size() != pWA->getLength())
243 for (
size_t i = 0; i < v_sAFS.size(); ++i)
246 pFS = nestedReader.
readFrequencySet(pWA->getNAlphabet(i), v_sAFS[i], mData, nData,
false);
248 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
252 v_AFS.push_back(std::move(pFS));
255 pFS = make_unique<WordFromIndependentFrequencySet>(pWA, v_AFS);
259 else if (freqName ==
"FromModel")
261 if (args.find(
"model") == args.end())
262 throw Exception(
"Missing argument 'model' for frequencies " + freqName +
".");
268 auto model = nestedReader.
readTransitionModel(alphabet, args[
"model"], mData, nData,
false);
269 pFS = make_unique<FromModelFrequencySet>(std::move(model));
271 for (
auto& it : unparsedParameterValuesNested)
279 else if (freqName ==
"Codon")
282 throw Exception(
"Codon alphabet not supported.");
284 throw Exception(
"BppOFrequencySetFormat::read.\n\t Bad alphabet type "
285 + alphabet->getAlphabetType() +
" for frequenciesset " + freqName +
".");
287 throw Exception(
"BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
289 auto pWA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
291 if (args.find(
"frequency") != args.end())
293 string sAFS = args[
"frequency"];
296 auto pFS2 = nestedReader.
readFrequencySet(pWA->getNucleicAlphabet(), sAFS, mData, nData,
false);
299 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
304 pFS = make_unique<CodonFromUniqueFrequencySet>(
geneticCode_, std::move(pFS2),
"Codon");
308 if (args.find(
"frequency1") == args.end())
309 throw Exception(
"BppOFrequencySetFormat::read. Missing argument 'frequency' or 'frequency1' for frequencies set.");
310 vector<string> v_sAFS;
311 vector<unique_ptr<FrequencySetInterface>> v_AFS;
312 unsigned int nbfreq = 1;
319 if (v_sAFS.size() != 3)
322 for (
size_t i = 0; i < v_sAFS.size(); ++i)
325 pFS = nestedReader.
readFrequencySet(pWA->getNucleicAlphabet(), v_sAFS[i], mData, nData,
false);
327 for (
auto& it : unparsedParameterValuesNested)
331 v_AFS.push_back(std::move(pFS));
335 throw Exception(
"BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
336 pFS = make_unique<CodonFromIndependentFrequencySet>(
geneticCode_, v_AFS,
"Codon");
341 else if (freqName ==
"FullPerAA")
344 throw Exception(
"Codon alphabet not supported.");
346 throw Exception(
"BppOFrequencySetFormat::read.\n\t Bad alphabet type "
347 + alphabet->getAlphabetType() +
" for frequenciesset " + freqName +
".");
350 throw Exception(
"BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
352 auto pPA = dynamic_pointer_cast<const ProteicAlphabet>(
geneticCode_->getTargetAlphabet());
354 unsigned short method = 1;
355 if (args.find(
"method") != args.end())
357 if (args[
"method"] ==
"local")
359 else if (args[
"method"] ==
"binary")
363 if (args.find(
"protein_frequencies") != args.end())
365 string sPFS = args[
"protein_frequencies"];
367 auto tmpPtr = nestedReader.
readFrequencySet(pPA, sPFS, mData, nData,
false);
371 for (
auto& it : unparsedParameterValuesNested)
375 pFS = make_unique<FullPerAACodonFrequencySet>(
geneticCode_, std::move(pPFS), method);
378 pFS = make_unique<FullPerAACodonFrequencySet>(
geneticCode_, method);
386 throw Exception(
"Codon alphabet not supported.");
388 throw Exception(
"BppOFrequencySetFormat::readFrequencySet(). No genetic code specified! Consider using 'setGeneticCode'.");
390 auto pWA = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
392 if (args.find(
"genetic_code") != args.end())
394 ApplicationTools::displayWarning(
"'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
395 throw Exception(
"BppOFrequencySetFormat::read. Deprecated 'genetic_code' argument.");
398 string mgmtStopCodon =
"quadratic";
400 if (freqName ==
"F0")
404 else if (freqName ==
"F1X4")
408 if (args.find(
"mgmtStopCodons") != args.end())
409 mgmtStopCodon = args[
"mgmtStopCodons"];
410 else if (args.find(
"mgmtStopCodon") != args.end())
411 mgmtStopCodon = args[
"mgmtStopCodon"];
416 if (args.find(
"frequency") != args.end())
418 string sAFS = args[
"frequency"];
421 auto pFS2 = nestedReader.
readFrequencySet(pWA->getNucleicAlphabet(), sAFS, mData, nData,
false);
422 if (pFS2->getName() !=
"Full")
423 throw Exception(
"BppOFrequencySetFormat::read. The frequency option in F1X4 can only be Full");
427 for (
auto& it : unparsedParameterValuesNested)
433 if (args.find(
"123_theta") != args.end())
435 if (args.find(
"123_theta1") != args.end())
437 if (args.find(
"123_theta2") != args.end())
439 if (args.find(
"theta") != args.end())
441 if (args.find(
"theta1") != args.end())
443 if (args.find(
"theta2") != args.end())
446 else if (freqName ==
"F3X4")
450 if (args.find(
"mgmtStopCodons") != args.end())
451 mgmtStopCodon = args[
"mgmtStopCodons"];
452 else if (args.find(
"mgmtStopCodon") != args.end())
453 mgmtStopCodon = args[
"mgmtStopCodon"];
458 if (args.find(
"frequency1") != args.end() ||
459 args.find(
"frequency2") != args.end() ||
460 args.find(
"frequency3") != args.end())
462 vector<string> v_sAFS;
464 for (
unsigned int nbfreq = 1; nbfreq <= 3; ++nbfreq)
469 v_sAFS.push_back(
"");
472 for (
size_t i = 0; i < v_sAFS.size(); ++i)
476 auto sAFS = v_sAFS[i];
479 pFS = nestedReader.
readFrequencySet(pWA->getNucleicAlphabet(), sAFS, mData, nData,
false);
480 if (pFS->getName() !=
"Full")
481 throw Exception(
"BppOFrequencySetFormat::read. The frequency options in F3X4 can only be Full");
484 for (
auto& it : unparsedParameterValuesNested)
491 for (
unsigned int i = 1; i <= 3; ++i)
501 else if (freqName ==
"F61")
507 unsigned short method = 1;
508 if (args.find(
"method") != args.end())
510 if (args[
"method"] ==
"local")
512 else if (args[
"method"] ==
"binary")
518 throw Exception(
"Unknown frequency option: " + freqName);
522 else if (freqName ==
"MVAprotein")
524 if (args.find(
"model") == args.end())
525 throw Exception(
"Missing argument 'model' for frequencies " + freqName +
".");
529 auto model = nestedReader.
readTransitionModel(alphabet, args[
"model"], mData, nData,
false);
530 if (
dynamic_cast<Coala*
>(model.get()) == 0)
531 throw Exception(
"MVAprotein frequency set needs a Coala model");
533 auto coala = unique_ptr<Coala>(
dynamic_cast<Coala*
>(model.release()));
536 auto mvaFS = make_unique<MvaFrequencySet>(dynamic_pointer_cast<const ProteicAlphabet>(alphabet));
538 mvaFS->initSet(*coala);
540 pFS = std::move(mvaFS);
543 throw Exception(
"Unknown frequency option: " + freqName);
546 vector<string> pnames = pFS->getParameters().getParameterNames();
548 string pref = pFS->getNamespace();
549 for (
size_t i = 0; i < pnames.size(); i++)
551 string name = pFS->getParameterNameWithoutNamespace(pnames[i]);
552 if (args.find(name) != args.end())
558 string pname, pval, pname2;
559 for (
size_t i = 0; i < pl.
size(); i++)
561 pname = pFS->getParameterNameWithoutNamespace(pl[i].getName());
563 if (args.find(pname) == args.end())
568 (pval.find(
"(") != string::npos))
571 for (
size_t j = 0; j < pl.
size() && !found; j++)
573 pname2 = pFS->getParameterNameWithoutNamespace(pl[j].getName());
581 pFS->aliasParameters(pname2, pname);
592 if (args.find(
"init") != args.end())
595 if (args.find(
"init.observedPseudoCount") != args.end())
598 std::shared_ptr<const AlignmentDataInterface> data(0);
600 data = mData.at(nData);
602 if (args.find(
"values") != args.end())
607 else if (parseArguments)
616 std::map<std::string, std::string>& globalAliases,
617 std::vector<std::string>& writtenNames)
const
624 if (find(writtenNames.begin(), writtenNames.end(), pn) != writtenNames.end())
634 string name = freqset.
getName();
637 bool oValues =
false;
639 if ((name ==
"Fixed") || (name ==
"F0") || (name ==
"Full"))
643 for (
size_t i = 0; i < vf.size(); ++i)
652 writtenNames.insert(writtenNames.end(), npl.begin(), npl.end());
662 out <<
"file=" << ufs.getPath();
663 size_t nCol = ufs.getColumnNumber();
666 out <<
", col=" << nCol;
679 for (
size_t i = 0; i < pWFI.getLength(); ++i)
683 out <<
"frequency" << i + 1 <<
"=";
684 writeFrequencySet(pWFI.frequencySetForLetter(i), out, globalAliases, writtenNames);
697 for (
size_t i = 0; i < pWFU.getLength(); ++i)
702 writeFrequencySet(pWFU.frequencySetForLetter(i), out, globalAliases, writtenNames);
721 bIO.
write(*(pFMFS.getModel()), out, globalAliases, writtenNames);
732 out <<
"protein_frequencies=";
734 if (pFPA.hasProteinFrequencySet())
744 unsigned short meth = pFPA.getMethod();
749 out <<
"method=" << ((meth == 2) ?
"local" :
"binary");
765 out <<
"method=" << ((meth == 2) ?
"local" :
"binary");
780 out <<
"method=" << ((meth == 2) ?
"local" :
"binary");
791 if (pCFU.getMgmtStopCodon() !=
"quadratic")
795 out <<
"mgmtStopCodons=" << pCFU.getMgmtStopCodon();
805 if (pCFI.getMgmtStopCodon() !=
"quadratic")
809 out <<
"mgmtStopCodons=" << pCFI.getMgmtStopCodon();
829 std::shared_ptr<const AlignmentDataInterface> data)
836 if (init ==
"observed")
839 throw Exception(
"Missing data number for init 'observed'.");
841 unsigned int psc = 0;
845 map<int, double> freqs;
850 else if (init.substr(0, 6) ==
"values")
853 vector<double> frequencies;
854 string rf = init.substr(6);
861 else if (init ==
"balanced")
866 throw Exception(
"Unknown init argument");
876 for (
size_t i = 0; i < pl.
size(); ++i)
884 for (
size_t i = 0; i < pl.
size(); ++i)
886 const string pName = pl[i].getName();
892 pl[i].setValue(value);
virtual void setMessageHandler(std::shared_ptr< OutputStream > mh)
The Coala branch-heterogeneous amino-acid substitution model.
static std::unique_ptr< CodonFrequencySetInterface > getFrequencySetForCodons(short option, std::shared_ptr< const GeneticCode > gCode, const std::string &mgmtStopCodon="quadratic", unsigned short method=1)
A helper function that provide frequencies set for codon models according to PAML option.
the Frequencies in codons are the product of Independent Frequencies in letters with the frequencies ...
the Frequencies in codons are the product of the frequencies for a unique FrequencySet in letters,...
FrequencySet for codons, with no parameter.
Parametrize a set of state frequencies.
virtual void setFrequenciesFromAlphabetStatesFrequencies(const std::map< int, double > &frequencies)=0
Set the Frequencies from the one of the map which keys match with a letter of the Alphabet....
virtual std::string getName() const =0
virtual const Vdouble & getFrequencies() const =0
virtual void setFrequencies(const std::vector< double > &frequencies)=0
Set the parameters in order to match a given set of frequencies.
FrequencySet defined from the equilibrium distribution of a given model.
A generic FrequencySet for Full Codon alphabets.
unsigned short getMethod() const
FrequencySet integrating ProteinFrequencySet inside CodonFrequencySet. In this case,...
Protein FrequencySet using 19 independent parameters to model the 20 frequencies.
virtual int getPrecision() const=0
virtual OutputStream & setPrecision(int digit)=0
virtual const ParameterList & getIndependentParameters() const=0
virtual std::vector< std::string > getParameterNames() const
virtual void deleteParameter(const std::string &name)
virtual void setParameter(size_t index, const Parameter ¶m)
virtual bool matchParametersValues(const ParameterList ¶meters)=0
virtual const ParameterList & getParameters() const=0
Parametrize a set of state frequencies for proteins.
const std::string & nextToken()
bool hasMoreToken() const
FrequencySet to be read in a file. More specifically, a frequency set is read in a column of a given ...
the Frequencies in words are the product of Independent Frequencies in letters
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isEmpty(const std::string &s)
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.