18 nbAlleles_(nbAlleles),
24 unsigned int size =
alph_->getSize();
26 auto sword =
alph_->getStateCodingSize();
31 string gapchar =
alph_->getState(
alph_->getGapCharacterCode()).getLetter();
33 auto gapword = gapchar + std::string(
"0", snb);
40 for (
int i = 0; i < static_cast<int>(size); ++i)
48 for (
int i = 0; i < static_cast<int>(size) - 1; ++i)
50 for (
int j = i + 1; j < static_cast<int>(size); ++j)
52 auto nbl = (i *
static_cast<int>(size) + j) *
static_cast<int>(
nbAlleles_ - 1) +
static_cast<int>(size);
53 for (
int nba = 1; nba < static_cast<int>(
nbAlleles_); ++nba)
55 auto sni = std::string(
"0", snb - std::to_string(
static_cast<int>(
nbAlleles_) - nba).size()) + std::to_string(
static_cast<int>(
nbAlleles_) - nba);
56 auto snj = std::string(
"0", snb - std::to_string(nba).size()) + std::to_string(nba);
57 auto desc =
alph_->intToChar(i) + sni +
alph_->intToChar(j) + snj;
67 auto desc = std::string(
"?", sword) + std::to_string(
nbAlleles_) + gapword;
75 return "Allelic(alphabet=" +
alph_->getAlphabetType() +
",nbAlleles_=" + std::to_string(
nbAlleles_) +
")";
84 throw BadIntException(state1,
"AllelicAlphabet::isResolvedIn(int, int): Specified base unknown.",
this);
87 throw BadIntException(state2,
"AllelicAlphabet::isResolvedIn(int, int): Specified base unknown.",
this);
90 throw BadIntException(state2,
"AllelicAlphabet::isResolvedIn(int, int): Unresolved base.",
this);
92 auto size =
alph_->getSize();
93 return (
static_cast<unsigned int>(state1) == size * size * (
nbAlleles_ - 1)) ? (state2 >= 0) : (state1 == state2);
101 throw BadIntException(state,
"AllelicAlphabet::getAlias(int): Specified base unknown.",
this);
106 return vector<int>(1, state);
115 throw BadCharException(locstate,
"AllelicAlphabet::getAlias(string): Specified base unknown.",
this);
120 return vector<string>(1, state);
130 if (!seq && !probseq)
131 throw Exception(
"AllelicAlphabet::convertFromStateAlphabet: unknown type for sequence: " + sequence.
getName());
133 auto alphabet = seq ? seq->getAlphabet() : probseq->getAlphabet();
138 auto alphaPtr = shared_from_this();
139 auto tSeq = make_unique<ProbabilisticSequence>(alphaPtr);
140 tSeq->setName(sequence.
getName());
143 auto tt = tSeq->getTable();
145 auto size = seq ? seq->size() : probseq->size();
147 auto alphasize = alphabet->getSize();
149 int gap = alphabet->getGapCharacterCode();
154 for (
unsigned int pos = 0; pos < size; ++pos)
159 int state = seq->getValue(pos);
162 for (
size_t a = 0; a < alphasize; ++a)
166 tSeq->addElement(tstate);
171 for (
size_t a = 0; a < alphasize; a++)
175 tstate[(size_t)state] = 1;
179 tstate = probseq->getValue(pos);
182 tSeq->addElement(likelihood);
192 if (counts.size() != alphasize)
193 throw BadSizeException(
"AllelicAlphabet::computeLikelihoods: bad vector size for counts.", alphasize, counts.size());
195 likelihoods.resize(
getSize(), 0);
198 std::vector<bool> presence(alphasize);
199 for (
size_t state = 0; state < alphasize; ++state)
205 for (
size_t state = 0; state < alphasize; ++state)
209 likelihoods[state] = 1;
210 for (
size_t ns = 0; ns < alphasize; ns++)
217 likelihoods[state] = 0;
223 likelihoods[state] = 0;
228 auto numetat = alphasize;
229 for (
unsigned int i = 0; i < alphasize - 1; ++i)
231 for (
unsigned int j = i + 1; j < alphasize; ++j)
234 for (
unsigned int ns = 0; ns < alphasize; ++ns)
236 if ((ns == i) || (ns == j))
240 for (
unsigned int nba = 1; nba <
nbAlleles_; ++nba)
242 likelihoods[numetat] = 0;
252 auto lnorm = std::lgamma(counts[
size_t(i)] + counts[
size_t(j)] + 1) - std::lgamma(counts[
size_t(i)] + 1) - std::lgamma(counts[
size_t(j)] + 1);
256 likelihoods[numetat] = std::exp(lprob + lnorm);
258 likelihoods[numetat] = 0;
const std::vector< int > & getSupportedInts() const
const AlphabetState & getState(const std::string &letter) const
Get a state by its letter.
const std::vector< std::string > & getSupportedChars() const
virtual void registerState(AlphabetState *st)
Add a state to the Alphabet.
bool isIntInAlphabet(int state) const
Tell if a state (specified by its int description) is allowed by the the alphabet.
bool isCharInAlphabet(const std::string &state) const
Tell if a state (specified by its string description) is allowed by the the alphabet.
unsigned int nbAlleles_
the number of alleles.
std::unique_ptr< ProbabilisticSequence > convertFromStateAlphabet(const CoreSequenceInterface &sequence) const
Convert a CoreSequence in StateAlphabet to a ProbabilisticSequence of the likelihoods of the di-allel...
std::shared_ptr< const Alphabet > getStateAlphabet() const
Returns Base Alphabet.
void computeLikelihoods(const Vdouble &counts, Vdouble &likelihoods) const
Fills the vector of the likelihoods of a vector of counts in the states alphabet, given the di-allele...
std::string getAlphabetType() const override
Identification method.
int nbUnknown_
the unknown state number
std::shared_ptr< const Alphabet > alph_
std::vector< int > getAlias(int state) const override
Get all resolved states that match a generic state.
AllelicAlphabet(std::shared_ptr< const Alphabet > alph, unsigned int nbAlleles)
Builds a new word alphabet from an Alphabet.
bool isUnresolved(int state) const override
bool isResolvedIn(int state1, int state2) const override
Tells if a given (potentially unresolved) state can be resolved in another resolved state.
unsigned int getSize() const override
Get the number of resolved states in the alphabet (e.g. return 4 for DNA alphabet)....
Exception thrown when two alphabets do not match.
This is the base class to describe states in an Alphabet.
An alphabet exception thrown when trying to specify a bad char to the alphabet.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
The core sequence interface.
virtual const std::string & getName() const =0
Get the name of this sequence.
The probabilistic sequence interface.
std::string toUpper(const std::string &s)
This alphabet is used to deal NumericAlphabet.
std::vector< double > Vdouble