bpp-phyl3 3.0.0
DRASRTreeLikelihoodData.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "../../PatternTools.h"
7
8// From SeqLib:
9#include <Bpp/Seq/SiteTools.h>
13
14using namespace bpp;
15using namespace std;
16
17/******************************************************************************/
18
19void DRASRTreeLikelihoodData::initLikelihoods(
20 const AlignmentDataInterface& sites,
21 const TransitionModelInterface& model)
22{
23 if (sites.getNumberOfSequences() == 1)
24 throw Exception("Error, only 1 sequence!");
25 if (sites.getNumberOfSequences() == 0)
26 throw Exception("Error, no sequence!");
27 if (sites.getAlphabet()->getAlphabetType()
28 != model.getAlphabet()->getAlphabetType())
29 throw AlphabetMismatchException("DRASDRTreeLikelihoodData::initLikelihoods. Data and model must have the same alphabet type.",
30 sites.getAlphabet(),
31 model.getAlphabet());
32 alphabet_ = sites.getAlphabet();
34 nbSites_ = sites.getNumberOfSites();
35 shared_ptr<SitePatterns> patterns;
36
37 if (usePatterns_)
38 {
39 patterns = initLikelihoodsWithPatterns(tree_->getRootNode(), sites, model);
40 shrunkData_ = patterns->getSites();
41 rootWeights_ = patterns->getWeights();
42 rootPatternLinks_.resize((size_t)patterns->getIndices().size());
43 SitePatterns::IndicesType::Map(&rootPatternLinks_[0], patterns->getIndices().size()) = patterns->getIndices();
44 nbDistinctSites_ = shrunkData_->getNumberOfSites();
45 }
46 else
47 {
48 patterns = std::make_unique<SitePatterns>(sites);
49 shrunkData_ = patterns->getSites();
50 rootWeights_ = patterns->getWeights();
51 rootPatternLinks_.resize(size_t(patterns->getIndices().size()));
52 SitePatterns::IndicesType::Map(&rootPatternLinks_[0], patterns->getIndices().size()) = patterns->getIndices();
53 nbDistinctSites_ = shrunkData_->getNumberOfSites();
54 initLikelihoods(tree_->getRootNode(), *shrunkData_, model);
55 }
56}
57
58/******************************************************************************/
59
61 const Node* node,
62 const AlignmentDataInterface& sequences,
63 const TransitionModelInterface& model)
64{
65 // Initialize likelihood vector:
66 DRASRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
67 nodeData->setNode(node);
68 VVVdouble* _likelihoods_node = &nodeData->getLikelihoodArray();
69 VVVdouble* _dLikelihoods_node = &nodeData->getDLikelihoodArray();
70 VVVdouble* _d2Likelihoods_node = &nodeData->getD2LikelihoodArray();
71
72 _likelihoods_node->resize(nbDistinctSites_);
73 _dLikelihoods_node->resize(nbDistinctSites_);
74 _d2Likelihoods_node->resize(nbDistinctSites_);
75
76 for (size_t i = 0; i < nbDistinctSites_; i++)
77 {
78 VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
79 VVdouble* _dLikelihoods_node_i = &(*_dLikelihoods_node)[i];
80 VVdouble* _d2Likelihoods_node_i = &(*_d2Likelihoods_node)[i];
81 _likelihoods_node_i->resize(nbClasses_);
82 _dLikelihoods_node_i->resize(nbClasses_);
83 _d2Likelihoods_node_i->resize(nbClasses_);
84 for (size_t c = 0; c < nbClasses_; c++)
85 {
86 Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
87 Vdouble* _dLikelihoods_node_i_c = &(*_dLikelihoods_node_i)[c];
88 Vdouble* _d2Likelihoods_node_i_c = &(*_d2Likelihoods_node_i)[c];
89 _likelihoods_node_i_c->resize(nbStates_);
90 _dLikelihoods_node_i_c->resize(nbStates_);
91 _d2Likelihoods_node_i_c->resize(nbStates_);
92 for (size_t s = 0; s < nbStates_; s++)
93 {
94 (*_likelihoods_node_i_c)[s] = 1; // All likelihoods are initialized to 1.
95 (*_dLikelihoods_node_i_c)[s] = 0; // All dLikelihoods are initialized to 0.
96 (*_d2Likelihoods_node_i_c)[s] = 0; // All d2Likelihoods are initialized to 0.
97 }
98 }
99 }
100
101 // Now initialize likelihood values and pointers:
102 if (node->isLeaf())
103 {
104 size_t posSeq;
105 try
106 {
107 posSeq = sequences.getSequencePosition(node->getName());
108 }
109 catch (SequenceNotFoundException& snfe)
110 {
111 throw SequenceNotFoundException("DRASRTreeLikelihoodData::initTreelikelihoods. Leaf name in tree not found in site container: ", (node->getName()));
112 }
113 for (size_t i = 0; i < nbDistinctSites_; i++)
114 {
115 VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
116 for (size_t c = 0; c < nbClasses_; c++)
117 {
118 Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
119 double test = 0.;
120
121 for (size_t s = 0; s < nbStates_; s++)
122 {
123 // Leaves likelihood are set to 1 if the char correspond to the site in the sequence,
124 // otherwise value set to 0:
125
126 (*_likelihoods_node_i_c)[s] = sequences.getStateValueAt(i, posSeq, model.getAlphabetStateAsInt(s));
127 test += (*_likelihoods_node_i_c)[s];
128 }
129 if (test < 0.000001)
130 std::cerr << "WARNING!!! Likelihood will be 0 for site " << i << std::endl;
131 }
132 }
133 }
134 else
135 {
136 // 'node' is an internal node.
137 std::map<int, std::vector<size_t>>* patternLinks__node = &patternLinks_[node->getId()];
138 size_t nbSonNodes = node->getNumberOfSons();
139 for (size_t l = 0; l < nbSonNodes; l++)
140 {
141 // For each son node,
142 const Node* son = (*node)[static_cast<int>(l)];
143 initLikelihoods(son, sequences, model);
144 std::vector<size_t>* patternLinks__node_son = &(*patternLinks__node)[son->getId()];
145
146 // Init map:
147 patternLinks__node_son->resize(nbDistinctSites_);
148
149 for (size_t i = 0; i < nbDistinctSites_; i++)
150 {
151 (*patternLinks__node_son)[i] = i;
152 }
153 }
154 }
155}
156
157/******************************************************************************/
158
160 const Node* node,
161 const AlignmentDataInterface& sequences,
162 const TransitionModelInterface& model)
163{
164 vector<const Node*> leaves = TreeTemplateTools::getLeaves(*node);
165
166 auto tmp = PatternTools::getSequenceSubset(sequences, *node);
167
168 auto patterns = std::make_unique<SitePatterns>(*tmp);
169
170 auto subSequences = patterns->getSites();
171 size_t nbSites = subSequences->getNumberOfSites();
172
173 // Initialize likelihood vector:
174 DRASRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
175 nodeData->setNode(node);
176 VVVdouble* _likelihoods_node = &nodeData->getLikelihoodArray();
177 VVVdouble* _dLikelihoods_node = &nodeData->getDLikelihoodArray();
178 VVVdouble* _d2Likelihoods_node = &nodeData->getD2LikelihoodArray();
179 _likelihoods_node->resize(nbSites);
180 _dLikelihoods_node->resize(nbSites);
181 _d2Likelihoods_node->resize(nbSites);
182
183 for (size_t i = 0; i < nbSites; i++)
184 {
185 VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
186 VVdouble* _dLikelihoods_node_i = &(*_dLikelihoods_node)[i];
187 VVdouble* _d2Likelihoods_node_i = &(*_d2Likelihoods_node)[i];
188 _likelihoods_node_i->resize(nbClasses_);
189 _dLikelihoods_node_i->resize(nbClasses_);
190 _d2Likelihoods_node_i->resize(nbClasses_);
191 for (size_t c = 0; c < nbClasses_; c++)
192 {
193 Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
194 Vdouble* _dLikelihoods_node_i_c = &(*_dLikelihoods_node_i)[c];
195 Vdouble* _d2Likelihoods_node_i_c = &(*_d2Likelihoods_node_i)[c];
196 _likelihoods_node_i_c->resize(nbStates_);
197 _dLikelihoods_node_i_c->resize(nbStates_);
198 _d2Likelihoods_node_i_c->resize(nbStates_);
199 for (size_t s = 0; s < nbStates_; s++)
200 {
201 (*_likelihoods_node_i_c)[s] = 1; // All likelihoods are initialized to 1.
202 (*_dLikelihoods_node_i_c)[s] = 0; // All dLikelihoods are initialized to 0.
203 (*_d2Likelihoods_node_i_c)[s] = 0; // All d2Likelihoods are initialized to 0.
204 }
205 }
206 }
207
208 // Now initialize likelihood values and pointers:
209
210 if (node->isLeaf())
211 {
212 size_t posSeq;
213 try
214 {
215 posSeq = subSequences->getSequencePosition(node->getName());
216 }
217 catch (SequenceNotFoundException& snfe)
218 {
219 throw SequenceNotFoundException("HomogeneousTreeLikelihood::initTreelikelihoodsWithPatterns. Leaf name in tree not found in site container: ", (node->getName()));
220 }
221
222 for (size_t i = 0; i < nbSites; i++)
223 {
224 VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
225
226 for (size_t c = 0; c < nbClasses_; c++)
227 {
228 Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
229 double test = 0.;
230 for (size_t s = 0; s < nbStates_; s++)
231 {
232 // Leaves likelihood are set to 1 if the char correspond to the site in the sequence,
233 // otherwise value set to 0:
234 // cout << "i=" << i << "\tc=" << c << "\ts=" << s << endl;
235 (*_likelihoods_node_i_c)[s] = subSequences->getStateValueAt(i, posSeq, model.getAlphabetStateAsInt(s));
236 test += (*_likelihoods_node_i_c)[s];
237 }
238 if (test < 0.000001)
239 std::cerr << "WARNING!!! Likelihood will be 0 for site " << i << std::endl;
240 }
241 }
242 }
243 else
244 {
245 // 'node' is an internal node.
246 std::map<int, std::vector<size_t>>* patternLinks__node = &patternLinks_[node->getId()];
247
248 // Now initialize pattern links:
249 size_t nbSonNodes = node->getNumberOfSons();
250 for (int l = 0; l < static_cast<int>(nbSonNodes); l++)
251 {
252 // For each son node,
253 const Node* son = (*node)[l];
254
255 std::vector<size_t>* patternLinks__node_son = &(*patternLinks__node)[son->getId()];
256
257 // Initialize subtree 'l' and retrieves corresponding
258 // subSequences:
259
260 shared_ptr<SitePatterns> subPatterns(initLikelihoodsWithPatterns(son, *subSequences, model));
261
262 patternLinks__node_son->resize(size_t(subPatterns->getIndices().size()));
263 SitePatterns::IndicesType::Map(&((*patternLinks__node_son)[0]), subPatterns->getIndices().size()) = subPatterns->getIndices();
264 }
265 }
266 return patterns;
267}
268
269/******************************************************************************/
std::vector< size_t > rootPatternLinks_
Links between sites and patterns.
std::shared_ptr< const Alphabet > alphabet_
std::vector< unsigned int > rootWeights_
The frequency of each site.
std::shared_ptr< const TreeTemplate< Node > > tree_
virtual int getAlphabetStateAsInt(size_t index) const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual size_t getNumberOfStates() const =0
Get the number of states.
std::shared_ptr< AlignmentDataInterface > shrunkData_
virtual std::unique_ptr< SitePatterns > initLikelihoodsWithPatterns(const Node *node, const AlignmentDataInterface &sequences, const TransitionModelInterface &model)
This method initializes the leaves according to a sequence file.
void initLikelihoods(const AlignmentDataInterface &sites, const TransitionModelInterface &model)
std::map< int, std::map< int, std::vector< size_t > > > patternLinks_
This map defines the pattern network.
std::map< int, DRASRTreeLikelihoodNodeData > nodeData_
This contains all likelihood values used for computation.
Likelihood data structure for a node.
void setNode(const Node *node)
Set the node associated to this data.
The phylogenetic node class.
Definition: Node.h:59
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
Definition: Node.h:203
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual bool isLeaf() const
Definition: Node.h:679
virtual size_t getNumberOfSons() const
Definition: Node.h:355
static std::unique_ptr< AlignmentDataInterface > getSequenceSubset(const AlignmentDataInterface &sequenceSet, const std::shared_ptr< N > node, const AssociationTreeGraphImplObserver< N, E, I > &tree)
Extract the sequences corresponding to a given subtree.
Definition: PatternTools.h:44
virtual size_t getNumberOfSites() const=0
virtual size_t getNumberOfSequences() const =0
virtual double getStateValueAt(size_t sitePosition, const std::string &sequenceKey, int state) const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual size_t getSequencePosition(const std::string &sequenceKey) const =0
Interface for all transition models.
static std::vector< N * > getLeaves(N &node)
Retrieve all leaves from a subtree.
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble