bpp-phyl3 3.0.0
DRASDRTreeLikelihoodData.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "../../PatternTools.h"
7
8// From SeqLib:
9#include <Bpp/Seq/SiteTools.h>
10
11using namespace bpp;
12using namespace std;
13
14/******************************************************************************/
15
16void DRASDRTreeLikelihoodData::initLikelihoods(
17 const AlignmentDataInterface& sites,
18 const TransitionModelInterface& model)
19{
20 if (sites.getNumberOfSequences() == 1)
21 throw Exception("Error, only 1 sequence!");
22 if (sites.getNumberOfSequences() == 0)
23 throw Exception("Error, no sequence!");
24 if (sites.getAlphabet()->getAlphabetType()
25 != model.getAlphabet()->getAlphabetType())
26 throw AlphabetMismatchException("DRASDRTreeLikelihoodData::initLikelihoods. Data and model must have the same alphabet type.",
27 sites.getAlphabet(),
28 model.getAlphabet());
29 alphabet_ = sites.getAlphabet();
31 nbSites_ = sites.getNumberOfSites();
32
33 SitePatterns pattern(sites);
34 shrunkData_ = pattern.getSites();
35 rootWeights_ = pattern.getWeights();
36
37 rootPatternLinks_.resize(static_cast<size_t>(pattern.getIndices().size()));
38 SitePatterns::IndicesType::Map(&rootPatternLinks_[0], pattern.getIndices().size()) = pattern.getIndices();
39 nbDistinctSites_ = shrunkData_->getNumberOfSites();
40
41 // Init data:
42 initLikelihoods(tree_->getRootNode(), *shrunkData_, model);
43
44 // Now initialize root likelihoods and derivatives:
48 for (size_t i = 0; i < nbDistinctSites_; ++i)
49 {
50 VVdouble* rootLikelihoods_i_ = &rootLikelihoods_[i];
51 Vdouble* rootLikelihoodsS_i_ = &rootLikelihoodsS_[i];
52 rootLikelihoods_i_->resize(nbClasses_);
53 rootLikelihoodsS_i_->resize(nbClasses_);
54 for (size_t c = 0; c < nbClasses_; c++)
55 {
56 Vdouble* rootLikelihoods_i_c_ = &(*rootLikelihoods_i_)[c];
57 rootLikelihoods_i_c_->resize(nbStates_);
58 for (size_t x = 0; x < nbStates_; x++)
59 {
60 (*rootLikelihoods_i_c_)[x] = 1.;
61 }
62 }
63 }
64}
65
66/******************************************************************************/
67
69 const Node* node,
70 const AlignmentDataInterface& sites,
71 const TransitionModelInterface& model)
72{
73 if (node->isLeaf())
74 {
75 // Init leaves likelihoods:
76 size_t posSeq;
77 try
78 {
79 posSeq = sites.getSequencePosition(node->getName());
80 }
81 catch (SequenceNotFoundException& snfe)
82 {
83 throw SequenceNotFoundException("DRASDRTreeLikelihoodData::initlikelihoods. Leaf name in tree not found in site container: ", (node->getName()));
84 }
85 DRASDRTreeLikelihoodLeafData* leafData = &leafData_[node->getId()];
86 VVdouble* leavesLikelihoods_leaf = &leafData->getLikelihoodArray();
87 leafData->setNode(node);
88 leavesLikelihoods_leaf->resize(nbDistinctSites_);
89 for (size_t i = 0; i < nbDistinctSites_; i++)
90 {
91 Vdouble* leavesLikelihoods_leaf_i = &(*leavesLikelihoods_leaf)[i];
92 leavesLikelihoods_leaf_i->resize(nbStates_);
93 double test = 0.;
94
95 for (size_t s = 0; s < nbStates_; s++)
96 {
97 // Leaves likelihood are set to 1 if the char correspond to the site in the sequence,
98 // otherwise value set to 0:
99 ( *leavesLikelihoods_leaf_i)[s] = sites.getStateValueAt(i, posSeq, model.getAlphabetStateAsInt(s));
100 test += ( *leavesLikelihoods_leaf_i)[s];
101 }
102 if (test < 0.000001)
103 std::cerr << "WARNING!!! Likelihood will be 0 for site " << i << std::endl;
104 }
105 }
106
107 // We initialize each son node first:
108 size_t nbSonNodes = node->getNumberOfSons();
109 for (size_t l = 0; l < nbSonNodes; l++)
110 {
111 // For each son node,
112 initLikelihoods(node->getSon(l), sites, model);
113 }
114
115 // Initialize likelihood vector:
116 DRASDRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
117 std::map<int, VVVdouble>* likelihoods_node_ = &nodeData->getLikelihoodArrays();
118 nodeData->setNode(node);
119
120 int nbSons = static_cast<int>(node->getNumberOfSons());
121
122 for (int n = (node->hasFather() ? -1 : 0); n < nbSons; n++)
123 {
124 const Node* neighbor = (*node)[n];
125 VVVdouble* likelihoods_node_neighbor_ = &(*likelihoods_node_)[neighbor->getId()];
126
127 likelihoods_node_neighbor_->resize(nbDistinctSites_);
128
129 if (neighbor->isLeaf())
130 {
131 VVdouble* leavesLikelihoods_leaf_ = &leafData_[neighbor->getId()].getLikelihoodArray();
132 for (size_t i = 0; i < nbDistinctSites_; i++)
133 {
134 Vdouble* leavesLikelihoods_leaf_i_ = &(*leavesLikelihoods_leaf_)[i];
135 VVdouble* likelihoods_node_neighbor_i_ = &(*likelihoods_node_neighbor_)[i];
136 likelihoods_node_neighbor_i_->resize(nbClasses_);
137 for (size_t c = 0; c < nbClasses_; c++)
138 {
139 Vdouble* likelihoods_node_neighbor_i_c_ = &(*likelihoods_node_neighbor_i_)[c];
140 likelihoods_node_neighbor_i_c_->resize(nbStates_);
141 for (size_t s = 0; s < nbStates_; s++)
142 {
143 (*likelihoods_node_neighbor_i_c_)[s] = (*leavesLikelihoods_leaf_i_)[s];
144 }
145 }
146 }
147 }
148 else
149 {
150 for (size_t i = 0; i < nbDistinctSites_; i++)
151 {
152 VVdouble* likelihoods_node_neighbor_i_ = &(*likelihoods_node_neighbor_)[i];
153 likelihoods_node_neighbor_i_->resize(nbClasses_);
154 for (size_t c = 0; c < nbClasses_; c++)
155 {
156 Vdouble* likelihoods_node_neighbor_i_c_ = &(*likelihoods_node_neighbor_i_)[c];
157 likelihoods_node_neighbor_i_c_->resize(nbStates_);
158 for (size_t s = 0; s < nbStates_; s++)
159 {
160 (*likelihoods_node_neighbor_i_c_)[s] = 1.; // All likelihoods are initialized to 1.
161 }
162 }
163 }
164 }
165 }
166
167 // Initialize d and d2 likelihoods:
168 Vdouble* dLikelihoods_node_ = &nodeData->getDLikelihoodArray();
169 Vdouble* d2Likelihoods_node_ = &nodeData->getD2LikelihoodArray();
170 dLikelihoods_node_->resize(nbDistinctSites_);
171 d2Likelihoods_node_->resize(nbDistinctSites_);
172}
173
174/******************************************************************************/
175
177{
178 reInit(tree_->getRootNode());
179}
180
182{
183 if (node->isLeaf())
184 {
185 DRASDRTreeLikelihoodLeafData* leafData = &leafData_[node->getId()];
186 leafData->setNode(node);
187 }
188
189 DRASDRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
190 nodeData->setNode(node);
191 nodeData->eraseNeighborArrays();
192
193 int nbSons = static_cast<int>(node->getNumberOfSons());
194
195 for (int n = (node->hasFather() ? -1 : 0); n < nbSons; n++)
196 {
197 const Node* neighbor = (*node)[n];
198 VVVdouble* array = &nodeData->getLikelihoodArrayForNeighbor(neighbor->getId());
199
200 array->resize(nbDistinctSites_);
201 for (size_t i = 0; i < nbDistinctSites_; i++)
202 {
203 VVdouble* array_i = &(*array)[i];
204 array_i->resize(nbClasses_);
205 for (size_t c = 0; c < nbClasses_; c++)
206 {
207 Vdouble* array_i_c = &(*array_i)[c];
208 array_i_c->resize(nbStates_);
209 for (size_t s = 0; s < nbStates_; s++)
210 {
211 (*array_i_c)[s] = 1.; // All likelihoods are initialized to 1.
212 }
213 }
214 }
215 }
216
217 // We re-initialize each son node:
218 size_t nbSonNodes = node->getNumberOfSons();
219 for (size_t l = 0; l < nbSonNodes; l++)
220 {
221 // For each son node,
222 reInit(node->getSon(l));
223 }
224
225 nodeData->getDLikelihoodArray().resize(nbDistinctSites_);
226 nodeData->getD2LikelihoodArray().resize(nbDistinctSites_);
227}
228
229/******************************************************************************/
std::vector< size_t > rootPatternLinks_
Links between sites and patterns.
std::shared_ptr< const Alphabet > alphabet_
std::vector< unsigned int > rootWeights_
The frequency of each site.
std::shared_ptr< const TreeTemplate< Node > > tree_
virtual int getAlphabetStateAsInt(size_t index) const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual size_t getNumberOfStates() const =0
Get the number of states.
void reInit()
Rebuild likelihood arrays at inner nodes.
void initLikelihoods(const AlignmentDataInterface &sites, const TransitionModelInterface &model)
Resize and initialize all likelihood arrays according to the given data set and substitution model.
std::map< int, DRASDRTreeLikelihoodNodeData > nodeData_
std::map< int, DRASDRTreeLikelihoodLeafData > leafData_
std::shared_ptr< AlignmentDataInterface > shrunkData_
Likelihood data structure for a leaf.
void setNode(const Node *node)
Set the node associated to this data.
Likelihood data structure for a node.
void setNode(const Node *node)
Set the node associated to this data.
const std::map< int, VVVdouble > & getLikelihoodArrays() const
VVVdouble & getLikelihoodArrayForNeighbor(int neighborId)
The phylogenetic node class.
Definition: Node.h:59
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
Definition: Node.h:203
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual bool isLeaf() const
Definition: Node.h:679
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:346
virtual size_t getNumberOfSons() const
Definition: Node.h:355
Data structure for site patterns.
Definition: SitePatterns.h:35
const IndicesType & getIndices() const
Definition: SitePatterns.h:103
const std::vector< unsigned int > & getWeights() const
Definition: SitePatterns.h:99
std::unique_ptr< AlignmentDataInterface > getSites() const
virtual size_t getNumberOfSites() const=0
virtual size_t getNumberOfSequences() const =0
virtual double getStateValueAt(size_t sitePosition, const std::string &sequenceKey, int state) const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual size_t getSequencePosition(const std::string &sequenceKey) const =0
Interface for all transition models.
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble