bpp-phyl3  3.0.0
DRASDRTreeLikelihoodData.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../../PatternTools.h"
7 
8 // From SeqLib:
9 #include <Bpp/Seq/SiteTools.h>
10 
11 using namespace bpp;
12 using namespace std;
13 
14 /******************************************************************************/
15 
17  const AlignmentDataInterface& sites,
18  const TransitionModelInterface& model)
19 {
20  if (sites.getNumberOfSequences() == 1)
21  throw Exception("Error, only 1 sequence!");
22  if (sites.getNumberOfSequences() == 0)
23  throw Exception("Error, no sequence!");
24  if (sites.getAlphabet()->getAlphabetType()
25  != model.getAlphabet()->getAlphabetType())
26  throw AlphabetMismatchException("DRASDRTreeLikelihoodData::initLikelihoods. Data and model must have the same alphabet type.",
27  sites.getAlphabet(),
28  model.getAlphabet());
29  alphabet_ = sites.getAlphabet();
30  nbStates_ = model.getNumberOfStates();
31  nbSites_ = sites.getNumberOfSites();
32 
33  SitePatterns pattern(sites);
34  shrunkData_ = pattern.getSites();
35  rootWeights_ = pattern.getWeights();
36 
37  rootPatternLinks_.resize(static_cast<size_t>(pattern.getIndices().size()));
38  SitePatterns::IndicesType::Map(&rootPatternLinks_[0], pattern.getIndices().size()) = pattern.getIndices();
39  nbDistinctSites_ = shrunkData_->getNumberOfSites();
40 
41  // Init data:
42  initLikelihoods(tree_->getRootNode(), *shrunkData_, model);
43 
44  // Now initialize root likelihoods and derivatives:
45  rootLikelihoods_.resize(nbDistinctSites_);
46  rootLikelihoodsS_.resize(nbDistinctSites_);
47  rootLikelihoodsSR_.resize(nbDistinctSites_);
48  for (size_t i = 0; i < nbDistinctSites_; ++i)
49  {
50  VVdouble* rootLikelihoods_i_ = &rootLikelihoods_[i];
51  Vdouble* rootLikelihoodsS_i_ = &rootLikelihoodsS_[i];
52  rootLikelihoods_i_->resize(nbClasses_);
53  rootLikelihoodsS_i_->resize(nbClasses_);
54  for (size_t c = 0; c < nbClasses_; c++)
55  {
56  Vdouble* rootLikelihoods_i_c_ = &(*rootLikelihoods_i_)[c];
57  rootLikelihoods_i_c_->resize(nbStates_);
58  for (size_t x = 0; x < nbStates_; x++)
59  {
60  (*rootLikelihoods_i_c_)[x] = 1.;
61  }
62  }
63  }
64 }
65 
66 /******************************************************************************/
67 
69  const Node* node,
70  const AlignmentDataInterface& sites,
71  const TransitionModelInterface& model)
72 {
73  if (node->isLeaf())
74  {
75  // Init leaves likelihoods:
76  size_t posSeq;
77  try
78  {
79  posSeq = sites.getSequencePosition(node->getName());
80  }
81  catch (SequenceNotFoundException& snfe)
82  {
83  throw SequenceNotFoundException("DRASDRTreeLikelihoodData::initlikelihoods. Leaf name in tree not found in site container: ", (node->getName()));
84  }
85  DRASDRTreeLikelihoodLeafData* leafData = &leafData_[node->getId()];
86  VVdouble* leavesLikelihoods_leaf = &leafData->getLikelihoodArray();
87  leafData->setNode(node);
88  leavesLikelihoods_leaf->resize(nbDistinctSites_);
89  for (size_t i = 0; i < nbDistinctSites_; i++)
90  {
91  Vdouble* leavesLikelihoods_leaf_i = &(*leavesLikelihoods_leaf)[i];
92  leavesLikelihoods_leaf_i->resize(nbStates_);
93  double test = 0.;
94 
95  for (size_t s = 0; s < nbStates_; s++)
96  {
97  // Leaves likelihood are set to 1 if the char correspond to the site in the sequence,
98  // otherwise value set to 0:
99  ( *leavesLikelihoods_leaf_i)[s] = sites.getStateValueAt(i, posSeq, model.getAlphabetStateAsInt(s));
100  test += ( *leavesLikelihoods_leaf_i)[s];
101  }
102  if (test < 0.000001)
103  std::cerr << "WARNING!!! Likelihood will be 0 for site " << i << std::endl;
104  }
105  }
106 
107  // We initialize each son node first:
108  size_t nbSonNodes = node->getNumberOfSons();
109  for (size_t l = 0; l < nbSonNodes; l++)
110  {
111  // For each son node,
112  initLikelihoods(node->getSon(l), sites, model);
113  }
114 
115  // Initialize likelihood vector:
116  DRASDRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
117  std::map<int, VVVdouble>* likelihoods_node_ = &nodeData->getLikelihoodArrays();
118  nodeData->setNode(node);
119 
120  int nbSons = static_cast<int>(node->getNumberOfSons());
121 
122  for (int n = (node->hasFather() ? -1 : 0); n < nbSons; n++)
123  {
124  const Node* neighbor = (*node)[n];
125  VVVdouble* likelihoods_node_neighbor_ = &(*likelihoods_node_)[neighbor->getId()];
126 
127  likelihoods_node_neighbor_->resize(nbDistinctSites_);
128 
129  if (neighbor->isLeaf())
130  {
131  VVdouble* leavesLikelihoods_leaf_ = &leafData_[neighbor->getId()].getLikelihoodArray();
132  for (size_t i = 0; i < nbDistinctSites_; i++)
133  {
134  Vdouble* leavesLikelihoods_leaf_i_ = &(*leavesLikelihoods_leaf_)[i];
135  VVdouble* likelihoods_node_neighbor_i_ = &(*likelihoods_node_neighbor_)[i];
136  likelihoods_node_neighbor_i_->resize(nbClasses_);
137  for (size_t c = 0; c < nbClasses_; c++)
138  {
139  Vdouble* likelihoods_node_neighbor_i_c_ = &(*likelihoods_node_neighbor_i_)[c];
140  likelihoods_node_neighbor_i_c_->resize(nbStates_);
141  for (size_t s = 0; s < nbStates_; s++)
142  {
143  (*likelihoods_node_neighbor_i_c_)[s] = (*leavesLikelihoods_leaf_i_)[s];
144  }
145  }
146  }
147  }
148  else
149  {
150  for (size_t i = 0; i < nbDistinctSites_; i++)
151  {
152  VVdouble* likelihoods_node_neighbor_i_ = &(*likelihoods_node_neighbor_)[i];
153  likelihoods_node_neighbor_i_->resize(nbClasses_);
154  for (size_t c = 0; c < nbClasses_; c++)
155  {
156  Vdouble* likelihoods_node_neighbor_i_c_ = &(*likelihoods_node_neighbor_i_)[c];
157  likelihoods_node_neighbor_i_c_->resize(nbStates_);
158  for (size_t s = 0; s < nbStates_; s++)
159  {
160  (*likelihoods_node_neighbor_i_c_)[s] = 1.; // All likelihoods are initialized to 1.
161  }
162  }
163  }
164  }
165  }
166 
167  // Initialize d and d2 likelihoods:
168  Vdouble* dLikelihoods_node_ = &nodeData->getDLikelihoodArray();
169  Vdouble* d2Likelihoods_node_ = &nodeData->getD2LikelihoodArray();
170  dLikelihoods_node_->resize(nbDistinctSites_);
171  d2Likelihoods_node_->resize(nbDistinctSites_);
172 }
173 
174 /******************************************************************************/
175 
177 {
178  reInit(tree_->getRootNode());
179 }
180 
182 {
183  if (node->isLeaf())
184  {
185  DRASDRTreeLikelihoodLeafData* leafData = &leafData_[node->getId()];
186  leafData->setNode(node);
187  }
188 
189  DRASDRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
190  nodeData->setNode(node);
191  nodeData->eraseNeighborArrays();
192 
193  int nbSons = static_cast<int>(node->getNumberOfSons());
194 
195  for (int n = (node->hasFather() ? -1 : 0); n < nbSons; n++)
196  {
197  const Node* neighbor = (*node)[n];
198  VVVdouble* array = &nodeData->getLikelihoodArrayForNeighbor(neighbor->getId());
199 
200  array->resize(nbDistinctSites_);
201  for (size_t i = 0; i < nbDistinctSites_; i++)
202  {
203  VVdouble* array_i = &(*array)[i];
204  array_i->resize(nbClasses_);
205  for (size_t c = 0; c < nbClasses_; c++)
206  {
207  Vdouble* array_i_c = &(*array_i)[c];
208  array_i_c->resize(nbStates_);
209  for (size_t s = 0; s < nbStates_; s++)
210  {
211  (*array_i_c)[s] = 1.; // All likelihoods are initialized to 1.
212  }
213  }
214  }
215  }
216 
217  // We re-initialize each son node:
218  size_t nbSonNodes = node->getNumberOfSons();
219  for (size_t l = 0; l < nbSonNodes; l++)
220  {
221  // For each son node,
222  reInit(node->getSon(l));
223  }
224 
225  nodeData->getDLikelihoodArray().resize(nbDistinctSites_);
226  nodeData->getD2LikelihoodArray().resize(nbDistinctSites_);
227 }
228 
229 /******************************************************************************/
virtual int getAlphabetStateAsInt(size_t index) const =0
virtual size_t getNumberOfStates() const =0
Get the number of states.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
void reInit()
Rebuild likelihood arrays at inner nodes.
void initLikelihoods(const AlignmentDataInterface &sites, const TransitionModelInterface &model)
Resize and initialize all likelihood arrays according to the given data set and substitution model.
Likelihood data structure for a leaf.
void setNode(const Node *node)
Set the node associated to this data.
Likelihood data structure for a node.
void setNode(const Node *node)
Set the node associated to this data.
const std::map< int, VVVdouble > & getLikelihoodArrays() const
VVVdouble & getLikelihoodArrayForNeighbor(int neighborId)
The phylogenetic node class.
Definition: Node.h:59
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
Definition: Node.h:203
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual bool isLeaf() const
Definition: Node.h:667
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:346
virtual size_t getNumberOfSons() const
Definition: Node.h:355
Data structure for site patterns.
Definition: SitePatterns.h:35
const std::vector< unsigned int > & getWeights() const
Definition: SitePatterns.h:99
const IndicesType & getIndices() const
Definition: SitePatterns.h:103
std::unique_ptr< AlignmentDataInterface > getSites() const
virtual size_t getNumberOfSites() const=0
virtual size_t getNumberOfSequences() const =0
virtual double getStateValueAt(size_t sitePosition, const std::string &sequenceKey, int state) const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual size_t getSequencePosition(const std::string &sequenceKey) const =0
Interface for all transition models.
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble