bpp-phyl3  3.0.0
DRASRTreeLikelihoodData.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../../PatternTools.h"
7 
8 // From SeqLib:
9 #include <Bpp/Seq/SiteTools.h>
13 
14 using namespace bpp;
15 using namespace std;
16 
17 /******************************************************************************/
18 
20  const AlignmentDataInterface& sites,
21  const TransitionModelInterface& model)
22 {
23  if (sites.getNumberOfSequences() == 1)
24  throw Exception("Error, only 1 sequence!");
25  if (sites.getNumberOfSequences() == 0)
26  throw Exception("Error, no sequence!");
27  if (sites.getAlphabet()->getAlphabetType()
28  != model.getAlphabet()->getAlphabetType())
29  throw AlphabetMismatchException("DRASDRTreeLikelihoodData::initLikelihoods. Data and model must have the same alphabet type.",
30  sites.getAlphabet(),
31  model.getAlphabet());
32  alphabet_ = sites.getAlphabet();
33  nbStates_ = model.getNumberOfStates();
34  nbSites_ = sites.getNumberOfSites();
35  shared_ptr<SitePatterns> patterns;
36 
37  if (usePatterns_)
38  {
39  patterns = initLikelihoodsWithPatterns(tree_->getRootNode(), sites, model);
40  shrunkData_ = patterns->getSites();
41  rootWeights_ = patterns->getWeights();
42  rootPatternLinks_.resize((size_t)patterns->getIndices().size());
43  SitePatterns::IndicesType::Map(&rootPatternLinks_[0], patterns->getIndices().size()) = patterns->getIndices();
44  nbDistinctSites_ = shrunkData_->getNumberOfSites();
45  }
46  else
47  {
48  patterns = std::make_unique<SitePatterns>(sites);
49  shrunkData_ = patterns->getSites();
50  rootWeights_ = patterns->getWeights();
51  rootPatternLinks_.resize(size_t(patterns->getIndices().size()));
52  SitePatterns::IndicesType::Map(&rootPatternLinks_[0], patterns->getIndices().size()) = patterns->getIndices();
53  nbDistinctSites_ = shrunkData_->getNumberOfSites();
54  initLikelihoods(tree_->getRootNode(), *shrunkData_, model);
55  }
56 }
57 
58 /******************************************************************************/
59 
61  const Node* node,
62  const AlignmentDataInterface& sequences,
63  const TransitionModelInterface& model)
64 {
65  // Initialize likelihood vector:
66  DRASRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
67  nodeData->setNode(node);
68  VVVdouble* _likelihoods_node = &nodeData->getLikelihoodArray();
69  VVVdouble* _dLikelihoods_node = &nodeData->getDLikelihoodArray();
70  VVVdouble* _d2Likelihoods_node = &nodeData->getD2LikelihoodArray();
71 
72  _likelihoods_node->resize(nbDistinctSites_);
73  _dLikelihoods_node->resize(nbDistinctSites_);
74  _d2Likelihoods_node->resize(nbDistinctSites_);
75 
76  for (size_t i = 0; i < nbDistinctSites_; i++)
77  {
78  VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
79  VVdouble* _dLikelihoods_node_i = &(*_dLikelihoods_node)[i];
80  VVdouble* _d2Likelihoods_node_i = &(*_d2Likelihoods_node)[i];
81  _likelihoods_node_i->resize(nbClasses_);
82  _dLikelihoods_node_i->resize(nbClasses_);
83  _d2Likelihoods_node_i->resize(nbClasses_);
84  for (size_t c = 0; c < nbClasses_; c++)
85  {
86  Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
87  Vdouble* _dLikelihoods_node_i_c = &(*_dLikelihoods_node_i)[c];
88  Vdouble* _d2Likelihoods_node_i_c = &(*_d2Likelihoods_node_i)[c];
89  _likelihoods_node_i_c->resize(nbStates_);
90  _dLikelihoods_node_i_c->resize(nbStates_);
91  _d2Likelihoods_node_i_c->resize(nbStates_);
92  for (size_t s = 0; s < nbStates_; s++)
93  {
94  (*_likelihoods_node_i_c)[s] = 1; // All likelihoods are initialized to 1.
95  (*_dLikelihoods_node_i_c)[s] = 0; // All dLikelihoods are initialized to 0.
96  (*_d2Likelihoods_node_i_c)[s] = 0; // All d2Likelihoods are initialized to 0.
97  }
98  }
99  }
100 
101  // Now initialize likelihood values and pointers:
102  if (node->isLeaf())
103  {
104  size_t posSeq;
105  try
106  {
107  posSeq = sequences.getSequencePosition(node->getName());
108  }
109  catch (SequenceNotFoundException& snfe)
110  {
111  throw SequenceNotFoundException("DRASRTreeLikelihoodData::initTreelikelihoods. Leaf name in tree not found in site container: ", (node->getName()));
112  }
113  for (size_t i = 0; i < nbDistinctSites_; i++)
114  {
115  VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
116  for (size_t c = 0; c < nbClasses_; c++)
117  {
118  Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
119  double test = 0.;
120 
121  for (size_t s = 0; s < nbStates_; s++)
122  {
123  // Leaves likelihood are set to 1 if the char correspond to the site in the sequence,
124  // otherwise value set to 0:
125 
126  (*_likelihoods_node_i_c)[s] = sequences.getStateValueAt(i, posSeq, model.getAlphabetStateAsInt(s));
127  test += (*_likelihoods_node_i_c)[s];
128  }
129  if (test < 0.000001)
130  std::cerr << "WARNING!!! Likelihood will be 0 for site " << i << std::endl;
131  }
132  }
133  }
134  else
135  {
136  // 'node' is an internal node.
137  std::map<int, std::vector<size_t>>* patternLinks__node = &patternLinks_[node->getId()];
138  size_t nbSonNodes = node->getNumberOfSons();
139  for (size_t l = 0; l < nbSonNodes; l++)
140  {
141  // For each son node,
142  const Node* son = (*node)[static_cast<int>(l)];
143  initLikelihoods(son, sequences, model);
144  std::vector<size_t>* patternLinks__node_son = &(*patternLinks__node)[son->getId()];
145 
146  // Init map:
147  patternLinks__node_son->resize(nbDistinctSites_);
148 
149  for (size_t i = 0; i < nbDistinctSites_; i++)
150  {
151  (*patternLinks__node_son)[i] = i;
152  }
153  }
154  }
155 }
156 
157 /******************************************************************************/
158 
160  const Node* node,
161  const AlignmentDataInterface& sequences,
162  const TransitionModelInterface& model)
163 {
164  vector<const Node*> leaves = TreeTemplateTools::getLeaves(*node);
165 
166  auto tmp = PatternTools::getSequenceSubset(sequences, *node);
167 
168  auto patterns = std::make_unique<SitePatterns>(*tmp);
169 
170  auto subSequences = patterns->getSites();
171  size_t nbSites = subSequences->getNumberOfSites();
172 
173  // Initialize likelihood vector:
174  DRASRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
175  nodeData->setNode(node);
176  VVVdouble* _likelihoods_node = &nodeData->getLikelihoodArray();
177  VVVdouble* _dLikelihoods_node = &nodeData->getDLikelihoodArray();
178  VVVdouble* _d2Likelihoods_node = &nodeData->getD2LikelihoodArray();
179  _likelihoods_node->resize(nbSites);
180  _dLikelihoods_node->resize(nbSites);
181  _d2Likelihoods_node->resize(nbSites);
182 
183  for (size_t i = 0; i < nbSites; i++)
184  {
185  VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
186  VVdouble* _dLikelihoods_node_i = &(*_dLikelihoods_node)[i];
187  VVdouble* _d2Likelihoods_node_i = &(*_d2Likelihoods_node)[i];
188  _likelihoods_node_i->resize(nbClasses_);
189  _dLikelihoods_node_i->resize(nbClasses_);
190  _d2Likelihoods_node_i->resize(nbClasses_);
191  for (size_t c = 0; c < nbClasses_; c++)
192  {
193  Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
194  Vdouble* _dLikelihoods_node_i_c = &(*_dLikelihoods_node_i)[c];
195  Vdouble* _d2Likelihoods_node_i_c = &(*_d2Likelihoods_node_i)[c];
196  _likelihoods_node_i_c->resize(nbStates_);
197  _dLikelihoods_node_i_c->resize(nbStates_);
198  _d2Likelihoods_node_i_c->resize(nbStates_);
199  for (size_t s = 0; s < nbStates_; s++)
200  {
201  (*_likelihoods_node_i_c)[s] = 1; // All likelihoods are initialized to 1.
202  (*_dLikelihoods_node_i_c)[s] = 0; // All dLikelihoods are initialized to 0.
203  (*_d2Likelihoods_node_i_c)[s] = 0; // All d2Likelihoods are initialized to 0.
204  }
205  }
206  }
207 
208  // Now initialize likelihood values and pointers:
209 
210  if (node->isLeaf())
211  {
212  size_t posSeq;
213  try
214  {
215  posSeq = subSequences->getSequencePosition(node->getName());
216  }
217  catch (SequenceNotFoundException& snfe)
218  {
219  throw SequenceNotFoundException("HomogeneousTreeLikelihood::initTreelikelihoodsWithPatterns. Leaf name in tree not found in site container: ", (node->getName()));
220  }
221 
222  for (size_t i = 0; i < nbSites; i++)
223  {
224  VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
225 
226  for (size_t c = 0; c < nbClasses_; c++)
227  {
228  Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
229  double test = 0.;
230  for (size_t s = 0; s < nbStates_; s++)
231  {
232  // Leaves likelihood are set to 1 if the char correspond to the site in the sequence,
233  // otherwise value set to 0:
234  // cout << "i=" << i << "\tc=" << c << "\ts=" << s << endl;
235  (*_likelihoods_node_i_c)[s] = subSequences->getStateValueAt(i, posSeq, model.getAlphabetStateAsInt(s));
236  test += (*_likelihoods_node_i_c)[s];
237  }
238  if (test < 0.000001)
239  std::cerr << "WARNING!!! Likelihood will be 0 for site " << i << std::endl;
240  }
241  }
242  }
243  else
244  {
245  // 'node' is an internal node.
246  std::map<int, std::vector<size_t>>* patternLinks__node = &patternLinks_[node->getId()];
247 
248  // Now initialize pattern links:
249  size_t nbSonNodes = node->getNumberOfSons();
250  for (int l = 0; l < static_cast<int>(nbSonNodes); l++)
251  {
252  // For each son node,
253  const Node* son = (*node)[l];
254 
255  std::vector<size_t>* patternLinks__node_son = &(*patternLinks__node)[son->getId()];
256 
257  // Initialize subtree 'l' and retrieves corresponding
258  // subSequences:
259 
260  shared_ptr<SitePatterns> subPatterns(initLikelihoodsWithPatterns(son, *subSequences, model));
261 
262  patternLinks__node_son->resize(size_t(subPatterns->getIndices().size()));
263  SitePatterns::IndicesType::Map(&((*patternLinks__node_son)[0]), subPatterns->getIndices().size()) = subPatterns->getIndices();
264  }
265  }
266  return patterns;
267 }
268 
269 /******************************************************************************/
virtual int getAlphabetStateAsInt(size_t index) const =0
virtual size_t getNumberOfStates() const =0
Get the number of states.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual std::unique_ptr< SitePatterns > initLikelihoodsWithPatterns(const Node *node, const AlignmentDataInterface &sequences, const TransitionModelInterface &model)
This method initializes the leaves according to a sequence file.
void initLikelihoods(const AlignmentDataInterface &sites, const TransitionModelInterface &model)
Likelihood data structure for a node.
void setNode(const Node *node)
Set the node associated to this data.
The phylogenetic node class.
Definition: Node.h:59
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
Definition: Node.h:203
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual bool isLeaf() const
Definition: Node.h:667
virtual size_t getNumberOfSons() const
Definition: Node.h:355
static std::unique_ptr< AlignmentDataInterface > getSequenceSubset(const AlignmentDataInterface &sequenceSet, const std::shared_ptr< N > node, const AssociationTreeGraphImplObserver< N, E, I > &tree)
Extract the sequences corresponding to a given subtree.
Definition: PatternTools.h:44
virtual size_t getNumberOfSites() const=0
virtual size_t getNumberOfSequences() const =0
virtual double getStateValueAt(size_t sitePosition, const std::string &sequenceKey, int state) const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
virtual size_t getSequencePosition(const std::string &sequenceKey) const =0
Interface for all transition models.
static std::vector< N * > getLeaves(N &node)
Retrieve all leaves from a subtree.
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble