bpp-phyl3  3.0.0
PartitionProcessPhyloLikelihood.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 
10 
11 using namespace std;
12 using namespace bpp;
13 
14 /******************************************************************************/
15 
16 PartitionProcessPhyloLikelihood::PartitionProcessPhyloLikelihood(
17  Context& context,
18  std::shared_ptr<PartitionSequenceEvolution> processSeqEvol,
19  size_t nSeqEvol) :
20  AbstractPhyloLikelihood(context),
23  context, processSeqEvol->getNumberOfSites(),
24  (processSeqEvol->getSubstitutionProcessNumbers().size() != 0) ? processSeqEvol->substitutionProcess(processSeqEvol->getSubstitutionProcessNumbers()[0]).getNumberOfStates() : 0, 0),
26  AbstractSequencePhyloLikelihood(context, processSeqEvol, nSeqEvol),
27  AbstractPhyloLikelihoodSet(context, std::make_shared<PhyloLikelihoodContainer>(context, processSeqEvol->getCollection())),
28  mSeqEvol_(processSeqEvol),
29  vProcPos_(),
30  mData_(),
31  likCal_(new AlignedLikelihoodCalculation(context))
32 {
33  // make new shared Parameters
34  auto collNodes = pPhyloCont_->getCollectionNodes();
35 
36  //
37 
38  map<size_t, vector<size_t>>& mProcPos = processSeqEvol->mapOfProcessSites();
39 
40  vProcPos_.resize(processSeqEvol->getNumberOfSites());
41 
42  // Build the PhyloLikelihoodContainer
43  auto pC = getPhyloContainer();
44 
45  for (const auto& it : mProcPos)
46  {
47  auto nProcess = it.first;
48 
49  auto l = std::make_shared<LikelihoodCalculationSingleProcess>(collNodes, nProcess);
50 
51  auto nPL = make_shared<SingleProcessPhyloLikelihood>(context, l, nProcess);
52 
53  pC->addPhyloLikelihood(nProcess, nPL);
54 
55  for (size_t i = 0; i < it.second.size(); ++i)
56  {
57  vProcPos_[it.second[i]].nProc = nProcess;
58  vProcPos_[it.second[i]].pos = i;
59  }
60 
61  addPhyloLikelihood(nProcess);
62  }
63 }
64 
65 
66 /******************************************************************************/
67 
69  Context& context,
70  std::shared_ptr<const AlignmentDataInterface> data,
71  std::shared_ptr<PartitionSequenceEvolution> processSeqEvol,
72  size_t nSeqEvol,
73  size_t nData) :
74  AbstractPhyloLikelihood(context),
75  AbstractAlignedPhyloLikelihood(context, data->getNumberOfSites()),
77  context, data->getNumberOfSites(),
78  (processSeqEvol->getSubstitutionProcessNumbers().size() != 0) ? processSeqEvol->substitutionProcess(processSeqEvol->getSubstitutionProcessNumbers()[0]).getNumberOfStates() : 0, nData),
80  AbstractSequencePhyloLikelihood(context, processSeqEvol, nSeqEvol, nData),
81  AbstractPhyloLikelihoodSet(context, std::make_shared<PhyloLikelihoodContainer>(context, processSeqEvol->getCollection())),
82  mSeqEvol_(processSeqEvol),
83  vProcPos_(),
84  mData_(),
85  likCal_(new AlignedLikelihoodCalculation(context))
86 {
87  if (data->getNumberOfSites() != processSeqEvol->getNumberOfSites())
88  throw BadIntegerException("PartitionProcessPhyloLikelihood::PartitionProcessPhyloLikelihood, data and sequence process lengths do not match.", static_cast<int>(data->getNumberOfSites()));
89 
90  // get new shared Parameters
91  auto collNodes = pPhyloCont_->getCollectionNodes();
92 
93 
94  map<size_t, vector<size_t>>& mProcPos = processSeqEvol->mapOfProcessSites();
95 
96  vProcPos_.resize(processSeqEvol->getNumberOfSites());
97 
98  // Build the PhyloLikelihoodContainer
99  auto pC = getPhyloContainer();
100 
101  for (const auto& it:mProcPos)
102  {
103  auto nProcess = it.first;
104 
105  auto l = std::make_shared<LikelihoodCalculationSingleProcess>(collNodes, nProcess);
106 
107  auto nPL = make_shared<SingleProcessPhyloLikelihood>(context, l, nProcess, nData);
108 
109  pC->addPhyloLikelihood(nProcess, nPL);
110 
111  for (size_t i = 0; i < it.second.size(); ++i)
112  {
113  vProcPos_[it.second[i]].nProc = nProcess;
114  vProcPos_[it.second[i]].pos = i;
115  }
116 
117  addPhyloLikelihood(nProcess);
118  }
119 
120  // set Data (will build calculations)
121  setData(data, nData);
122 }
123 
124 /******************************************************************************/
125 
127  std::shared_ptr<const AlignmentDataInterface> data,
128  std::shared_ptr<PartitionSequenceEvolution> processSeqEvol,
129  std::shared_ptr<CollectionNodes> collNodes,
130  size_t nSeqEvol,
131  size_t nData) :
132  AbstractPhyloLikelihood(collNodes->context()),
133  AbstractAlignedPhyloLikelihood(collNodes->context(), data->getNumberOfSites()),
135  collNodes->context(), data->getNumberOfSites(),
136  (processSeqEvol->getSubstitutionProcessNumbers().size() != 0) ? processSeqEvol->substitutionProcess(processSeqEvol->getSubstitutionProcessNumbers()[0]).getNumberOfStates() : 0, nData),
138  AbstractSequencePhyloLikelihood(collNodes->context(), processSeqEvol, nSeqEvol, nData),
139  AbstractPhyloLikelihoodSet(collNodes->context(), std::make_shared<PhyloLikelihoodContainer>(collNodes->context(), collNodes)),
140  mSeqEvol_(processSeqEvol),
141  vProcPos_(),
142  mData_(),
143  likCal_(make_shared<AlignedLikelihoodCalculation>(collNodes->context()))
144 {
145  if (data->getNumberOfSites() != processSeqEvol->getNumberOfSites())
146  throw BadIntegerException("PartitionProcessPhyloLikelihood::PartitionProcessPhyloLikelihood, data and sequence process lengths do not match.", static_cast<int>(data->getNumberOfSites()));
147 
148  map<size_t, vector<size_t>>& mProcPos = processSeqEvol->mapOfProcessSites();
149 
150  vProcPos_.resize(processSeqEvol->getNumberOfSites());
151 
152  // Build the PhyloLikelihoodContainer
153  auto pC = getPhyloContainer();
154 
155  for (const auto& it : mProcPos)
156  {
157  auto nProcess = it.first;
158 
159  auto l = std::make_shared<LikelihoodCalculationSingleProcess>(collNodes, nProcess);
160 
161  auto nPL = make_shared<SingleProcessPhyloLikelihood>(this->context(), l, nProcess, nData);
162 
163  pC->addPhyloLikelihood(nProcess, nPL);
164 
165  for (size_t i = 0; i < it.second.size(); ++i)
166  {
167  vProcPos_[it.second[i]].nProc = nProcess;
168  vProcPos_[it.second[i]].pos = i;
169  }
170 
171  addPhyloLikelihood(nProcess);
172  }
173 
174  // set Data (will build calculations)
175  setData(data, nData);
176 }
177 
178 /******************************************************************************/
179 
181  std::shared_ptr<const AlignmentDataInterface> data,
182  size_t nData)
183 {
184  if (data->getNumberOfSites() != mSeqEvol_->getNumberOfSites())
185  throw BadIntegerException("PartitionProcessPhyloLikelihood::PartitionProcessPhyloLikelihood, data and sequence process lengths do not match.", static_cast<int>(data->getNumberOfSites()));
186 
188 
189  const auto& mProcPos = mSeqEvol_->mapOfProcessSites();
190 
191  for (const auto& it : mProcPos)
192  {
193  shared_ptr<AlignmentDataInterface> st = SiteContainerTools::getSelectedSites(*data, it.second);
194  getPhyloContainer()->setData(st, it.first);
195  mData_[it.first] = st;
196  }
197 
198  // Then build calculations
199  makeLikCal_();
200 }
201 
202 /******************************************************************************/
203 
205 {
206  NodeRefVec vLik;
207 
208  map<size_t, size_t> mProcInd; // map of the index of the process numbers in the list of vLikCal_
209 
210  // get the RowVectors of site likelihoods
211  for (const auto& lik : vLikCal_)
212  {
213  vLik.push_back(dynamic_pointer_cast<AlignedLikelihoodCalculation>(lik)->getSiteLikelihoods(false));
214  }
215 
216  // build the reverse map of Phylo indexes
217  std::map<size_t, size_t> nProc2iProc;
218  for (size_t i = 0; i < nPhylo_.size(); ++i)
219  {
220  nProc2iProc[nPhylo_[i]] = i;
221  }
222 
223  // and the matching between sequence & partition
224  /*
225  * Matrix of matching positions : site X (index of T in the vector of Ts, position for corresponding T)
226  *
227  */
228 
229  MatchingType matching;
230 
231  matching.resize(static_cast<Eigen::Index>(getNumberOfSites()), 2);
232 
233  for (size_t i = 0; i < getNumberOfSites(); ++i)
234  {
235  matching(Eigen::Index(i), 0) = nProc2iProc[vProcPos_[i].nProc];
236  matching(Eigen::Index(i), 1) = vProcPos_[i].pos;
237  }
238 
239  auto matchingDF = NumericConstant<MatchingType>::create(context(), matching);
240 
241  vLik.push_back(matchingDF);
242 
243  auto sL = CWiseMatching<RowLik, ReductionOf<RowLik>>::create(context(), std::move(vLik), RowVectorDimension (getNumberOfSites()));
244 
245  likCal_->setSiteLikelihoods(sL);
246 
248 
249  likCal_->setLikelihoodNode(lik);
250 
251  // using bpp::DotOptions;
252  // writeGraphToDot(
253  // "partition.dot", {lik.get()});//, DotOptions::DetailedNodeInfo | DotOp
254 }
The PhyloLikelihoodSet class, to manage a subset of PhyloLikelihoods from a given PhyloLikelihoodCont...
std::shared_ptr< PhyloLikelihoodContainer > pPhyloCont_
pointer to a PhyloLikelihoodContainer
std::vector< size_t > nPhylo_
vector of AbstractPhyloLikelihood numbers
std::shared_ptr< PhyloLikelihoodContainer > getPhyloContainer() override
std::vector< std::shared_ptr< LikelihoodCalculation > > vLikCal_
virtual bool addPhyloLikelihood(size_t nPhyl, const std::string &suff="") override
adds a PhyloLikelihood already stored in the PhyloLikelihoodContainer, iff it is an AbstractPhyloLike...
const Context & context() const override
virtual void setData(std::shared_ptr< const AlignmentDataInterface > sites, size_t nData=0)
Set the dataset for which the likelihood must be evaluated.
Context for dataflow node construction.
Definition: DataFlow.h:527
static std::shared_ptr< Self > create(Context &c, Args &&... args)
Build a new NumericConstant node with T(args...) value.
PartitionProcessPhyloLikelihood(Context &context, std::shared_ptr< PartitionSequenceEvolution > processSeqEvol, size_t nSeqEvol=0)
std::shared_ptr< AlignedLikelihoodCalculation > likCal_
void setData(std::shared_ptr< const AlignmentDataInterface > data, size_t nData=0) override
Set the dataset for which the likelihood must be evaluated.
size_t getNumberOfSites() const override
Get the number of sites in the dataset.
std::map< size_t, std::shared_ptr< AlignmentDataInterface > > mData_
std::shared_ptr< PartitionSequenceEvolution > mSeqEvol_
to avoid the dynamic casts
The PhyloLikelihoodContainer, owns and assigns numbers to Phylolikelihoods.
static void getSelectedSites(const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, const SiteSelection &selection, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites)
static ValueRef< DataLik > create(Context &c, NodeRefVec &&deps, const Dimension< F > &mDim)
Build a new SumOfLogarithms node with the given input matrix dimensions.
Defines the basic types of data flow nodes.
std::vector< NodeRef > NodeRefVec
Alias for a dependency vector (of NodeRef).
Definition: DataFlow.h:81
Eigen::Matrix< size_t, -1, 2 > MatchingType