bpp-phyl3 3.0.0
PartitionProcessPhyloLikelihood.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
6
10
11using namespace std;
12using namespace bpp;
13
14/******************************************************************************/
15
16PartitionProcessPhyloLikelihood::PartitionProcessPhyloLikelihood(
17 Context& context,
18 std::shared_ptr<PartitionSequenceEvolution> processSeqEvol,
19 size_t nSeqEvol) :
23 context, processSeqEvol->getNumberOfSites(),
24 (processSeqEvol->getSubstitutionProcessNumbers().size() != 0) ? processSeqEvol->substitutionProcess(processSeqEvol->getSubstitutionProcessNumbers()[0]).getNumberOfStates() : 0, 0),
26 AbstractSequencePhyloLikelihood(context, processSeqEvol, nSeqEvol),
27 AbstractPhyloLikelihoodSet(context, std::make_shared<PhyloLikelihoodContainer>(context, processSeqEvol->getCollection())),
28 mSeqEvol_(processSeqEvol),
29 vProcPos_(),
30 mData_(),
31 likCal_(new AlignedLikelihoodCalculation(context))
32{
33 // make new shared Parameters
34 auto collNodes = pPhyloCont_->getCollectionNodes();
35
36 //
37
38 map<size_t, vector<size_t>>& mProcPos = processSeqEvol->mapOfProcessSites();
39
40 vProcPos_.resize(processSeqEvol->getNumberOfSites());
41
42 // Build the PhyloLikelihoodContainer
43 auto pC = getPhyloContainer();
44
45 for (const auto& it : mProcPos)
46 {
47 auto nProcess = it.first;
48
49 auto l = std::make_shared<LikelihoodCalculationSingleProcess>(collNodes, nProcess);
50
51 auto nPL = make_shared<SingleProcessPhyloLikelihood>(context, l, nProcess);
52
53 pC->addPhyloLikelihood(nProcess, nPL);
54
55 for (size_t i = 0; i < it.second.size(); ++i)
56 {
57 vProcPos_[it.second[i]].nProc = nProcess;
58 vProcPos_[it.second[i]].pos = i;
59 }
60
61 addPhyloLikelihood(nProcess);
62 }
63}
64
65
66/******************************************************************************/
67
69 Context& context,
70 std::shared_ptr<const AlignmentDataInterface> data,
71 std::shared_ptr<PartitionSequenceEvolution> processSeqEvol,
72 size_t nSeqEvol,
73 size_t nData) :
75 AbstractAlignedPhyloLikelihood(context, data->getNumberOfSites()),
77 context, data->getNumberOfSites(),
78 (processSeqEvol->getSubstitutionProcessNumbers().size() != 0) ? processSeqEvol->substitutionProcess(processSeqEvol->getSubstitutionProcessNumbers()[0]).getNumberOfStates() : 0, nData),
80 AbstractSequencePhyloLikelihood(context, processSeqEvol, nSeqEvol, nData),
81 AbstractPhyloLikelihoodSet(context, std::make_shared<PhyloLikelihoodContainer>(context, processSeqEvol->getCollection())),
82 mSeqEvol_(processSeqEvol),
83 vProcPos_(),
84 mData_(),
85 likCal_(new AlignedLikelihoodCalculation(context))
86{
87 if (data->getNumberOfSites() != processSeqEvol->getNumberOfSites())
88 throw BadIntegerException("PartitionProcessPhyloLikelihood::PartitionProcessPhyloLikelihood, data and sequence process lengths do not match.", static_cast<int>(data->getNumberOfSites()));
89
90 // get new shared Parameters
91 auto collNodes = pPhyloCont_->getCollectionNodes();
92
93
94 map<size_t, vector<size_t>>& mProcPos = processSeqEvol->mapOfProcessSites();
95
96 vProcPos_.resize(processSeqEvol->getNumberOfSites());
97
98 // Build the PhyloLikelihoodContainer
99 auto pC = getPhyloContainer();
100
101 for (const auto& it:mProcPos)
102 {
103 auto nProcess = it.first;
104
105 auto l = std::make_shared<LikelihoodCalculationSingleProcess>(collNodes, nProcess);
106
107 auto nPL = make_shared<SingleProcessPhyloLikelihood>(context, l, nProcess, nData);
108
109 pC->addPhyloLikelihood(nProcess, nPL);
110
111 for (size_t i = 0; i < it.second.size(); ++i)
112 {
113 vProcPos_[it.second[i]].nProc = nProcess;
114 vProcPos_[it.second[i]].pos = i;
115 }
116
117 addPhyloLikelihood(nProcess);
118 }
119
120 // set Data (will build calculations)
121 setData(data, nData);
122}
123
124/******************************************************************************/
125
127 std::shared_ptr<const AlignmentDataInterface> data,
128 std::shared_ptr<PartitionSequenceEvolution> processSeqEvol,
129 std::shared_ptr<CollectionNodes> collNodes,
130 size_t nSeqEvol,
131 size_t nData) :
132 AbstractPhyloLikelihood(collNodes->context()),
133 AbstractAlignedPhyloLikelihood(collNodes->context(), data->getNumberOfSites()),
135 collNodes->context(), data->getNumberOfSites(),
136 (processSeqEvol->getSubstitutionProcessNumbers().size() != 0) ? processSeqEvol->substitutionProcess(processSeqEvol->getSubstitutionProcessNumbers()[0]).getNumberOfStates() : 0, nData),
138 AbstractSequencePhyloLikelihood(collNodes->context(), processSeqEvol, nSeqEvol, nData),
139 AbstractPhyloLikelihoodSet(collNodes->context(), std::make_shared<PhyloLikelihoodContainer>(collNodes->context(), collNodes)),
140 mSeqEvol_(processSeqEvol),
141 vProcPos_(),
142 mData_(),
143 likCal_(make_shared<AlignedLikelihoodCalculation>(collNodes->context()))
144{
145 if (data->getNumberOfSites() != processSeqEvol->getNumberOfSites())
146 throw BadIntegerException("PartitionProcessPhyloLikelihood::PartitionProcessPhyloLikelihood, data and sequence process lengths do not match.", static_cast<int>(data->getNumberOfSites()));
147
148 map<size_t, vector<size_t>>& mProcPos = processSeqEvol->mapOfProcessSites();
149
150 vProcPos_.resize(processSeqEvol->getNumberOfSites());
151
152 // Build the PhyloLikelihoodContainer
153 auto pC = getPhyloContainer();
154
155 for (const auto& it : mProcPos)
156 {
157 auto nProcess = it.first;
158
159 auto l = std::make_shared<LikelihoodCalculationSingleProcess>(collNodes, nProcess);
160
161 auto nPL = make_shared<SingleProcessPhyloLikelihood>(this->context(), l, nProcess, nData);
162
163 pC->addPhyloLikelihood(nProcess, nPL);
164
165 for (size_t i = 0; i < it.second.size(); ++i)
166 {
167 vProcPos_[it.second[i]].nProc = nProcess;
168 vProcPos_[it.second[i]].pos = i;
169 }
170
171 addPhyloLikelihood(nProcess);
172 }
173
174 // set Data (will build calculations)
175 setData(data, nData);
176}
177
178/******************************************************************************/
179
181 std::shared_ptr<const AlignmentDataInterface> data,
182 size_t nData)
183{
184 if (data->getNumberOfSites() != mSeqEvol_->getNumberOfSites())
185 throw BadIntegerException("PartitionProcessPhyloLikelihood::PartitionProcessPhyloLikelihood, data and sequence process lengths do not match.", static_cast<int>(data->getNumberOfSites()));
186
188
189 const auto& mProcPos = mSeqEvol_->mapOfProcessSites();
190
191 for (const auto& it : mProcPos)
192 {
193 shared_ptr<AlignmentDataInterface> st = SiteContainerTools::getSelectedSites(*data, it.second);
194 getPhyloContainer()->setData(st, it.first);
195 mData_[it.first] = st;
196 }
197
198 // Then build calculations
199 makeLikCal_();
200}
201
202/******************************************************************************/
203
205{
206 NodeRefVec vLik;
207
208 map<size_t, size_t> mProcInd; // map of the index of the process numbers in the list of vLikCal_
209
210 // get the RowVectors of site likelihoods
211 for (const auto& lik : vLikCal_)
212 {
213 vLik.push_back(dynamic_pointer_cast<AlignedLikelihoodCalculation>(lik)->getSiteLikelihoods(false));
214 }
215
216 // build the reverse map of Phylo indexes
217 std::map<size_t, size_t> nProc2iProc;
218 for (size_t i = 0; i < nPhylo_.size(); ++i)
219 {
220 nProc2iProc[nPhylo_[i]] = i;
221 }
222
223 // and the matching between sequence & partition
224 /*
225 * Matrix of matching positions : site X (index of T in the vector of Ts, position for corresponding T)
226 *
227 */
228
229 MatchingType matching;
230
231 matching.resize(static_cast<Eigen::Index>(getNumberOfSites()), 2);
232
233 for (size_t i = 0; i < getNumberOfSites(); ++i)
234 {
235 matching(Eigen::Index(i), 0) = nProc2iProc[vProcPos_[i].nProc];
236 matching(Eigen::Index(i), 1) = vProcPos_[i].pos;
237 }
238
239 auto matchingDF = NumericConstant<MatchingType>::create(context(), matching);
240
241 vLik.push_back(matchingDF);
242
244
245 likCal_->setSiteLikelihoods(sL);
246
248
249 likCal_->setLikelihoodNode(lik);
250
251 // using bpp::DotOptions;
252 // writeGraphToDot(
253 // "partition.dot", {lik.get()});//, DotOptions::DetailedNodeInfo | DotOp
254}
The PhyloLikelihoodSet class, to manage a subset of PhyloLikelihoods from a given PhyloLikelihoodCont...
std::shared_ptr< PhyloLikelihoodContainer > pPhyloCont_
pointer to a PhyloLikelihoodContainer
std::vector< size_t > nPhylo_
vector of AbstractPhyloLikelihood numbers
std::vector< std::shared_ptr< LikelihoodCalculation > > vLikCal_
virtual bool addPhyloLikelihood(size_t nPhyl, const std::string &suff="") override
adds a PhyloLikelihood already stored in the PhyloLikelihoodContainer, iff it is an AbstractPhyloLike...
std::shared_ptr< PhyloLikelihoodContainer > getPhyloContainer() override
const Context & context() const override
virtual void setData(std::shared_ptr< const AlignmentDataInterface > sites, size_t nData=0)
Set the dataset for which the likelihood must be evaluated.
Context for dataflow node construction.
Definition: DataFlow.h:527
static std::shared_ptr< Self > create(Context &c, Args &&... args)
Build a new NumericConstant node with T(args...) value.
PartitionProcessPhyloLikelihood(Context &context, std::shared_ptr< PartitionSequenceEvolution > processSeqEvol, size_t nSeqEvol=0)
std::shared_ptr< AlignedLikelihoodCalculation > likCal_
void setData(std::shared_ptr< const AlignmentDataInterface > data, size_t nData=0) override
Set the dataset for which the likelihood must be evaluated.
size_t getNumberOfSites() const override
Get the number of sites in the dataset.
std::map< size_t, std::shared_ptr< AlignmentDataInterface > > mData_
std::shared_ptr< PartitionSequenceEvolution > mSeqEvol_
to avoid the dynamic casts
The PhyloLikelihoodContainer, owns and assigns numbers to Phylolikelihoods.
static void getSelectedSites(const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, const SiteSelection &selection, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites)
static ValueRef< DataLik > create(Context &c, NodeRefVec &&deps, const Dimension< F > &mDim)
Build a new SumOfLogarithms node with the given input matrix dimensions.
Defines the basic types of data flow nodes.
std::vector< NodeRef > NodeRefVec
Alias for a dependency vector (of NodeRef).
Definition: DataFlow.h:81
Eigen::Matrix< size_t, -1, 2 > MatchingType