bpp-phyl3 3.0.0
SubstitutionProcessSequenceSimulator.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
6#include <algorithm>
7
11
12// From bpp-seq:
14
16
17using namespace bpp;
18using namespace std;
19
20/******************************************************************************/
21
22SubstitutionProcessSequenceSimulator::SubstitutionProcessSequenceSimulator(const SequenceEvolution& evol) :
23 mProcess_(),
24 vMap_(),
25 seqNames_(),
26 mvPosNames_()
27{
28 vector<size_t> nProc = evol.getSubstitutionProcessNumbers();
29
30 vector<shared_ptr<PhyloNode>> vpn = evol.substitutionProcess(nProc[0]).getParametrizablePhyloTree()->getAllLeaves();
31
32 // set ups seqnames for all processes
33 for (auto& vi : vpn)
34 {
35 seqNames_.push_back(vi->getName());
36 }
37
38 for (size_t i = 0; i < nProc.size(); ++i)
39 {
40 const auto sp = evol.getSubstitutionProcess(nProc[i]);
41
42 mProcess_[nProc[i]] = make_unique<SimpleSubstitutionProcessSiteSimulator>(sp);
43
44 vector<string> seqNames2;
45
46 vector<shared_ptr<PhyloNode>> vpn2 = sp->getParametrizablePhyloTree()->getAllLeaves();
47 for (size_t i2 = 0; i2 < vpn2.size(); i2++)
48 {
49 seqNames2.push_back(vpn2[i2]->getName());
50 }
51
52 mvPosNames_[nProc[i]].resize(seqNames_.size());
53
54 for (size_t j = 0; j < seqNames_.size(); j++)
55 {
56 if (!VectorTools::contains(seqNames2, seqNames_[j]))
57 throw Exception("SubstitutionProcessSequenceSimulator, unknown sequence name: " + seqNames_[j]);
58 mvPosNames_[nProc[i]][j] = VectorTools::which(seqNames2, seqNames_[j]);
59 }
60 }
61
62 // set up position specific processes
63
64 auto pse = dynamic_cast<const PartitionSequenceEvolution*>(&evol);
65
66 if (pse)
67 {
68 setMap(pse->getProcessNumbersPerSite());
69 }
70 else
71 throw Exception("SubstitutionProcessSequenceSimulator::SubstitutionProcessSequenceSimulator(SequenceEvolution) not set for this type of process. Ask developers.");
72}
73
74/******************************************************************************/
75
77 mProcess_(),
78 vMap_(spss.vMap_),
79 seqNames_(spss.seqNames_),
80 mvPosNames_(spss.mvPosNames_)
81{
82 for (auto it : spss.mProcess_)
83 {
84 auto dit = std::dynamic_pointer_cast<SimpleSubstitutionProcessSiteSimulator>(it.second);
85 auto git = std::dynamic_pointer_cast<GivenDataSubstitutionProcessSiteSimulator>(it.second);
86
87 if (dit)
88 mProcess_[it.first] = std::make_shared<SimpleSubstitutionProcessSiteSimulator>(*dit);
89 else if (git)
90 mProcess_[it.first] = std::make_shared<GivenDataSubstitutionProcessSiteSimulator>(*git);
91 else
92 throw Exception("SubstitutionProcessSequenceSimulator::SubstitutionProcessSequenceSimulator: unknown type of site simulator.");
93 }
94}
95
96/******************************************************************************/
97
99{
100 vMap_ = spss.vMap_;
101 seqNames_ = spss.seqNames_;
103
104 mProcess_.clear();
105
106 for (auto it : spss.mProcess_)
107 {
108 auto dit = std::dynamic_pointer_cast<SimpleSubstitutionProcessSiteSimulator>(it.second);
109 auto git = std::dynamic_pointer_cast<GivenDataSubstitutionProcessSiteSimulator>(it.second);
110
111 if (dit)
112 mProcess_[it.first] = std::make_shared<SimpleSubstitutionProcessSiteSimulator>(*dit);
113 else if (git)
114 mProcess_[it.first] = std::make_shared<GivenDataSubstitutionProcessSiteSimulator>(*git);
115 else
116 throw Exception("SubstitutionProcessSequenceSimulator::SubstitutionProcessSequenceSimulator: unknown type of site simulator.");
117 }
118
119 return *this;
120}
121
122/******************************************************************************/
123
125{
126 for (auto it : mProcess_)
127 {
128 it.second->outputInternalSites(yn);
129 }
130}
131
132/******************************************************************************/
133
135{
136 vMap_.clear();
137
138 for (size_t i = 0; i < vMap.size(); i++)
139 {
140 if (mProcess_.find(vMap[i]) == mProcess_.end())
141 throw Exception("SubstitutionProcessSequenceSimulator::setMap: unknown Process number" + TextTools::toString(vMap[i]));
142 else
143 vMap_.push_back(vMap[i]);
144 }
145}
146
147/******************************************************************************/
148
149unique_ptr<SiteContainerInterface> SubstitutionProcessSequenceSimulator::simulate(
150 size_t numberOfSites) const
151{
152 if (numberOfSites > vMap_.size())
153 throw BadIntegerException("SubstitutionProcessSequenceSimulator::simulate. Too many sites to simulate.", (int)numberOfSites);
154
155 auto sites = make_unique<VectorSiteContainer>(seqNames_, getAlphabet());
156 sites->setSequenceNames(seqNames_, true);
157
158 Vint vval(seqNames_.size());
159
160 for (size_t j = 0; j < numberOfSites; ++j)
161 {
162 auto site = mProcess_.find(vMap_[j])->second->simulateSite();
163
164 const vector<size_t>& vPosNames = mvPosNames_.find(vMap_[j])->second;
165 for (size_t vn = 0; vn < vPosNames.size(); vn++)
166 {
167 vval[vn] = site->getValue(vPosNames[vn]);
168 }
169
170 auto site2 = make_unique<Site>(vval, sites->getAlphabet(), static_cast<int>(j));
171 sites->addSite(site2);
172 }
173 return sites;
174}
175
176/******************************************************************************/
177
178unique_ptr<SiteContainerInterface> SubstitutionProcessSequenceSimulator::simulate(
179 const vector<double>& rates) const
180{
181 size_t numberOfSites = rates.size();
182
183 if (numberOfSites > vMap_.size())
184 throw Exception("SubstitutionProcessSequenceSimulator::simulate : some sites do not have attributed process");
185
186 auto sites = make_unique<VectorSiteContainer>(seqNames_, getAlphabet());
187 sites->setSequenceNames(seqNames_, true);
188
189 Vint vval(seqNames_.size());
190
191 for (size_t j = 0; j < numberOfSites; ++j)
192 {
193 auto site = mProcess_.find(vMap_[j])->second->simulateSite(rates[j]);
194
195 const vector<size_t>& vPosNames = mvPosNames_.find(vMap_[j])->second;
196 for (size_t vn = 0; vn < vPosNames.size(); vn++)
197 {
198 vval[vn] = site->getValue(vPosNames[vn]);
199 }
200
201 auto site2 = make_unique<Site>(vval, sites->getAlphabet(), static_cast<int>(j));
202 sites->addSite(site2);
203 }
204 return sites;
205}
206
207/******************************************************************************/
208
209unique_ptr<SiteContainerInterface> SubstitutionProcessSequenceSimulator::simulate(
210 const vector<size_t>& states) const
211{
212 size_t numberOfSites = states.size();
213
214 auto sites = make_unique<VectorSiteContainer>(seqNames_, getAlphabet());
215 sites->setSequenceNames(seqNames_, true);
216
217 Vint vval(seqNames_.size());
218
219 for (size_t j = 0; j < numberOfSites; ++j)
220 {
221 auto site = mProcess_.find(vMap_[j])->second->simulateSite(states[j]);
222
223 const vector<size_t>& vPosNames = mvPosNames_.find(vMap_[j])->second;
224 for (size_t vn = 0; vn < vPosNames.size(); vn++)
225 {
226 vval[vn] = site->getValue(vPosNames[vn]);
227 }
228
229 auto site2 = make_unique<Site>(vval, sites->getAlphabet(), static_cast<int>(j));
230 sites->addSite(site2);
231 }
232 return sites;
233}
234
235/******************************************************************************/
236
237unique_ptr<SiteContainerInterface> SubstitutionProcessSequenceSimulator::simulate(
238 const vector<double>& rates,
239 const vector<size_t>& states) const
240{
241 size_t numberOfSites = rates.size();
242 if (states.size() != numberOfSites)
243 throw Exception("SubstitutionProcessSequenceSimulator::simulate, 'rates' and 'states' must have the same length.");
244
245 auto sites = make_unique<VectorSiteContainer>(seqNames_, getAlphabet());
246 sites->setSequenceNames(seqNames_, true);
247
248 Vint vval(seqNames_.size());
249
250 for (size_t j = 0; j < numberOfSites; ++j)
251 {
252 auto site = mProcess_.find(vMap_[j])->second->simulateSite(states[j], rates[j]);
253
254 const vector<size_t>& vPosNames = mvPosNames_.find(vMap_[j])->second;
255 for (size_t vn = 0; vn < vPosNames.size(); vn++)
256 {
257 vval[vn] = site->getValue(vPosNames[vn]);
258 }
259
260 auto site2 = make_unique<Site>(vval, sites->getAlphabet(), static_cast<int>(j));
261 sites->addSite(site2);
262 }
263 return sites;
264}
265
266/******************************************************************************/
267
269{
270 if (mProcess_.size() == 0)
271 return nullptr;
272
273 return mProcess_.begin()->second->getAlphabet();
274}
275
276/******************************************************************************/
277
279{
280 if (mProcess_.size() == 0)
281 throw NullPointerException("SubstitutionProcessSequenceSimulator::alphabet(). No process attached.");
282
283 return mProcess_.begin()->second->alphabet();
284}
285
286/******************************************************************************/
Sequence evolution framework based on a mixture of substitution processes.
This interface describes the evolution process of a sequence.
virtual const std::vector< size_t > & getSubstitutionProcessNumbers() const =0
virtual const SubstitutionProcessInterface & substitutionProcess(size_t number) const =0
virtual std::shared_ptr< const SubstitutionProcessInterface > getSubstitutionProcess(size_t number) const =0
virtual std::shared_ptr< const ParametrizablePhyloTree > getParametrizablePhyloTree() const =0
Sequences simulation under position specific substitution process.
std::vector< std::string > seqNames_
all processes trees must have at least the same sequence names as the first process of the map.
void setMap(std::vector< size_t > vMap)
reset the set of processes.
std::shared_ptr< const Alphabet > getAlphabet() const override
std::map< size_t, std::vector< size_t > > mvPosNames_
correspondence map of seqNames positions of the several trees. Reference is the tree of the first pro...
std::vector< size_t > vMap_
The vector of the site specific process in mProcess_; is mutable because can be changed for each simu...
SubstitutionProcessSequenceSimulator & operator=(const SubstitutionProcessSequenceSimulator &)
std::unique_ptr< SiteContainerInterface > simulate(size_t numberOfSites) const override
void outputInternalSequences(bool yn) override
Sets whether we will output the internal sequences or not.
std::map< size_t, std::shared_ptr< SiteSimulatorInterface > > mProcess_
the map of the process simulators.
static size_t which(const std::vector< T > &v, const T &which)
static bool contains(const std::vector< T > &vec, T el)
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< int > Vint