bpp-phyl3  3.0.0
SubstitutionProcessSequenceSimulator.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 #include <algorithm>
7 
11 
12 // From bpp-seq:
14 
16 
17 using namespace bpp;
18 using namespace std;
19 
20 /******************************************************************************/
21 
23  mProcess_(),
24  vMap_(),
25  seqNames_(),
26  mvPosNames_()
27 {
28  vector<size_t> nProc = evol.getSubstitutionProcessNumbers();
29 
30  vector<shared_ptr<PhyloNode>> vpn = evol.substitutionProcess(nProc[0]).getParametrizablePhyloTree()->getAllLeaves();
31 
32  // set ups seqnames for all processes
33  for (auto& vi : vpn)
34  {
35  seqNames_.push_back(vi->getName());
36  }
37 
38  for (size_t i = 0; i < nProc.size(); ++i)
39  {
40  const auto sp = evol.getSubstitutionProcess(nProc[i]);
41 
42  mProcess_[nProc[i]] = make_unique<SimpleSubstitutionProcessSiteSimulator>(sp);
43 
44  vector<string> seqNames2;
45 
46  vector<shared_ptr<PhyloNode>> vpn2 = sp->getParametrizablePhyloTree()->getAllLeaves();
47  for (size_t i2 = 0; i2 < vpn2.size(); i2++)
48  {
49  seqNames2.push_back(vpn2[i2]->getName());
50  }
51 
52  mvPosNames_[nProc[i]].resize(seqNames_.size());
53 
54  for (size_t j = 0; j < seqNames_.size(); j++)
55  {
56  if (!VectorTools::contains(seqNames2, seqNames_[j]))
57  throw Exception("SubstitutionProcessSequenceSimulator, unknown sequence name: " + seqNames_[j]);
58  mvPosNames_[nProc[i]][j] = VectorTools::which(seqNames2, seqNames_[j]);
59  }
60  }
61 
62  // set up position specific processes
63 
64  auto pse = dynamic_cast<const PartitionSequenceEvolution*>(&evol);
65 
66  if (pse)
67  {
68  setMap(pse->getProcessNumbersPerSite());
69  }
70  else
71  throw Exception("SubstitutionProcessSequenceSimulator::SubstitutionProcessSequenceSimulator(SequenceEvolution) not set for this type of process. Ask developers.");
72 }
73 
74 /******************************************************************************/
75 
77  mProcess_(),
78  vMap_(spss.vMap_),
79  seqNames_(spss.seqNames_),
80  mvPosNames_(spss.mvPosNames_)
81 {
82  for (auto it : spss.mProcess_)
83  {
84  auto dit = std::dynamic_pointer_cast<SimpleSubstitutionProcessSiteSimulator>(it.second);
85  auto git = std::dynamic_pointer_cast<GivenDataSubstitutionProcessSiteSimulator>(it.second);
86 
87  if (dit)
88  mProcess_[it.first] = std::make_shared<SimpleSubstitutionProcessSiteSimulator>(*dit);
89  else if (git)
90  mProcess_[it.first] = std::make_shared<GivenDataSubstitutionProcessSiteSimulator>(*git);
91  else
92  throw Exception("SubstitutionProcessSequenceSimulator::SubstitutionProcessSequenceSimulator: unknown type of site simulator.");
93  }
94 }
95 
96 /******************************************************************************/
97 
99 {
100  vMap_ = spss.vMap_;
101  seqNames_ = spss.seqNames_;
102  mvPosNames_ = spss.mvPosNames_;
103 
104  mProcess_.clear();
105 
106  for (auto it : spss.mProcess_)
107  {
108  auto dit = std::dynamic_pointer_cast<SimpleSubstitutionProcessSiteSimulator>(it.second);
109  auto git = std::dynamic_pointer_cast<GivenDataSubstitutionProcessSiteSimulator>(it.second);
110 
111  if (dit)
112  mProcess_[it.first] = std::make_shared<SimpleSubstitutionProcessSiteSimulator>(*dit);
113  else if (git)
114  mProcess_[it.first] = std::make_shared<GivenDataSubstitutionProcessSiteSimulator>(*git);
115  else
116  throw Exception("SubstitutionProcessSequenceSimulator::SubstitutionProcessSequenceSimulator: unknown type of site simulator.");
117  }
118 
119  return *this;
120 }
121 
122 /******************************************************************************/
123 
125 {
126  for (auto it : mProcess_)
127  {
128  it.second->outputInternalSites(yn);
129  }
130 }
131 
132 /******************************************************************************/
133 
134 void SubstitutionProcessSequenceSimulator::setMap(std::vector<size_t> vMap)
135 {
136  vMap_.clear();
137 
138  for (size_t i = 0; i < vMap.size(); i++)
139  {
140  if (mProcess_.find(vMap[i]) == mProcess_.end())
141  throw Exception("SubstitutionProcessSequenceSimulator::setMap: unknown Process number" + TextTools::toString(vMap[i]));
142  else
143  vMap_.push_back(vMap[i]);
144  }
145 }
146 
147 /******************************************************************************/
148 
149 unique_ptr<SiteContainerInterface> SubstitutionProcessSequenceSimulator::simulate(
150  size_t numberOfSites) const
151 {
152  if (numberOfSites > vMap_.size())
153  throw BadIntegerException("SubstitutionProcessSequenceSimulator::simulate. Too many sites to simulate.", (int)numberOfSites);
154 
155  auto sites = make_unique<VectorSiteContainer>(seqNames_, getAlphabet());
156  sites->setSequenceNames(seqNames_, true);
157 
158  Vint vval(seqNames_.size());
159 
160  for (size_t j = 0; j < numberOfSites; ++j)
161  {
162  auto site = mProcess_.find(vMap_[j])->second->simulateSite();
163 
164  const vector<size_t>& vPosNames = mvPosNames_.find(vMap_[j])->second;
165  for (size_t vn = 0; vn < vPosNames.size(); vn++)
166  {
167  vval[vn] = site->getValue(vPosNames[vn]);
168  }
169 
170  auto site2 = make_unique<Site>(vval, sites->getAlphabet(), static_cast<int>(j));
171  sites->addSite(site2);
172  }
173  return sites;
174 }
175 
176 /******************************************************************************/
177 
178 unique_ptr<SiteContainerInterface> SubstitutionProcessSequenceSimulator::simulate(
179  const vector<double>& rates) const
180 {
181  size_t numberOfSites = rates.size();
182 
183  if (numberOfSites > vMap_.size())
184  throw Exception("SubstitutionProcessSequenceSimulator::simulate : some sites do not have attributed process");
185 
186  auto sites = make_unique<VectorSiteContainer>(seqNames_, getAlphabet());
187  sites->setSequenceNames(seqNames_, true);
188 
189  Vint vval(seqNames_.size());
190 
191  for (size_t j = 0; j < numberOfSites; ++j)
192  {
193  auto site = mProcess_.find(vMap_[j])->second->simulateSite(rates[j]);
194 
195  const vector<size_t>& vPosNames = mvPosNames_.find(vMap_[j])->second;
196  for (size_t vn = 0; vn < vPosNames.size(); vn++)
197  {
198  vval[vn] = site->getValue(vPosNames[vn]);
199  }
200 
201  auto site2 = make_unique<Site>(vval, sites->getAlphabet(), static_cast<int>(j));
202  sites->addSite(site2);
203  }
204  return sites;
205 }
206 
207 /******************************************************************************/
208 
209 unique_ptr<SiteContainerInterface> SubstitutionProcessSequenceSimulator::simulate(
210  const vector<size_t>& states) const
211 {
212  size_t numberOfSites = states.size();
213 
214  auto sites = make_unique<VectorSiteContainer>(seqNames_, getAlphabet());
215  sites->setSequenceNames(seqNames_, true);
216 
217  Vint vval(seqNames_.size());
218 
219  for (size_t j = 0; j < numberOfSites; ++j)
220  {
221  auto site = mProcess_.find(vMap_[j])->second->simulateSite(states[j]);
222 
223  const vector<size_t>& vPosNames = mvPosNames_.find(vMap_[j])->second;
224  for (size_t vn = 0; vn < vPosNames.size(); vn++)
225  {
226  vval[vn] = site->getValue(vPosNames[vn]);
227  }
228 
229  auto site2 = make_unique<Site>(vval, sites->getAlphabet(), static_cast<int>(j));
230  sites->addSite(site2);
231  }
232  return sites;
233 }
234 
235 /******************************************************************************/
236 
237 unique_ptr<SiteContainerInterface> SubstitutionProcessSequenceSimulator::simulate(
238  const vector<double>& rates,
239  const vector<size_t>& states) const
240 {
241  size_t numberOfSites = rates.size();
242  if (states.size() != numberOfSites)
243  throw Exception("SubstitutionProcessSequenceSimulator::simulate, 'rates' and 'states' must have the same length.");
244 
245  auto sites = make_unique<VectorSiteContainer>(seqNames_, getAlphabet());
246  sites->setSequenceNames(seqNames_, true);
247 
248  Vint vval(seqNames_.size());
249 
250  for (size_t j = 0; j < numberOfSites; ++j)
251  {
252  auto site = mProcess_.find(vMap_[j])->second->simulateSite(states[j], rates[j]);
253 
254  const vector<size_t>& vPosNames = mvPosNames_.find(vMap_[j])->second;
255  for (size_t vn = 0; vn < vPosNames.size(); vn++)
256  {
257  vval[vn] = site->getValue(vPosNames[vn]);
258  }
259 
260  auto site2 = make_unique<Site>(vval, sites->getAlphabet(), static_cast<int>(j));
261  sites->addSite(site2);
262  }
263  return sites;
264 }
265 
266 /******************************************************************************/
267 
268 shared_ptr<const Alphabet> SubstitutionProcessSequenceSimulator::getAlphabet() const
269 {
270  if (mProcess_.size() == 0)
271  return nullptr;
272 
273  return mProcess_.begin()->second->getAlphabet();
274 }
275 
276 /******************************************************************************/
277 
279 {
280  if (mProcess_.size() == 0)
281  throw NullPointerException("SubstitutionProcessSequenceSimulator::alphabet(). No process attached.");
282 
283  return mProcess_.begin()->second->alphabet();
284 }
285 
286 /******************************************************************************/
Sequence evolution framework based on a mixture of substitution processes.
This interface describes the evolution process of a sequence.
virtual const std::vector< size_t > & getSubstitutionProcessNumbers() const =0
virtual const SubstitutionProcessInterface & substitutionProcess(size_t number) const =0
virtual std::shared_ptr< const SubstitutionProcessInterface > getSubstitutionProcess(size_t number) const =0
virtual std::shared_ptr< const ParametrizablePhyloTree > getParametrizablePhyloTree() const =0
Sequences simulation under position specific substitution process.
std::vector< std::string > seqNames_
all processes trees must have at least the same sequence names as the first process of the map.
void setMap(std::vector< size_t > vMap)
reset the set of processes.
std::shared_ptr< const Alphabet > getAlphabet() const override
std::map< size_t, std::vector< size_t > > mvPosNames_
correspondence map of seqNames positions of the several trees. Reference is the tree of the first pro...
std::vector< size_t > vMap_
The vector of the site specific process in mProcess_; is mutable because can be changed for each simu...
SubstitutionProcessSequenceSimulator & operator=(const SubstitutionProcessSequenceSimulator &)
std::unique_ptr< SiteContainerInterface > simulate(size_t numberOfSites) const override
void outputInternalSequences(bool yn) override
Sets whether we will output the internal sequences or not.
std::map< size_t, std::shared_ptr< SiteSimulatorInterface > > mProcess_
the map of the process simulators.
static size_t which(const std::vector< T > &v, const T &which)
static bool contains(const std::vector< T > &vec, T el)
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< int > Vint