bpp-phyl3  3.0.0
SitePatterns.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "SitePatterns.h"
6 
7 // From the bpp-seq library:
8 #include <Bpp/Seq/SiteTools.h>
11 
12 using namespace bpp;
13 using namespace std;
14 
15 /******************************************************************************/
16 
18  const AlignmentDataInterface& sequences,
19  std::vector<std::string> names) :
20  names_(),
21  sites_(),
22  weights_(),
23  indices_(),
24  alpha_(sequences.getAlphabet())
25 {
26  names_ = sequences.getSequenceNames();
27  if (names.size() != 0)
29  init_(sequences, names_);
30 }
31 
33  const AlignmentDataInterface& sequences,
34  std::vector<std::string> names)
35 {
36  // positions of the names in sequences list
37  std::vector<size_t> posseq;
38  for (const auto& n : names)
39  {
40  posseq.push_back(sequences.getSequencePosition(n));
41  }
42 
43  int nbSeq = static_cast<int>(sequences.getNumberOfSequences());
44  std::vector<size_t> posnseq;
45 
46  std::stable_sort(posseq.begin(), posseq.end());
47  for (int i = nbSeq - 1; i >= 0; i--)
48  {
49  if (!std::binary_search(posseq.begin(), posseq.end(), i))
50  posnseq.push_back(static_cast<size_t>(i));
51  }
52 
53  // Then build Sortable sites with correct sequences
54  size_t nbSites = sequences.getNumberOfSites();
55 
56  vector<SortableSite> ss(nbSites);
57  for (size_t i = 0; i < nbSites; i++)
58  {
59  CoreSiteInterface* currentSite = sequences.site(i).clone();
60  for (auto pos : posnseq)
61  {
62  currentSite->deleteElement(pos);
63  }
64 
65  SortableSite* ssi = &ss[i];
66  ssi->siteS = currentSite->toString();
67  ssi->siteP = currentSite;
68  ssi->originalPosition = i;
69  }
70 
71  if (nbSites > 0)
72  {
73  // Quick sort according to site contents:
74  sort(ss.begin(), ss.end());
75 
76  // Now build patterns:
77  SortableSite* ss0 = &ss[0];
78  auto previousSite = ss0->siteP;
79  indices_.resize(Eigen::Index(nbSites));
80  indices_[Eigen::Index(ss0->originalPosition)] = 0;
81  sites_.push_back(shared_ptr<const CoreSiteInterface>(previousSite));
82  weights_.push_back(1);
83 
84  size_t currentPos = 0;
85  for (size_t i = 1; i < nbSites; ++i)
86  {
87  SortableSite* ssi = &ss[i];
88  auto currentSite = ssi->siteP;
89 
90  bool siteExists = SymbolListTools::areSymbolListsIdentical(*currentSite, *previousSite);
91  if (siteExists)
92  {
93  weights_[currentPos]++;
94  delete currentSite;
95  }
96  else
97  {
98  sites_.push_back(shared_ptr<const CoreSiteInterface>(currentSite));
99  weights_.push_back(1);
100  currentPos++;
101  previousSite = currentSite;
102  }
103  indices_[Eigen::Index(ssi->originalPosition)] = currentPos;
104  }
105  }
106 }
107 
108 /******************************************************************************/
109 
110 unique_ptr<AlignmentDataInterface> SitePatterns::getSites() const
111 {
112  if (sites_.size() == 0)
113  throw Exception("SitePatterns::getSites : empty set.");
114 
115  unique_ptr<AlignmentDataInterface> sites;
116 
117  if (dynamic_pointer_cast<const Site>(sites_[0]))
118  {
119  // Copy the sites
120  vector<unique_ptr<Site>> vSites;
121  for (auto& s : sites_)
122  {
123  auto ptr = unique_ptr<Site>(dynamic_cast<Site*>(s->clone()));
124  vSites.push_back(std::move(ptr));
125  }
126  sites.reset(new VectorSiteContainer(vSites, alpha_));
127  sites->setSequenceNames(names_, true);
128  return sites;
129  }
130 
131  if (dynamic_pointer_cast<const ProbabilisticSite>(sites_[0]))
132  {
133  // Copy the sites
134  vector<unique_ptr<ProbabilisticSite>> vSites;
135  for (auto& s : sites_)
136  {
137  vSites.push_back(unique_ptr<ProbabilisticSite>(dynamic_cast<ProbabilisticSite*>(s->clone())));
138  }
139  sites.reset(new ProbabilisticVectorSiteContainer(vSites, alpha_));
140  sites->setSequenceNames(names_, true);
141  return sites;
142  }
143 
144  throw Exception("SitePatterns::getSites(). Unsupported site type.");
145 }
146 
147 /******************************************************************************/
CoreSiteInterface * clone() const override=0
virtual void deleteElement(size_t pos)=0
virtual std::string toString() const=0
Class used for site pattern sorting.
Definition: SitePatterns.h:44
const CoreSiteInterface * siteP
Definition: SitePatterns.h:47
void init_(const AlignmentDataInterface &sequences, std::vector< std::string > names={})
IndicesType indices_
Definition: SitePatterns.h:70
std::vector< std::shared_ptr< const CoreSiteInterface > > sites_
Definition: SitePatterns.h:68
std::vector< unsigned int > weights_
Definition: SitePatterns.h:69
std::vector< std::string > names_
Definition: SitePatterns.h:67
std::shared_ptr< const Alphabet > alpha_
Definition: SitePatterns.h:71
SitePatterns(const AlignmentDataInterface &sequences, std::vector< std::string > names={})
Build a new SitePattern object.
std::unique_ptr< AlignmentDataInterface > getSites() const
static bool areSymbolListsIdentical(const IntSymbolListInterface &list1, const IntSymbolListInterface &list2)
virtual const CoreSiteInterface & site(size_t siteIndex) const=0
virtual size_t getNumberOfSites() const=0
virtual std::vector< std::string > getSequenceNames() const =0
virtual size_t getNumberOfSequences() const =0
virtual size_t getSequencePosition(const std::string &sequenceKey) const =0
static std::vector< T > vectorIntersection(const std::vector< T > &vec1, const std::vector< T > &vec2)
Defines the basic types of data flow nodes.
TemplateVectorSiteContainer< ProbabilisticSite, ProbabilisticSequence > ProbabilisticVectorSiteContainer
TemplateVectorSiteContainer< Site, Sequence > VectorSiteContainer