bpp-phyl3 3.0.0
SitePatterns.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "SitePatterns.h"
6
7// From the bpp-seq library:
8#include <Bpp/Seq/SiteTools.h>
11
12using namespace bpp;
13using namespace std;
14
15/******************************************************************************/
16
17SitePatterns::SitePatterns(
18 const AlignmentDataInterface& sequences,
19 std::vector<std::string> names) :
20 names_(),
21 sites_(),
22 weights_(),
23 indices_(),
24 alpha_(sequences.getAlphabet())
25{
26 names_ = sequences.getSequenceNames();
27 if (names.size() != 0)
29 init_(sequences, names_);
30}
31
33 const AlignmentDataInterface& sequences,
34 std::vector<std::string> names)
35{
36 // positions of the names in sequences list
37 std::vector<size_t> posseq;
38 for (const auto& n : names)
39 {
40 posseq.push_back(sequences.getSequencePosition(n));
41 }
42
43 int nbSeq = static_cast<int>(sequences.getNumberOfSequences());
44 std::vector<size_t> posnseq;
45
46 std::stable_sort(posseq.begin(), posseq.end());
47 for (int i = nbSeq - 1; i >= 0; i--)
48 {
49 if (!std::binary_search(posseq.begin(), posseq.end(), i))
50 posnseq.push_back(static_cast<size_t>(i));
51 }
52
53 // Then build Sortable sites with correct sequences
54 size_t nbSites = sequences.getNumberOfSites();
55
56 vector<SortableSite> ss(nbSites);
57 for (size_t i = 0; i < nbSites; i++)
58 {
59 CoreSiteInterface* currentSite = sequences.site(i).clone();
60 for (auto pos : posnseq)
61 {
62 currentSite->deleteElement(pos);
63 }
64
65 SortableSite* ssi = &ss[i];
66 ssi->siteS = currentSite->toString();
67 ssi->siteP = currentSite;
68 ssi->originalPosition = i;
69 }
70
71 if (nbSites > 0)
72 {
73 // Quick sort according to site contents:
74 sort(ss.begin(), ss.end());
75
76 // Now build patterns:
77 SortableSite* ss0 = &ss[0];
78 auto previousSite = ss0->siteP;
79 indices_.resize(Eigen::Index(nbSites));
80 indices_[Eigen::Index(ss0->originalPosition)] = 0;
81 sites_.push_back(shared_ptr<const CoreSiteInterface>(previousSite));
82 weights_.push_back(1);
83
84 size_t currentPos = 0;
85 for (size_t i = 1; i < nbSites; ++i)
86 {
87 SortableSite* ssi = &ss[i];
88 auto currentSite = ssi->siteP;
89
90 bool siteExists = SymbolListTools::areSymbolListsIdentical(*currentSite, *previousSite);
91 if (siteExists)
92 {
93 weights_[currentPos]++;
94 delete currentSite;
95 }
96 else
97 {
98 sites_.push_back(shared_ptr<const CoreSiteInterface>(currentSite));
99 weights_.push_back(1);
100 currentPos++;
101 previousSite = currentSite;
102 }
103 indices_[Eigen::Index(ssi->originalPosition)] = currentPos;
104 }
105 }
106}
107
108/******************************************************************************/
109
110unique_ptr<AlignmentDataInterface> SitePatterns::getSites() const
111{
112 if (sites_.size() == 0)
113 throw Exception("SitePatterns::getSites : empty set.");
114
115 unique_ptr<AlignmentDataInterface> sites;
116
117 if (dynamic_pointer_cast<const Site>(sites_[0]))
118 {
119 // Copy the sites
120 vector<unique_ptr<Site>> vSites;
121 for (auto& s : sites_)
122 {
123 auto ptr = unique_ptr<Site>(dynamic_cast<Site*>(s->clone()));
124 vSites.push_back(std::move(ptr));
125 }
126 sites.reset(new VectorSiteContainer(vSites, alpha_));
127 sites->setSequenceNames(names_, true);
128 return sites;
129 }
130
131 if (dynamic_pointer_cast<const ProbabilisticSite>(sites_[0]))
132 {
133 // Copy the sites
134 vector<unique_ptr<ProbabilisticSite>> vSites;
135 for (auto& s : sites_)
136 {
137 vSites.push_back(unique_ptr<ProbabilisticSite>(dynamic_cast<ProbabilisticSite*>(s->clone())));
138 }
139 sites.reset(new ProbabilisticVectorSiteContainer(vSites, alpha_));
140 sites->setSequenceNames(names_, true);
141 return sites;
142 }
143
144 throw Exception("SitePatterns::getSites(). Unsupported site type.");
145}
146
147/******************************************************************************/
CoreSiteInterface * clone() const override=0
virtual void deleteElement(size_t pos)=0
virtual std::string toString() const=0
Class used for site pattern sorting.
Definition: SitePatterns.h:44
const CoreSiteInterface * siteP
Definition: SitePatterns.h:47
void init_(const AlignmentDataInterface &sequences, std::vector< std::string > names={})
IndicesType indices_
Definition: SitePatterns.h:70
std::vector< std::shared_ptr< const CoreSiteInterface > > sites_
Definition: SitePatterns.h:68
std::vector< unsigned int > weights_
Definition: SitePatterns.h:69
std::vector< std::string > names_
Definition: SitePatterns.h:67
std::shared_ptr< const Alphabet > alpha_
Definition: SitePatterns.h:71
std::unique_ptr< AlignmentDataInterface > getSites() const
static bool areSymbolListsIdentical(const IntSymbolListInterface &list1, const IntSymbolListInterface &list2)
virtual const CoreSiteInterface & site(size_t siteIndex) const=0
virtual size_t getNumberOfSites() const=0
virtual std::vector< std::string > getSequenceNames() const =0
virtual size_t getNumberOfSequences() const =0
virtual size_t getSequencePosition(const std::string &sequenceKey) const =0
static std::vector< T > vectorIntersection(const std::vector< T > &vec1, const std::vector< T > &vec2)
Defines the basic types of data flow nodes.
TemplateVectorSiteContainer< ProbabilisticSite, ProbabilisticSequence > ProbabilisticVectorSiteContainer
TemplateVectorSiteContainer< Site, Sequence > VectorSiteContainer