bpp-popgen3  3.0.0
PolymorphismSequenceContainerTools.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
6 #define _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
7 
10 
11 // from bpp-seq
13 #include <Bpp/Seq/Io/Mase.h>
14 #include <Bpp/Seq/Io/MaseTools.h>
18 #include <Bpp/Seq/SiteTools.h>
19 
20 // from STL
21 #include <string>
22 
23 // From Local
25 #include "GeneralExceptions.h"
26 
27 namespace bpp
28 {
32 
39 {
40 public:
41  // Class destructor:
43 
44  /*******************************************************************************/
45 
46 public:
// Read a Mase+ file and return a PolymorphismSequenceContainer.
// The result is handed back through a std::unique_ptr.
// NOTE(review): `path` is presumably the location of the Mase+ file and
// `alpha` the alphabet used for the sequences - confirm against the
// implementation, which is not visible here.
56  static std::unique_ptr<PolymorphismSequenceContainer> read(
57  const std::string& path,
58  std::shared_ptr<const Alphabet> alpha);
59 
67  static std::unique_ptr<PolymorphismSequenceContainer> extractIngroup(
69 
77  static std::unique_ptr<PolymorphismSequenceContainer> extractOutgroup(
79 
88  static std::unique_ptr<PolymorphismSequenceContainer> extractGroup(
90  size_t groupId);
91 
98  static std::unique_ptr<PolymorphismSequenceContainer> getSelectedSequences(
100  const SequenceSelection& ss);
101 
109  static std::unique_ptr<PolymorphismSequenceContainer> sample(
111  size_t n,
112  bool replace = true);
113 
// Retrieves sites without gaps from the PolymorphismSequenceContainer `psc`.
// `psc` is taken by const reference; the result is handed back through a
// std::unique_ptr.
119  static std::unique_ptr<PolymorphismSequenceContainer> getSitesWithoutGaps(
120  const PolymorphismSequenceContainer& psc);
121 
130  static size_t getNumberOfNonGapSites(
132  bool ingroup);
133 
143  static size_t getNumberOfCompleteSites(
145  bool ingroup);
146 
147 
// Retrieves complete sites from the PolymorphismSequenceContainer `psc`.
// `psc` is taken by const reference; the result is handed back through a
// std::unique_ptr.
// NOTE(review): "complete" presumably means completely resolved sites (the
// wording used for getNumberOfCompleteSites) - confirm in the implementation.
153  static std::unique_ptr<PolymorphismSequenceContainer> getCompleteSites(
154  const PolymorphismSequenceContainer& psc);
155 
156 
// Excludes flanking sites with gaps but keeps gap sites within the alignment.
// `psc` is taken by const reference; the result is handed back through a
// std::unique_ptr.
162  static std::unique_ptr<PolymorphismSequenceContainer> excludeFlankingGap(
163  const PolymorphismSequenceContainer& psc);
164 
175  static std::unique_ptr<PolymorphismSequenceContainer> getSelectedSites(
177  const std::string& setName, bool phase);
178 
187  static std::unique_ptr<PolymorphismSequenceContainer> getNonCodingSites(
189  const std::string& setName);
190 
202  static std::unique_ptr<PolymorphismSequenceContainer> getOnePosition(
204  const std::string& setName,
205  size_t pos);
206 
216  static std::unique_ptr<PolymorphismSequenceContainer> getIntrons(
218  const std::string& setName,
219  const GeneticCode& gCode);
220 
227  static std::unique_ptr<PolymorphismSequenceContainer> get5Prime(
229  const std::string& setName);
230 
238  static std::unique_ptr<PolymorphismSequenceContainer> get3Prime(
240  const std::string& setName,
241  const GeneticCode& gCode);
242 
// Get the species name of the ingroup of `psc`, returned as a std::string.
// NOTE(review): behavior when no ingroup is present is not visible here.
248  static std::string getIngroupSpeciesName(const PolymorphismSequenceContainer& psc);
249 
258  static std::unique_ptr<PolymorphismSequenceContainer> getSynonymousSites(
260  const GeneticCode& gCode);
261 
270  static std::unique_ptr<PolymorphismSequenceContainer> getNonSynonymousSites(
272  const GeneticCode& gCode);
273 };
274 } // end of namespace bpp;
275 
276 #endif // _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
Utility functions to manipulate a PolymorphismSequenceContainer.
static std::unique_ptr< PolymorphismSequenceContainer > getCompleteSites(const PolymorphismSequenceContainer &psc)
Retrieves complete sites from a PolymorphismSequenceContainer.
static std::unique_ptr< PolymorphismSequenceContainer > getOnePosition(const PolymorphismSequenceContainer &psc, const std::string &setName, size_t pos)
Retrieve sites at one codon position (1, 2 or 3).
static std::unique_ptr< PolymorphismSequenceContainer > getIntrons(const PolymorphismSequenceContainer &psc, const std::string &setName, const GeneticCode &gCode)
Retrieve intron sites.
static std::unique_ptr< PolymorphismSequenceContainer > getNonCodingSites(const PolymorphismSequenceContainer &psc, const std::string &setName)
Retrieve non-coding sites defined in the mase file header.
static std::unique_ptr< PolymorphismSequenceContainer > getSelectedSites(const PolymorphismSequenceContainer &psc, const std::string &setName, bool phase)
Get a PolymorphismSequenceContainer corresponding to a site selection annotated in the mase comments.
static std::unique_ptr< PolymorphismSequenceContainer > get5Prime(const PolymorphismSequenceContainer &psc, const std::string &setName)
Retrieve 5' sites.
static std::unique_ptr< PolymorphismSequenceContainer > extractIngroup(const PolymorphismSequenceContainer &psc)
Extract ingroup sequences from a PolymorphismSequenceContainer and create a new one.
static std::unique_ptr< PolymorphismSequenceContainer > sample(const PolymorphismSequenceContainer &psc, size_t n, bool replace=true)
Get a random set of sequences.
static std::unique_ptr< PolymorphismSequenceContainer > get3Prime(const PolymorphismSequenceContainer &psc, const std::string &setName, const GeneticCode &gCode)
Retrieve 3' sites.
static std::unique_ptr< PolymorphismSequenceContainer > excludeFlankingGap(const PolymorphismSequenceContainer &psc)
Exclude flanking sites with gaps but keep gap sites within the alignment.
static std::unique_ptr< PolymorphismSequenceContainer > getNonSynonymousSites(const PolymorphismSequenceContainer &psc, const GeneticCode &gCode)
Retrieve non-synonymous codon sites.
static size_t getNumberOfNonGapSites(const PolymorphismSequenceContainer &psc, bool ingroup)
Return number of sites without gaps in a PolymorphismSequenceContainer.
static size_t getNumberOfCompleteSites(const PolymorphismSequenceContainer &psc, bool ingroup)
Return number of completely resolved sites in a PolymorphismSequenceContainer.
static std::unique_ptr< PolymorphismSequenceContainer > getSitesWithoutGaps(const PolymorphismSequenceContainer &psc)
Retrieves sites without gaps from PolymorphismSequenceContainer.
static std::unique_ptr< PolymorphismSequenceContainer > extractGroup(const PolymorphismSequenceContainer &psc, size_t groupId)
Extract a special group from the PolymorphismSequenceContainer.
static std::unique_ptr< PolymorphismSequenceContainer > getSynonymousSites(const PolymorphismSequenceContainer &psc, const GeneticCode &gCode)
Retrieve synonymous codon sites.
static std::string getIngroupSpeciesName(const PolymorphismSequenceContainer &psc)
Get the species name of the ingroup.
static std::unique_ptr< PolymorphismSequenceContainer > read(const std::string &path, std::shared_ptr< const Alphabet > alpha)
Read a Mase+ file and return a PolymorphismSequenceContainer. Toggle Sequence when selection tag begi...
static std::unique_ptr< PolymorphismSequenceContainer > getSelectedSequences(const PolymorphismSequenceContainer &psc, const SequenceSelection &ss)
Extract selected sequences.
static std::unique_ptr< PolymorphismSequenceContainer > extractOutgroup(const PolymorphismSequenceContainer &psc)
Extract outgroup sequences from a PolymorphismSequenceContainer and create a new one.
The PolymorphismSequenceContainer class.
std::vector< size_t > SequenceSelection