bpp-popgen  3.0.0
PolymorphismSequenceContainerTools.h
Go to the documentation of this file.
1 //
2 // File: PolymorphismSequenceContainerTools.h
3 // Authors: Eric Bazin
4 // Sylvain Gaillard
5 // Created on: Thursday July 29 2004
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for population genetics analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39  */
40 
41 #ifndef _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
42 #define _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
43 
46 
47 // from bpp-seq
49 #include <Bpp/Seq/Io/Mase.h>
50 #include <Bpp/Seq/Io/MaseTools.h>
54 #include <Bpp/Seq/SiteTools.h>
55 
56 // from STL
57 #include <string>
58 
59 // From Local
61 #include "GeneralExceptions.h"
62 
63 namespace bpp
64 {
72 {
73 public:
74  // Class destructor:
76 
77  /*******************************************************************************/
78 
79 public:
89  static PolymorphismSequenceContainer* read(const std::string& path, const Alphabet* alpha); // Read a Mase+ file at `path` and return a PolymorphismSequenceContainer.
    // NOTE(review): the result is a raw pointer; ownership presumably passes to the caller -- confirm against the implementation.
90 
99 
108 
117  static PolymorphismSequenceContainer* extractGroup(const PolymorphismSequenceContainer& psc, size_t group_id); // Extract the group identified by `group_id` from `psc`.
     // NOTE(review): presumably returns a newly allocated container owned by the caller -- confirm.
118 
127 
135  static PolymorphismSequenceContainer* sample(const PolymorphismSequenceContainer& psc, size_t n, bool replace = true); // Get a random set of sequences from `psc`.
     // NOTE(review): presumably `n` is the number of sequences drawn and `replace` selects sampling with replacement -- confirm.
136 
143 
152  static size_t getNumberOfNonGapSites(const PolymorphismSequenceContainer& psc, bool ingroup); // Return the number of sites without gaps in `psc`.
     // NOTE(review): presumably `ingroup` restricts the count to ingroup sequences -- confirm.
153 
163  static size_t getNumberOfCompleteSites(const PolymorphismSequenceContainer& psc, bool ingroup); // Return the number of completely resolved sites in `psc`.
     // NOTE(review): presumably `ingroup` restricts the count to ingroup sequences -- confirm.
164 
165 
172 
173 
180 
191  static PolymorphismSequenceContainer* getSelectedSites(const PolymorphismSequenceContainer& psc, const std::string& setName, bool phase); // Get a container corresponding to the site selection `setName` annotated in the mase comments.
     // NOTE(review): the meaning of `phase` is not visible in this header -- check the implementation before relying on it.
192 
201  static PolymorphismSequenceContainer* getNonCodingSites(const PolymorphismSequenceContainer& psc, const std::string& setName); // Retrieve the non-coding sites defined in the mase file header.
202 
214  static PolymorphismSequenceContainer* getOnePosition(const PolymorphismSequenceContainer& psc, const std::string& setName, size_t pos); // Retrieve the sites at one codon position; `pos` is 1, 2 or 3.
215 
225  static PolymorphismSequenceContainer* getIntrons(const PolymorphismSequenceContainer& psc, const std::string& setName, const GeneticCode* gCode); // Retrieve intron sites.
     // NOTE(review): how `setName` and `gCode` delimit the introns is not visible here -- confirm in the implementation.
226 
233  static PolymorphismSequenceContainer* get5Prime(const PolymorphismSequenceContainer& psc, const std::string& setName); // Retrieve 5' sites.
234 
242  static PolymorphismSequenceContainer* get3Prime(const PolymorphismSequenceContainer& psc, const std::string& setName, const GeneticCode* gCode); // Retrieve 3' sites.
     // NOTE(review): unlike get5Prime, this takes a genetic code; presumably it is used to locate the end of the coding region -- confirm.
243 
249  static std::string getIngroupSpeciesName(const PolymorphismSequenceContainer& psc); // Get the species name of the ingroup in `psc`.
250 
260 
270 
271 };
272 } // end of namespace bpp;
273 
274 #endif // _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
275 
Utility functions to manipulate PolymorphismSequenceContainer objects.
static PolymorphismSequenceContainer * get5Prime(const PolymorphismSequenceContainer &psc, const std::string &setName)
Retrieve 5' sites.
static PolymorphismSequenceContainer * getNonCodingSites(const PolymorphismSequenceContainer &psc, const std::string &setName)
Retrieve non-coding sites defined in the mase file header.
static PolymorphismSequenceContainer * getSitesWithoutGaps(const PolymorphismSequenceContainer &psc)
Retrieves sites without gaps from PolymorphismSequenceContainer.
static PolymorphismSequenceContainer * extractIngroup(const PolymorphismSequenceContainer &psc)
Extract ingroup sequences from a PolymorphismSequenceContainer and create a new one.
static PolymorphismSequenceContainer * getSelectedSequences(const PolymorphismSequenceContainer &psc, const SequenceSelection &ss)
Extract selected sequences.
static PolymorphismSequenceContainer * extractOutgroup(const PolymorphismSequenceContainer &psc)
Extract outgroup sequences from a PolymorphismSequenceContainer and create a new one.
static PolymorphismSequenceContainer * extractGroup(const PolymorphismSequenceContainer &psc, size_t group_id)
Extract a specific group from the PolymorphismSequenceContainer.
static size_t getNumberOfNonGapSites(const PolymorphismSequenceContainer &psc, bool ingroup)
Return number of sites without gaps in a PolymorphismSequenceContainer.
static PolymorphismSequenceContainer * getIntrons(const PolymorphismSequenceContainer &psc, const std::string &setName, const GeneticCode *gCode)
Retrieve intron sites.
static PolymorphismSequenceContainer * read(const std::string &path, const Alphabet *alpha)
Read a Mase+ file and return a PolymorphismSequenceContainer. Toggle Sequence when selection tag begi...
static size_t getNumberOfCompleteSites(const PolymorphismSequenceContainer &psc, bool ingroup)
Return number of completely resolved sites in a PolymorphismSequenceContainer.
static PolymorphismSequenceContainer * excludeFlankingGap(const PolymorphismSequenceContainer &psc)
Exclude flanking sites with gaps, but keep gap sites within the alignment.
static PolymorphismSequenceContainer * get3Prime(const PolymorphismSequenceContainer &psc, const std::string &setName, const GeneticCode *gCode)
Retrieve 3' sites.
static std::string getIngroupSpeciesName(const PolymorphismSequenceContainer &psc)
Get the species name of the ingroup.
static PolymorphismSequenceContainer * getOnePosition(const PolymorphismSequenceContainer &psc, const std::string &setName, size_t pos)
Retrieve sites at one codon position (1, 2 or 3).
static PolymorphismSequenceContainer * getSelectedSites(const PolymorphismSequenceContainer &psc, const std::string &setName, bool phase)
Get a PolymorphismSequenceContainer corresponding to a site selection annotated in the mase comments.
static PolymorphismSequenceContainer * sample(const PolymorphismSequenceContainer &psc, size_t n, bool replace=true)
Get a random set of sequences.
static PolymorphismSequenceContainer * getCompleteSites(const PolymorphismSequenceContainer &psc)
Retrieves complete sites from a PolymorphismSequenceContainer.
static PolymorphismSequenceContainer * getNonSynonymousSites(const PolymorphismSequenceContainer &psc, const GeneticCode &gCode)
Retrieve non-synonymous codon sites.
static PolymorphismSequenceContainer * getSynonymousSites(const PolymorphismSequenceContainer &psc, const GeneticCode &gCode)
Retrieve synonymous codon sites.
The PolymorphismSequenceContainer class.
std::vector< size_t > SequenceSelection