bpp-popgen3  3.0.0
DataSet.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef _DATASET_H_
6 #define _DATASET_H_
7 
8 // From the STL
9 #include <algorithm>
10 #include <vector>
11 #include <map>
12 #include <string>
13 
14 #include <Bpp/Exceptions.h>
15 #include <Bpp/Graphics/Point2D.h>
16 #include <Bpp/Utils/MapTools.h>
17 
18 #include "Group.h"
19 #include "Individual.h"
20 #include "Locality.h"
21 #include "../GeneralExceptions.h"
22 #include "AnalyzedLoci.h"
23 #include "../PolymorphismMultiGContainer.h"
24 #include "../PolymorphismSequenceContainer.h"
25 
26 namespace bpp
27 {
36 class DataSet
37 {
38 private:
39  std::unique_ptr<AnalyzedLoci> analyzedLoci_;
40  std::shared_ptr<const Alphabet> sequenceAlphabet_;
41  std::vector<std::shared_ptr<Locality<double>>> localities_; // Localities can be shared
42  std::vector<std::unique_ptr<Group>> groups_;
43 
44 public:
45  // Constructor and destructor
49  DataSet() :
50  analyzedLoci_(nullptr),
51  sequenceAlphabet_(nullptr),
52  localities_(),
53  groups_()
54  {}
55 
56 
60  virtual ~DataSet() = default;
61 
65  DataSet(const DataSet& ds);
66 
67  DataSet& operator=(const DataSet& ds);
68 
69 public:
70  // Methodes
71 // ** Locality manipulation ***************************************************/
78  void addLocality(const Locality<double>& locality);
79 
87  size_t getLocalityPosition(const std::string& name) const;
88 
96  std::shared_ptr<const Locality<double>> getLocalityAtPosition(size_t localityPosition) const;
97 
105  const Locality<double>& localityAtPosition(size_t localityPosition) const;
106 
112  std::shared_ptr<const Locality<double>> getLocalityByName(const std::string& name) const;
113 
119  const Locality<double>& localityByName(const std::string& name) const;
120 
126  void deleteLocalityAtPosition(size_t localityPosition);
127 
133  void deleteLocalityByName(const std::string& name);
134 
138  size_t getNumberOfLocalities() const { return localities_.size(); }
139 
143  bool hasLocality() const { return getNumberOfLocalities() > 0; }
144 
145  // ** Group manipulation ******************************************************/
153  void addGroup(const Group& group);
154 
158  void addEmptyGroup(size_t group_id);
159 
163  const Group& getGroupById(size_t group_id) const;
164 
170  size_t getGroupPosition(size_t group_id) const;
171 
177  std::string getGroupName(size_t group_id) const;
183  void setGroupName(size_t group_id, const std::string& group_name) const;
184 
190  const Group& getGroupAtPosition(size_t groupPosition) const;
191 
197  void deleteGroupAtPosition(size_t groupPosition);
198 
202  size_t getNumberOfGroups() const;
203 
210  void mergeTwoGroups(size_t source_id, size_t target_id);
211 
222  void mergeGroups(std::vector<size_t>& group_ids);
223 
232  void splitGroup(size_t group_id, std::vector<size_t> individuals_selection);
233 
234  // ** Individuals manipulation ************************************************/
241  void addIndividualToGroup(size_t groupPosition, const Individual& individual);
242 
249  void addEmptyIndividualToGroup(size_t groupPosition, const std::string& individual_id);
250 
256  size_t getNumberOfIndividualsInGroup(size_t groupPosition) const;
257 
264  size_t getIndividualPositionInGroup(size_t groupPosition, const std::string& individual_id) const;
265 
272  const Individual& getIndividualAtPositionFromGroup(size_t groupPosition, size_t individualPosition) const;
273 
280  const Individual& getIndividualByIdFromGroup(size_t groupPosition, const std::string& individualId) const;
281 
288  void deleteIndividualAtPositionFromGroup(size_t groupPosition, size_t individualPosition);
289 
296  void deleteIndividualByIdFromGroup(size_t groupPosition, const std::string& individualId);
297 
304  void setIndividualSexInGroup(size_t groupPosition, size_t individualPosition, const unsigned short sex);
305 
312  unsigned short getIndividualSexInGroup(size_t groupPosition, size_t individualPosition) const;
313 
321  size_t groupPosition,
322  size_t individualPosition,
323  const Date& date);
324 
333  size_t groupPosition,
334  size_t individualPosition) const;
335 
342  void setIndividualCoordInGroup(size_t groupPosition, size_t individualPosition, const Point2D<double>& coord);
343 
352  size_t groupPosition,
353  size_t individualPosition) const;
354 
363  size_t groupPosition,
364  size_t individualPosition,
365  const std::string& localityName);
366 
374  std::shared_ptr<const Locality<double>> getIndividualLocalityInGroup(
375  size_t groupPosition,
376  size_t individualPosition) const;
377 
387  size_t groupPosition,
388  size_t individualPosition,
389  size_t sequencePosition,
390  std::unique_ptr<Sequence>& sequence);
391 
401  const Sequence& getIndividualSequenceByNameInGroup(size_t groupPosition, size_t individualPosition, const std::string& sequenceName) const;
402 
411  const Sequence& getIndividualSequenceAtPositionInGroup(size_t groupPosition, size_t individualPosition, size_t sequencePosition) const;
412 
421  void deleteIndividualSequenceByNameInGroup(size_t groupPosition, size_t individualPosition, const std::string& sequenceName);
422 
431  void deleteIndividualSequenceAtPositionInGroup(size_t groupPosition, size_t individualPosition, size_t sequencePosition);
432 
440  std::vector<std::string> getIndividualSequencesNamesInGroup(size_t groupPosition, size_t individualPosition) const;
441 
450  size_t getIndividualSequencePositionInGroup(size_t groupPosition, size_t individualPosition, const std::string& sequenceName) const;
451 
459  size_t getIndividualNumberOfSequencesInGroup(size_t groupPosition, size_t individualPosition) const;
460 
467  void setIndividualGenotypeInGroup(size_t groupPosition, size_t individualPosition, const MultilocusGenotype& genotype);
468 
478  void initIndividualGenotypeInGroup(size_t groupPosition, size_t individualPosition);
479 
486  void deleteIndividualGenotypeInGroup(size_t groupPosition, size_t individualPosition);
487 
497  size_t groupPosition,
498  size_t individualPosition,
499  size_t locusPosition,
500  const MonolocusGenotypeInterface& monogen);
501 
512  size_t groupPosition,
513  size_t individualPosition,
514  size_t locusPosition,
515  const std::vector<size_t> alleleKeys);
516 
527  size_t groupPosition,
528  size_t individualPosition,
529  size_t locusPosition,
530  const std::vector<std::string> alleleId);
531 
542  size_t groupPosition,
543  size_t individualPosition,
544  size_t locusPosition) const;
545 
549  void setAlphabet(std::shared_ptr<const Alphabet> alpha) { sequenceAlphabet_ = alpha; }
550 
554  void setAlphabet(const std::string& alphaType);
555 
561  std::shared_ptr<const Alphabet> getAlphabet() const
562  {
563  if (!sequenceAlphabet_)
564  throw NullPointerException("DataSet::getAlphabet: no sequence data.");
565  return sequenceAlphabet_;
566  }
567 
573  const Alphabet& alphabet() const
574  {
575  if (!sequenceAlphabet_)
576  throw NullPointerException("DataSet::getAlphabet: no sequence data.");
577  return *sequenceAlphabet_;
578  }
579 
585  std::string getAlphabetType() const
586  {
587  if (!sequenceAlphabet_)
588  throw NullPointerException("DataSet::getAlphabet: no sequence data.");
589  return sequenceAlphabet_->getAlphabetType();
590  }
591 
592 
593  // ** AnalyzedLoci manipulation ***********************************************/
600  {
602  }
603 
609  void initAnalyzedLoci(size_t numberOfLoci)
610  {
611  if (analyzedLoci_)
612  throw Exception("DataSet::initAnalyzedLoci: analyzedLoci_ already initialized.");
613  analyzedLoci_ = std::make_unique<AnalyzedLoci>(numberOfLoci);
614  }
615 
621  const AnalyzedLoci& analyzedLoci() const
622  {
623  if (analyzedLoci_)
624  return *analyzedLoci_;
625  throw NullPointerException("DataSet::getAnalyzedLoci: no loci initialized.");
626  }
627 
632  {
633  if (analyzedLoci_) analyzedLoci_.reset(nullptr);
634  }
635 
642  void setLocusInfo(size_t locus_position, const LocusInfo& locus);
643 
647  const LocusInfo& getLocusInfoByName(const std::string& locus_name) const;
648 
652  const LocusInfo& getLocusInfoAtPosition(size_t locus_position) const;
653 
657  void addAlleleInfoByLocusName(const std::string& locus_name, const AlleleInfo& allele);
658 
662  void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele);
663 
667  size_t getNumberOfLoci() const;
668 
672  size_t getPloidyByLocusName(const std::string& locus_name) const;
673 
677  size_t getPloidyByLocusPosition(size_t locus_position) const;
678 
682  std::unique_ptr<PolymorphismMultiGContainer> getPolymorphismMultiGContainer() const;
683 
689  std::unique_ptr<PolymorphismMultiGContainer> getPolymorphismMultiGContainer(const std::map<size_t, std::vector<size_t>>& selection) const;
690 
698  std::unique_ptr<PolymorphismSequenceContainer> getPolymorphismSequenceContainer(
699  const std::map<size_t, std::vector<size_t>>& selection,
700  size_t sequence_position) const;
701 
702  // ** General tests **********************************************************/
706  bool hasSequenceData() const { return sequenceAlphabet_ != nullptr; }
707 
711  bool hasAlleleicData() const { return analyzedLoci_ != nullptr; }
712 };
713 } // end of namespace bpp;
714 
715 #endif // _DATASET_H_
The AlleleInfo interface.
Definition: AlleleInfo.h:25
The AnalyzedLoci class.
Definition: AnalyzedLoci.h:31
AnalyzedLoci * clone() const
Definition: AnalyzedLoci.h:71
The DataSet class.
Definition: DataSet.h:37
const Group & getGroupAtPosition(size_t groupPosition) const
Get a group by position.
Definition: DataSet.cpp:233
void deleteLocalityAtPosition(size_t localityPosition)
Delete a Locality from the DataSet.
Definition: DataSet.cpp:130
size_t getIndividualSequencePositionInGroup(size_t groupPosition, size_t individualPosition, const std::string &sequenceName) const
Get the position of a Sequence in an Individual of a Group.
Definition: DataSet.cpp:768
size_t getNumberOfGroups() const
Get the number of Groups.
Definition: DataSet.cpp:251
std::unique_ptr< PolymorphismSequenceContainer > getPolymorphismSequenceContainer(const std::map< size_t, std::vector< size_t >> &selection, size_t sequence_position) const
Get a PolymorphismSequenceContainer from a selection of groups and individuals.
Definition: DataSet.cpp:1191
const Group & getGroupById(size_t group_id) const
Get a group by identifier.
Definition: DataSet.cpp:178
std::string getAlphabetType() const
Get the alphabet type as a string.
Definition: DataSet.h:585
void deleteIndividualGenotypeInGroup(size_t groupPosition, size_t individualPosition)
Delete the MultilocusGenotype of an Individual from a Group.
Definition: DataSet.cpp:856
bool hasAlleleicData() const
Tell if there is allelic data.
Definition: DataSet.h:711
std::unique_ptr< PolymorphismMultiGContainer > getPolymorphismMultiGContainer() const
Get a PolymorphismMultiGContainer with all allelic data of the DataSet.
Definition: DataSet.cpp:1131
void addAlleleInfoByLocusName(const std::string &locus_name, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
Definition: DataSet.cpp:1050
void setIndividualMonolocusGenotypeByAlleleKeyInGroup(size_t groupPosition, size_t individualPosition, size_t locusPosition, const std::vector< size_t > alleleKeys)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:900
void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
Definition: DataSet.cpp:1070
void initAnalyzedLoci(size_t numberOfLoci)
Initialize the AnalyzedLoci for number of loci.
Definition: DataSet.h:609
std::shared_ptr< const Locality< double > > getIndividualLocalityInGroup(size_t groupPosition, size_t individualPosition) const
Get the Locality of an Individual in a Group.
Definition: DataSet.cpp:603
void deleteGroupAtPosition(size_t groupPosition)
Delete a Group from the DataSet.
Definition: DataSet.cpp:242
size_t getPloidyByLocusPosition(size_t locus_position) const
Get the ploidy of a locus.
Definition: DataSet.cpp:1115
void mergeTwoGroups(size_t source_id, size_t target_id)
Merge two groups.
Definition: DataSet.cpp:258
const Sequence & getIndividualSequenceByNameInGroup(size_t groupPosition, size_t individualPosition, const std::string &sequenceName) const
Get a Sequence from an Individual of a Group.
Definition: DataSet.cpp:652
size_t getIndividualPositionInGroup(size_t groupPosition, const std::string &individual_id) const
Get the position of an Individual in a Group.
Definition: DataSet.cpp:399
void mergeGroups(std::vector< size_t > &group_ids)
Merge some Groups in one.
Definition: DataSet.cpp:289
unsigned short getIndividualSexInGroup(size_t groupPosition, size_t individualPosition) const
Get the sex of an Individual in a Group.
Definition: DataSet.cpp:495
std::shared_ptr< const Alphabet > getAlphabet() const
Get a pointer toward the alphabet if there is sequence data.
Definition: DataSet.h:561
void deleteLocalityByName(const std::string &name)
Delete a Locality from the DataSet.
Definition: DataSet.cpp:139
const Individual & getIndividualAtPositionFromGroup(size_t groupPosition, size_t individualPosition) const
Get an Individual from a Group.
Definition: DataSet.cpp:415
const AnalyzedLoci & analyzedLoci() const
Get the AnalyzedLoci if there is one.
Definition: DataSet.h:621
void setIndividualCoordInGroup(size_t groupPosition, size_t individualPosition, const Point2D< double > &coord)
Set the coordinates of an Individual in a Group.
Definition: DataSet.cpp:547
void setGroupName(size_t group_id, const std::string &group_name) const
Set the name of a Group.
Definition: DataSet.cpp:206
std::vector< std::unique_ptr< Group > > groups_
Definition: DataSet.h:42
void deleteIndividualSequenceAtPositionInGroup(size_t groupPosition, size_t individualPosition, size_t sequencePosition)
Delete a Sequence of an Individual of a Group.
Definition: DataSet.cpp:724
void deleteIndividualAtPositionFromGroup(size_t groupPosition, size_t individualPosition)
Delete an Individual from a group.
Definition: DataSet.cpp:447
void setAnalyzedLoci(const AnalyzedLoci &analyzedLoci)
Set the AnalyzedLoci to the DataSet.
Definition: DataSet.h:599
size_t getGroupPosition(size_t group_id) const
Get the position of a Group.
Definition: DataSet.cpp:221
const Individual & getIndividualByIdFromGroup(size_t groupPosition, const std::string &individualId) const
Get an Individual from a Group.
Definition: DataSet.cpp:431
std::string getGroupName(size_t group_id) const
Get the name of a Group. If the name is an empty string it just returns the group_id.
Definition: DataSet.cpp:193
DataSet & operator=(const DataSet &ds)
Definition: DataSet.cpp:33
void addGroup(const Group &group)
Add a Group to the DataSet.
Definition: DataSet.cpp:154
const MonolocusGenotypeInterface & getIndividualMonolocusGenotypeInGroup(size_t groupPosition, size_t individualPosition, size_t locusPosition) const
Get a MonolocusGenotype from an Individual of a Group.
Definition: DataSet.cpp:957
void setLocusInfo(size_t locus_position, const LocusInfo &locus)
Set a LocusInfo.
Definition: DataSet.cpp:998
size_t getNumberOfIndividualsInGroup(size_t groupPosition) const
Get the number of Individuals in a Group.
Definition: DataSet.cpp:390
void addIndividualSequenceInGroup(size_t groupPosition, size_t individualPosition, size_t sequencePosition, std::unique_ptr< Sequence > &sequence)
Add a Sequence to an Individual in a Group.
Definition: DataSet.cpp:623
void deleteIndividualSequenceByNameInGroup(size_t groupPosition, size_t individualPosition, const std::string &sequenceName)
Delete a Sequence of an Individual of a Group.
Definition: DataSet.cpp:700
void setIndividualGenotypeInGroup(size_t groupPosition, size_t individualPosition, const MultilocusGenotype &genotype)
Set the MultilocusGenotype of an Individual in a Group.
Definition: DataSet.cpp:812
size_t getIndividualNumberOfSequencesInGroup(size_t groupPosition, size_t individualPosition) const
Get the number of Sequences in an Individual of a Group.
Definition: DataSet.cpp:792
bool hasSequenceData() const
Tell if at least one individual has at least one sequence.
Definition: DataSet.h:706
void setIndividualSexInGroup(size_t groupPosition, size_t individualPosition, const unsigned short sex)
Set the sex of an Individual in a Group.
Definition: DataSet.cpp:479
void setAlphabet(std::shared_ptr< const Alphabet > alpha)
Set the alphabet of the AnalyzedSequences.
Definition: DataSet.h:549
void deleteAnalyzedLoci()
Delete the AnalyzedLoci.
Definition: DataSet.h:631
std::vector< std::shared_ptr< Locality< double > > > localities_
Definition: DataSet.h:41
void setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t groupPosition, size_t individualPosition, size_t locusPosition, const std::vector< std::string > alleleId)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:928
const Date & individualDateInGroup(size_t groupPosition, size_t individualPosition) const
Get the Date of an Individual in a Group.
Definition: DataSet.cpp:527
const Point2D< double > & individualCoordInGroup(size_t groupPosition, size_t individualPosition) const
Get the coordinate of an Individual in a Group.
Definition: DataSet.cpp:563
void deleteIndividualByIdFromGroup(size_t groupPosition, const std::string &individualId)
Delete an Individual from a group.
Definition: DataSet.cpp:463
const LocusInfo & getLocusInfoByName(const std::string &locus_name) const
Get a LocusInfo by its name.
Definition: DataSet.cpp:1014
size_t getPloidyByLocusName(const std::string &locus_name) const
Get the ploidy of a locus.
Definition: DataSet.cpp:1099
const Locality< double > & localityByName(const std::string &name) const
Get a Locality by name.
Definition: DataSet.cpp:116
size_t getLocalityPosition(const std::string &name) const
Get the position of a locality in the container.
Definition: DataSet.cpp:72
const Locality< double > & localityAtPosition(size_t localityPosition) const
Get a Locality by localityPosition.
Definition: DataSet.cpp:93
virtual ~DataSet()=default
Destroy a DataSet.
size_t getNumberOfLocalities() const
Get the number of Localities.
Definition: DataSet.h:138
void addEmptyIndividualToGroup(size_t groupPosition, const std::string &individual_id)
Add an empty Individual to a Group.
Definition: DataSet.cpp:374
std::shared_ptr< const Locality< double > > getLocalityByName(const std::string &name) const
Get a Locality by name.
Definition: DataSet.cpp:102
const LocusInfo & getLocusInfoAtPosition(size_t locus_position) const
Get a LocusInfo by its position.
Definition: DataSet.cpp:1030
size_t getNumberOfLoci() const
Get the number of loci.
Definition: DataSet.cpp:1090
void setIndividualLocalityInGroupByName(size_t groupPosition, size_t individualPosition, const std::string &localityName)
Set the Locality of an Individual in a Group.
Definition: DataSet.cpp:583
void addIndividualToGroup(size_t groupPosition, const Individual &individual)
Add an Individual to a Group.
Definition: DataSet.cpp:356
std::shared_ptr< const Alphabet > sequenceAlphabet_
Definition: DataSet.h:40
std::vector< std::string > getIndividualSequencesNamesInGroup(size_t groupPosition, size_t individualPosition) const
Get the Sequences' names from an Individual of a Group.
Definition: DataSet.cpp:748
void setIndividualMonolocusGenotypeInGroup(size_t groupPosition, size_t individualPosition, size_t locusPosition, const MonolocusGenotypeInterface &monogen)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:872
const Sequence & getIndividualSequenceAtPositionInGroup(size_t groupPosition, size_t individualPosition, size_t sequencePosition) const
Get a Sequence from an Individual of a Group.
Definition: DataSet.cpp:676
std::unique_ptr< AnalyzedLoci > analyzedLoci_
Definition: DataSet.h:39
void initIndividualGenotypeInGroup(size_t groupPosition, size_t individualPosition)
Initialize the genotype of an Individual in a Group.
Definition: DataSet.cpp:828
bool hasLocality() const
Tell if there is at least one locality.
Definition: DataSet.h:143
std::shared_ptr< const Locality< double > > getLocalityAtPosition(size_t localityPosition) const
Get a Locality by localityPosition.
Definition: DataSet.cpp:84
void setIndividualDateInGroup(size_t groupPosition, size_t individualPosition, const Date &date)
Set the Date of an Individual in a Group.
Definition: DataSet.cpp:511
const Alphabet & alphabet() const
Get a reference toward the alphabet if there is sequence data.
Definition: DataSet.h:573
void addEmptyGroup(size_t group_id)
Add an empty Group to the DataSet.
Definition: DataSet.cpp:166
void splitGroup(size_t group_id, std::vector< size_t > individuals_selection)
Split a group in two.
Definition: DataSet.cpp:320
DataSet()
Build a new empty DataSet.
Definition: DataSet.h:49
void addLocality(const Locality< double > &locality)
Add a locality to the DataSet.
Definition: DataSet.cpp:60
The Date class.
Definition: Date.h:21
The Group class.
Definition: Group.h:36
The Individual class.
Definition: Individual.h:40
The Locality class.
Definition: Locality.h:25
The LocusInfo class.
Definition: LocusInfo.h:31
The MonolocusGenotype virtual class.
The MultilocusGenotype class.