42 #include "../Alphabet/CodonAlphabet.h"
43 #include "../SymbolListTools.h"
45 #include "SequenceContainerTools.h"
48 // From bpp-core:
49 #include <Bpp/Text/TextTools.h>
51 using namespace bpp;
53 // From the STL:
54 #include <iostream>
56 using namespace std;
58 /******************************************************************************/
61 {
 // NOTE(review): the signature and the declaration of `vsc` are not visible in this
 // excerpt; `size` and `alphabet` are presumably the function's parameters - confirm.
 // Add `size` sequences to `vsc`. Each BasicSequence is built from the loop index
 // rendered as text, an empty string, and `alphabet`.
63  for (size_t i = 0; i < size; ++i)
64  {
 // The trailing `false` is presumably the name-check flag of addSequence - TODO confirm.
65  vsc->addSequence(BasicSequence(TextTools::toString(i), "", alphabet), false);
66  }
67  return vsc;
68 }
70 /******************************************************************************/
73  const Alphabet* alphabet,
74  const vector<string>& seqNames)
75 {
 // NOTE(review): the first line of the signature is not visible in this excerpt.
 // Obtain a container from createContainerOfSpecifiedSize, sized to the number of
 // names supplied.
76  SequenceContainer* sc = createContainerOfSpecifiedSize(alphabet, seqNames.size());
 // Replace the sequence names with seqNames; the `true` argument is presumably a
 // name-check flag - TODO confirm against SequenceContainer::setSequenceNames.
77  sc->setSequenceNames(seqNames, true);
 // A raw pointer is returned; NOTE(review): ownership presumably passes to the
 // caller - confirm.
78  return sc;
79 }
81 /******************************************************************************/
84  const OrderedSequenceContainer& sequences,
85  const SequenceSelection& selection,
86  SequenceContainer& outputCont)
87 {
 // Copy the sequences found at the positions listed in `selection` from `sequences`
 // into `outputCont`, in the order in which the positions appear in `selection`.
 // The flag handed to addSequence is computed once, before anything is added: it is
 // true only when outputCont already held sequences on entry, and it is not updated
 // as sequences are added by this loop.
88  bool checkNames = outputCont.getNumberOfSequences() > 0;
89  for (size_t i = 0; i < selection.size(); i++)
90  {
91  outputCont.addSequence(sequences.getSequence(selection[i]), checkNames);
92  }
93 }
95 /******************************************************************************/
98  const SequenceContainer& sequences,
99  const std::vector<std::string>& selection,
100  SequenceContainer& outputCont, bool strict)
101 {
 // Copy the sequences whose names are listed in `selection` from `sequences` into
 // `outputCont`, in the order in which the names appear in `selection`.
 // The flag handed to addSequence is computed once, before anything is added: it is
 // true only when outputCont already held sequences on entry.
102  bool checkNames = outputCont.getNumberOfSequences() > 0;
103  for (size_t i = 0; i < selection.size(); i++)
104  {
105  if (strict)
106  {
 // Strict mode: the sequence is fetched without a prior existence test.
 // NOTE(review): getSequence presumably throws for an unknown name - confirm.
107  outputCont.addSequence(sequences.getSequence(selection[i]), checkNames);
108  }
109  else
110  {
 // Lenient mode: names for which hasSequence() is false are silently skipped.
111  if (sequences.hasSequence(selection[i]))
112  outputCont.addSequence(sequences.getSequence(selection[i]), checkNames);
113  }
114  }
115 }
117 /******************************************************************************/
120  OrderedSequenceContainer& sequences,
121  const SequenceSelection& selection)
122 {
 // Remove from `sequences` every sequence whose original position is not listed in
 // `selection`.
 // Snapshot the names up front: names[i] is the name of the sequence at position i
 // before any removal takes place.
123  vector<string> names = sequences.getSequenceNames();
124  for (size_t i = 0; i < names.size(); i++)
125  {
126  // Removal is done by name rather than by position, because positions shift
127  // after each removal. An alternative would be to process the indices in decreasing order.
128  bool test = false;
 // Linear scan of the selection; stops as soon as position i is found.
129  for (size_t j = 0; j < selection.size() && !test; j++)
130  {
131  test = (selection[j] == i);
132  }
133  if (!test)
134  sequences.removeSequence(names[i]);
135  // WARNING: what if selection contains the same index several times? ...
136  }
137 }
139 /******************************************************************************/
142 {
 // NOTE(review): the signature is not visible in this excerpt; `sequences` is
 // presumably the container parameter - confirm.
 // Report whether every sequence in `sequences` has the same size.
143  vector<string> seqNames = sequences.getSequenceNames();
 // With zero or one sequence there is nothing to compare, so the answer is true.
144  if (seqNames.size() <= 1)
145  return true;
 // The size of the first sequence is the reference; stop at the first mismatch.
146  size_t length = sequences.getSequence(seqNames[0]).size();
147  for (size_t i = 1; i < seqNames.size(); i++)
148  {
149  if (sequences.getSequence(seqNames[i]).size() != length)
150  return false;
151  }
152  return true;
153 }
155 /******************************************************************************/
157 void SequenceContainerTools::getFrequencies(const SequencedValuesContainer& sequences, std::map<int, double>& f, double pseudoCount)
158 {
159  double n = 0;
161  const SequenceContainer* sc = dynamic_cast<const SequenceContainer*>(&sequences);
162  const ProbabilisticSequenceContainer* psc = dynamic_cast<const ProbabilisticSequenceContainer*>(&sequences);
164  for (const auto& name: sequences.getSequenceNames())
165  {
166  if (sc)
167  {
168  const Sequence& seq = sc->getSequence(name);
169  SymbolListTools::getCounts(seq, f, true);
170  n += static_cast<double>(seq.size());
171  }
172  else if (psc)
173  {
174  const ProbabilisticSequence& seq = psc->getSequence(name);
175  SymbolListTools::getCounts(seq, f, true);
176  n += static_cast<double>(seq.size());
177  }
178  else
179  throw Exception("SequenceContainerTools::getFrequencies : unknown SequenceContainer type.");
180  }
182  if (pseudoCount != 0)
183  {
184  const Alphabet* pA = sequences.getAlphabet();
185  for (int i = 0; i < static_cast<int>(pA->getSize()); i++)
186  {
187  f[i] += pseudoCount;
188  }
190  n += pseudoCount * static_cast<double>(pA->getSize());
191  }
193  for (auto& i : f)
194  {
195  i.second = i.second / n;
196  }
197 }
199 /******************************************************************************/
201 void SequenceContainerTools::getCounts(const SequenceContainer& sequences, std::map<int, int>& f)
202 {
203  for (const auto& name: sequences.getSequenceNames())
204  {
205  const Sequence& seq = sequences.getSequence(name);
206  for (size_t i = 0; i < seq.size(); i++)
207  {
208  f[seq[i]]++;
209  }
210  }
211 }
213 /******************************************************************************/
216 {
 // NOTE(review): the signature and the declaration of `newcont` are not visible in
 // this excerpt; `sequences` and `pos` are presumably the function's parameters - confirm.
 // The input must be built on a CodonAlphabet; anything else is rejected.
217  const CodonAlphabet* calpha = dynamic_cast<const CodonAlphabet*>(sequences.getAlphabet());
218  if (!calpha)
219  throw AlphabetException("SequenceContainerTools::getCodonPosition. Input sequences should be of type codon.");
 // For every input sequence, build a sequence of the same length in which each
 // element is replaced by the result of calpha->getNPosition(element, pos).
221  for (const auto& name: sequences.getSequenceNames())
222  {
223  const Sequence& seq = sequences.getSequence(name);
224  vector<int> newseq(seq.size());
225  for (size_t i = 0; i < seq.size(); i++)
226  {
227  newseq[i] = calpha->getNPosition(seq[i], pos);
228  }
 // The new sequence keeps the original name and comments and is built on the
 // codon alphabet's nucleic alphabet.
229  BasicSequence s(name, newseq, sequences.getComments(name), calpha->getNucleicAlphabet());
230  newcont->addSequence(s);
231  }
232  return newcont;
233 }
235 /******************************************************************************/
std::vector< size_t > SequenceSelection