bpp-seq3  3.0.0
PhredPhd.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 #include <Bpp/Text/TextTools.h>
7 
8 #include "PhredPhd.h"
9 
10 using namespace bpp;
11 
12 /******************************************************************************/
13 
14 // PhredPhd::PhredPhd() {}
15 
16 /******************************************************************************/
17 
18 bool PhredPhd::nextSequence(std::istream& input, Sequence& seq) const
19 {
20  std::vector<size_t> pos;
21  return nextSequence(input, seq, pos);
22 }
23 
24 /******************************************************************************/
25 
26 bool PhredPhd::nextSequence(std::istream& input, Sequence& seq, std::vector<size_t>& pos) const
27 {
28  if (!input)
29  {
30  throw IOException ("PhredPhd::read: fail to open stream");
31  }
32 
33  bool flag = false;
34  std::string name, sequence = ""; // Initialization
35  std::vector<int> q;
36  std::vector<size_t> p;
37 
38  flag = parseFile_(input, name, sequence, q, p);
39  // Sequence creation
40  if (name == "")
41  throw Exception("PhredPhd::read: sequence without name!");
42  seq.setName(name);
43  seq.setContent(sequence);
44  try
45  {
46  SequenceWithQuality& sq = dynamic_cast<SequenceWithQuality&>(seq);
47  sq.setQualities(q);
48  }
49  catch (...)
50  {}
51  return flag;
52 }
53 
54 /******************************************************************************/
55 
56 bool PhredPhd::parseFile_(std::istream& input, std::string& name, std::string& sequence, std::vector<int>& qual, std::vector<size_t>& pos) const
57 {
58  bool readSeqFlag = false;
59  std::string temp;
60  // Read sequence info
61  // Main loop : for all lines
62  while (!input.eof())
63  {
64  std::getline(input, temp, '\n'); // Copy current line in temporary string
65  StringTokenizer st(temp, " ");
66  if (st.hasMoreToken())
67  {
68  if (st.getToken(0) == "BEGIN_SEQUENCE")
69  {
70  name = st.getToken(1);
71  }
72  std::string flag = st.getToken(0);
73  while (flag != "END_SEQUENCE" && !input.eof())
74  {
75  getline(input, temp, '\n');
76  StringTokenizer st2(temp, " ");
77  if (st2.hasMoreToken())
78  {
79  flag = st2.getToken(0);
80  }
81  if (flag == "BEGIN_DNA")
82  {
83  readSeqFlag = parseDNA_(input, sequence, qual, pos);
84  break; // End the whole loop after parsing DNA
85  }
86  }
87  }
88  }
89  return readSeqFlag;
90 }
91 
92 /******************************************************************************/
93 
94 bool PhredPhd::parseDNA_(std::istream& input, std::string& sequence, std::vector<int>& qual, std::vector<size_t>& pos) const
95 {
96  bool readSeqFlag = false;
97  std::string line_buffer;
98  std::string flag;
99  sequence.clear();
100  qual.clear();
101  pos.clear();
102  while (flag != "END_DNA" && !input.eof())
103  {
104  std::getline(input, line_buffer, '\n');
105  StringTokenizer st(line_buffer, " ");
106  if (st.hasMoreToken())
107  {
108  flag = TextTools::toUpper(st.getToken(0));
109  if (st.numberOfRemainingTokens() == 3)
110  {
111  sequence += flag;
112  qual.push_back(TextTools::toInt(st.getToken(1)));
113  pos.push_back(TextTools::to<size_t>(st.getToken(2)));
114  readSeqFlag = true;
115  }
116  }
117  }
118  return readSeqFlag;
119 }
120 
121 /******************************************************************************/
void setName(const std::string &name) override
Set the name of this sequence.
Definition: CoreSequence.h:172
bool nextSequence(std::istream &input, Sequence &seq) const
Definition: PhredPhd.cpp:18
bool parseDNA_(std::istream &input, std::string &sequence, std::vector< int > &qual, std::vector< size_t > &pos) const
Parse the DNA part of the file.
Definition: PhredPhd.cpp:94
bool parseFile_(std::istream &input, std::string &name, std::string &sequence, std::vector< int > &qual, std::vector< size_t > &pos) const
Global file parser.
Definition: PhredPhd.cpp:56
A SequenceWithAnnotation class with quality scores attached.
void setQualities(const std::vector< int > &quality)
Set the whole quality scores.
A basic implementation of the Sequence interface.
Definition: Sequence.h:117
void setContent(const std::string &sequence) override
Set the whole content of the sequence.
Definition: Sequence.cpp:20
size_t numberOfRemainingTokens() const
bool hasMoreToken() const
const std::string & getToken(size_t pos) const
int toInt(const std::string &s, char scientificNotation='e')
std::string toUpper(const std::string &s)
This alphabet is used to deal with NumericAlphabet.