55 if (blockBuffer_.size() == 0) {
58 MafBlock* block = iterator_->nextBlock();
62 vector< vector<int> > aln;
63 for (
size_t i = 0; i < species_.size(); ++i) {
69 if (aln.size() != species_.size()) {
70 blockBuffer_.push_back(block);
72 (*logstream_ <<
"QUAL CLEANER: block is missing quality score for at least one species and will therefore not be filtered.").endLine();
76 size_t nr = aln.
size();
85 for (i = 0; i < windowSize_; ++i) {
86 for (
size_t j = 0; j < nr; ++j) {
89 window_.push_back(col);
96 while (i + step_ < nc) {
101 double n =
static_cast<double>(aln.size() * windowSize_);
102 for (
size_t u = 0; u < window_.size(); ++u)
103 for (
size_t v = 0; v < window_[u].size(); ++v) {
104 mean += (window_[u][v] > 0 ?
static_cast<double>(window_[u][v]) : 0.);
105 if (window_[u][v] == -1) n--;
107 if (n > 0 && (mean / n) < minQual_) {
108 if (pos.size() == 0) {
109 pos.push_back(i - windowSize_);
112 if (i - windowSize_ <= pos[pos.size() - 1]) {
113 pos[pos.size() - 1] = i;
115 pos.push_back(i - windowSize_);
122 for (
size_t k = 0; k < step_; ++k) {
123 for (
size_t j = 0; j < nr; ++j) {
126 window_.push_back(col);
134 double n =
static_cast<double>(aln.size() * windowSize_);
135 for (
size_t u = 0; u < window_.size(); ++u)
136 for (
size_t v = 0; v < window_[u].size(); ++v) {
137 mean += (window_[u][v] > 0 ?
static_cast<double>(window_[u][v]) : 0.);
138 if (window_[u][v] == -1) n--;
140 if (n > 0 && (mean / n) < minQual_) {
141 if (pos.size() == 0) {
142 pos.push_back(i - windowSize_);
145 if (i - windowSize_ < pos[pos.size() - 1]) {
146 pos[pos.size() - 1] = i;
148 pos.push_back(i - windowSize_);
157 if (pos.size() == 0) {
158 blockBuffer_.push_back(block);
160 (*logstream_ <<
"QUAL CLEANER: block is clean and kept as is.").endLine();
162 }
else if (pos.size() == 2 && pos.front() == 0 && pos.back() == block->
getNumberOfSites()) {
165 (*logstream_ <<
"QUAL CLEANER: block was entirely removed. Tried to get the next one.").endLine();
169 (*logstream_ <<
"QUAL CLEANER: block with size "<< block->
getNumberOfSites() <<
" will be split into " << (pos.size() / 2 + 1) <<
" blocks.").endLine();
175 for (i = 0; i < pos.size(); i+=2) {
179 (*logstream_ <<
"QUAL CLEANER: removing region (" << pos[i] <<
", " << pos[i+1] <<
") from block.").endLine();
195 blockBuffer_.push_back(newBlock);
198 if (keepTrashedBlocks_) {
207 trashBuffer_.push_back(outBlock);
221 blockBuffer_.push_back(newBlock);
229 }
while (blockBuffer_.size() == 0);
232 MafBlock* block = blockBuffer_.front();
233 blockBuffer_.pop_front();
virtual size_t size() const=0
A synteny block data structure, the basic unit of a MAF alignement file.
unsigned int getPass() const
void setScore(double score)
void setPass(unsigned int pass)
size_t getNumberOfSequences() const
size_t getNumberOfSites() const
const MafSequence & getSequence(const std::string &name) const
void addSequence(const MafSequence &sequence)
const MafSequence & getSequenceForSpecies(const std::string &species) const
A sequence class which is used to store data from MAF files.
MafSequence * subSequence(size_t startAt, size_t length) const
Extract a sub-sequence.
MafBlock * analyseCurrentBlock_()
static const std::string QUALITY_SCORE
const std::vector< int > & getScores() const
virtual bool hasAnnotation(const std::string &type) const
virtual const SequenceAnnotation & getAnnotation(const std::string &type) const