55 if (blockBuffer_.size() == 0) {
58 MafBlock* block = iterator_->nextBlock();
62 vector< vector<bool> > aln;
63 for (
size_t i = 0; i < species_.size(); ++i) {
71 size_t nr = aln.
size();
80 for (i = 0; i < windowSize_; ++i) {
81 for (
size_t j = 0; j < nr; ++j) {
84 window_.push_back(col);
91 while (i + step_ < nc) {
96 for (
size_t u = 0; u < window_.size(); ++u)
97 for (
size_t v = 0; v < window_[u].size(); ++v)
98 if (window_[u][v]) sum++;
99 if (sum > maxMasked_) {
100 if (pos.size() == 0) {
101 pos.push_back(i - windowSize_);
104 if (i - windowSize_ <= pos[pos.size() - 1]) {
105 pos[pos.size() - 1] = i;
107 pos.push_back(i - windowSize_);
114 for (
size_t k = 0; k < step_; ++k) {
115 for (
size_t j = 0; j < nr; ++j) {
118 window_.push_back(col);
125 unsigned int sum = 0;
126 for (
size_t u = 0; u < window_.size(); ++u)
127 for (
size_t v = 0; v < window_[u].size(); ++v)
128 if (window_[u][v]) sum++;
129 if (sum > maxMasked_) {
130 if (pos.size() == 0) {
131 pos.push_back(i - windowSize_);
134 if (i - windowSize_ < pos[pos.size() - 1]) {
135 pos[pos.size() - 1] = i;
137 pos.push_back(i - windowSize_);
146 if (pos.size() == 0) {
147 blockBuffer_.push_back(block);
149 (*logstream_ <<
"MASK CLEANER: block is clean and kept as is.").endLine();
151 }
else if (pos.size() == 2 && pos.front() == 0 && pos.back() == block->
getNumberOfSites()) {
154 (*logstream_ <<
"MASK CLEANER: block was entirely removed. Tried to get the next one.").endLine();
158 (*logstream_ <<
"MASK CLEANER: block with size "<< block->
getNumberOfSites() <<
" will be split into " << (pos.size() / 2 + 1) <<
" blocks.").endLine();
164 for (i = 0; i < pos.size(); i+=2) {
168 (*logstream_ <<
"MASK CLEANER: removing region (" << pos[i] <<
", " << pos[i+1] <<
") from block.").endLine();
184 blockBuffer_.push_back(newBlock);
187 if (keepTrashedBlocks_) {
196 trashBuffer_.push_back(outBlock);
210 blockBuffer_.push_back(newBlock);
217 }
while (blockBuffer_.size() == 0);
220 MafBlock* block = blockBuffer_.front();
221 blockBuffer_.pop_front();
virtual size_t size() const=0
A synteny block data structure, the basic unit of a MAF alignment file.
unsigned int getPass() const
void setScore(double score)
void setPass(unsigned int pass)
size_t getNumberOfSequences() const
size_t getNumberOfSites() const
const MafSequence & getSequence(const std::string &name) const
void addSequence(const MafSequence &sequence)
bool hasSequenceForSpecies(const std::string &species) const
const MafSequence & getSequenceForSpecies(const std::string &species) const
A sequence class which is used to store data from MAF files.
MafSequence * subSequence(size_t startAt, size_t length) const
Extract a sub-sequence.
MafBlock * analyseCurrentBlock_()
const std::vector< bool > & getMask() const
static const std::string MASK
virtual bool hasAnnotation(const std::string &type) const
virtual const SequenceAnnotation & getAnnotation(const std::string &type) const