digest
Loading...
Searching...
No Matches
window_minimizer.hpp
1#ifndef WINDOW_MINIMIZER_HPP
2#define WINDOW_MINIMIZER_HPP
3
4#include "data_structure.hpp"
5#include "digest/digester.hpp"
6#include <cstddef>
7#include <cstdint>
8
9namespace digest {
10
/**
 * Exception thrown when initializing a Window Minimizer or Syncmer with a
 * large window size of 0.
 */
class BadWindowSizeException : public std::exception {
  public:
    /**
     * @return a static, human-readable description of the error.
     *
     * Declared public so the message is reachable when the exception is
     * caught by its concrete type, not only through std::exception.
     */
    const char *what() const noexcept override {
        return "Number of kmers in large window cannot be 0";
    }
};
22
33template <BadCharPolicy P, class T> class WindowMin : public Digester<P> {
34 public:
// Constructs a window minimizer over a raw character buffer.
//
// seq, len, k, start and minimized_h are forwarded to the Digester<P> base.
// large_window is used both to construct the data structure `ds` and as the
// stored window size. ds_size starts at 0 and is_minimized at false.
// prev_mini is not initialized here; check() only reads it once is_minimized
// is true.
46 WindowMin(const char *seq, size_t len, unsigned k, unsigned large_window,
47 size_t start = 0,
// NOTE(review): `minimized_h` is used below but is not declared in the visible
// parameter list. The generated signature summary for this constructor lists a
// trailing `MinimizedHashType minimized_h = MinimizedHashType::CANON`
// parameter; that line appears to be missing from this listing.
49 : Digester<P>(seq, len, k, start, minimized_h), ds(large_window),
50 large_window(large_window), ds_size(0), is_minimized(false) {
// Member initialization above runs before this check, so `ds` has already been
// constructed with large_window by the time a zero size is detected.
51 if (large_window == 0) {
// NOTE(review): the body of this branch is not visible in this listing.
// Presumably it throws BadWindowSizeException -- confirm against the real
// header.
53 }
54 }
55
// Convenience constructor taking a std::string.
//
// Delegates to the (const char *, size_t, ...) constructor with seq.c_str()
// and seq.size(); all other arguments are passed through unchanged.
66 WindowMin(const std::string &seq, unsigned k, unsigned large_window,
67 size_t start = 0,
// NOTE(review): `minimized_h` is forwarded below but its declaration is not
// visible here. The generated signature summary shows a trailing
// `MinimizedHashType minimized_h = MinimizedHashType::CANON` parameter.
69 : WindowMin<P, T>(seq.c_str(), seq.size(), k, large_window, start,
70 minimized_h) {}
71
// Adds up to `amount` minimizer positions to vec.
//
// Works in two phases:
//   1. warm-up: while fewer than large_window - 1 entries have been inserted
//      (tracked by ds_size) and the current hash is valid, insert the current
//      position with one of chash / fhash / rhash into `ds` and advance one
//      k-mer. Nothing is appended to vec in this phase.
//   2. emit: call roll_ds_wind(vec) until the hash becomes invalid or vec has
//      grown by `amount` entries.
80 void roll_minimizer(unsigned amount, std::vector<uint32_t> &vec) override {
// Turn `amount` from "how many to add" into "target size of vec".
81 amount += vec.size();
82
83 while (ds_size + 1 < large_window and this->is_valid_hash) {
// NOTE(review): the opening `if (...) {` of this chain is not visible in this
// listing. Presumably it tests get_minimized_h() for the canonical-hash case
// -- confirm against the real header.
85 ds.insert(this->get_pos(), this->chash);
86 } else if (this->get_minimized_h() ==
// NOTE(review): the right-hand side of the comparison above is also missing
// from this listing.
88 ds.insert(this->get_pos(), this->fhash);
89 } else {
90 ds.insert(this->get_pos(), this->rhash);
91 }
92
93 this->roll_one();
94 ds_size++;
95 }
96
97 while (this->is_valid_hash and vec.size() < amount) {
98 roll_ds_wind(vec);
99 }
100 }
101
// Adds up to `amount` (position, hash) pairs of minimizers to vec.
//
// Same two phases as the position-only overload: a warm-up that inserts into
// `ds` without emitting until ds_size + 1 reaches large_window, followed by
// repeated roll_ds_wind(vec) calls until the hash becomes invalid or vec has
// grown by `amount` entries.
110 void
111 roll_minimizer(unsigned amount,
112 std::vector<std::pair<uint32_t, uint32_t>> &vec) override {
// Turn `amount` from "how many to add" into "target size of vec".
113 amount += vec.size();
114
115 while (ds_size + 1 < large_window and this->is_valid_hash) {
// NOTE(review): the opening `if (...) {` of this chain is not visible in this
// listing. Presumably it tests get_minimized_h() for the canonical-hash case
// -- confirm against the real header.
117 ds.insert(this->get_pos(), this->chash);
118 } else if (this->get_minimized_h() ==
// NOTE(review): the right-hand side of the comparison above is also missing
// from this listing.
120 ds.insert(this->get_pos(), this->fhash);
121 } else {
122 ds.insert(this->get_pos(), this->rhash);
123 }
124
125 this->roll_one();
126 ds_size++;
127 }
128
129 while (this->is_valid_hash and vec.size() < amount) {
130 roll_ds_wind(vec);
131 }
132 }
133
134 void new_seq(const char *seq, size_t len, size_t start) override {
135 ds = T(large_window);
136 Digester<P>::new_seq(seq, len, start);
137 }
138
139 void new_seq(const std::string &seq, size_t pos) override {
140 ds = T(large_window);
141 Digester<P>::new_seq(seq.c_str(), seq.size(), pos);
142 }
143
// Returns the stored large_window value, i.e. the window size this object was
// constructed with.
148 unsigned get_large_wind_kmer_am() { return large_window; }
149
// Returns ds_size, the internal counter of values inserted during the warm-up
// phase of roll_minimizer.
150 // function is mainly to help with tests
157 size_t get_ds_size() { return ds_size; }
158
// Returns true once check() has recorded the first minimizer.
159 // function is mainly to help with tests
166 bool get_is_minimized() { return is_minimized; }
167
168 protected:
169 // data structure which will find minimum
170 T ds;
171
172 uint32_t large_window;
173
174 // internal counter that tracks the number of actual values in the data
175 // structure
176 size_t ds_size;
177
178 // internal bool keeping track of if we have obtained the first minimizer
179 // yet, because we don't want to add a position to the vector if it's
180 // already in there
181 bool is_minimized;
182
183 // the index of previous minimizer, a minimizer is only a new minimizer if
184 // it is different from the previous minimizer
185 uint32_t prev_mini;
186
187 private:
// Advances the window by one k-mer: inserts the current position with one of
// chash / fhash / rhash into `ds`, lets check() append the window minimum's
// position to vec if it is new, then moves on to the next k-mer.
193 void roll_ds_wind(std::vector<uint32_t> &vec) {
// NOTE(review): the opening `if (...) {` of this chain is not visible in this
// listing. Presumably it tests get_minimized_h() for the canonical-hash case
// -- confirm against the real header.
195 ds.insert(this->get_pos(), this->chash);
196 } else if (this->get_minimized_h() ==
// NOTE(review): the right-hand side of the comparison above is also missing
// from this listing.
198 ds.insert(this->get_pos(), this->fhash);
199 } else {
200 ds.insert(this->get_pos(), this->rhash);
201 }
// check() must run after the insert and before roll_one() so that it sees the
// window that ends at the k-mer just inserted.
202 check(vec);
203
204 this->roll_one();
205 }
206
// Advances the window by one k-mer: inserts the current position with one of
// chash / fhash / rhash into `ds`, lets check() append the window minimum's
// (position, hash) pair to vec if it is new, then moves on to the next k-mer.
212 void roll_ds_wind(std::vector<std::pair<uint32_t, uint32_t>> &vec) {
// NOTE(review): the opening `if (...) {` of this chain is not visible in this
// listing. Presumably it tests get_minimized_h() for the canonical-hash case
// -- confirm against the real header.
214 ds.insert(this->get_pos(), this->chash);
215 } else if (this->get_minimized_h() ==
// NOTE(review): the right-hand side of the comparison above is also missing
// from this listing.
217 ds.insert(this->get_pos(), this->fhash);
218 } else {
219 ds.insert(this->get_pos(), this->rhash);
220 }
// check() must run after the insert and before roll_one() so that it sees the
// window that ends at the k-mer just inserted.
221 check(vec);
222
223 this->roll_one();
224 }
225
232 void check(std::vector<uint32_t> &vec) {
233 if (is_minimized) {
234 if (ds.min() != prev_mini) {
235 prev_mini = ds.min();
236 vec.emplace_back(prev_mini);
237 }
238 } else {
239 is_minimized = true;
240 prev_mini = ds.min();
241 vec.emplace_back(prev_mini);
242 }
243 }
244
251 void check(std::vector<std::pair<uint32_t, uint32_t>> &vec) {
252 if (is_minimized) {
253 if (ds.min() != prev_mini) {
254 prev_mini = ds.min();
255 vec.emplace_back(prev_mini, ds.min_hash());
256 }
257 } else {
258 is_minimized = true;
259 prev_mini = ds.min();
260 vec.emplace_back(prev_mini, ds.min_hash());
261 }
262 }
263};
264
265} // namespace digest
266
267#endif // WINDOW_MINIMIZER_HPP
Exception thrown when initializing a Window Minimizer or Syncmer with a large window size of 0.
Definition window_minimizer.hpp:17
an abstract class for Digester objects.
Definition digester.hpp:75
bool roll_one()
moves the internal pointer to the next valid k-mer. Time Complexity: O(1)
Definition digester.hpp:138
MinimizedHashType get_minimized_h()
Definition digester.hpp:289
size_t get_pos()
Definition digester.hpp:177
virtual void new_seq(const char *seq, size_t len, size_t start)
replaces the current sequence with the new one. It's like starting over with a completely new sequenc...
Definition digester.hpp:209
Child class of Digester that defines a minimizer as a kmer whose hash is minimal among those in the l...
Definition window_minimizer.hpp:33
void roll_minimizer(unsigned amount, std::vector< uint32_t > &vec) override
adds up to amount of positions of minimizers into vec. Here a k-mer is considered a minimizer if its ...
Definition window_minimizer.hpp:80
void new_seq(const char *seq, size_t len, size_t start) override
replaces the current sequence with the new one. It's like starting over with a completely new sequenc...
Definition window_minimizer.hpp:134
WindowMin(const std::string &seq, unsigned k, unsigned large_window, size_t start=0, MinimizedHashType minimized_h=MinimizedHashType::CANON)
Definition window_minimizer.hpp:66
void new_seq(const std::string &seq, size_t pos) override
replaces the current sequence with the new one. It's like starting over with a completely new sequenc...
Definition window_minimizer.hpp:139
size_t get_ds_size()
gets the size of the internal rmq data structure being used. Mainly used to help with tests (so you p...
Definition window_minimizer.hpp:157
void roll_minimizer(unsigned amount, std::vector< std::pair< uint32_t, uint32_t > > &vec) override
adds up to amount of positions and hashes of minimizers into vec. Here a k-mer is considered a minimi...
Definition window_minimizer.hpp:111
unsigned get_large_wind_kmer_am()
Definition window_minimizer.hpp:148
bool get_is_minimized()
checks if we have generated the first minimizer. Mainly used to help with tests (so you probably shou...
Definition window_minimizer.hpp:166
WindowMin(const char *seq, size_t len, unsigned k, unsigned large_window, size_t start=0, MinimizedHashType minimized_h=MinimizedHashType::CANON)
Definition window_minimizer.hpp:46
digest code.
Definition data_structure.hpp:27
MinimizedHashType
Enum values for the type of hash to minimize.
Definition digester.hpp:41