89    Digester(
const char *seq, 
size_t len, 
unsigned k, 
size_t start = 0,
 
   91        : seq(seq), len(len), offset(0), start(start), end(start + k), chash(0),
 
   92          fhash(0), rhash(0), k(k), minimized_h(minimized_h) {
 
   93        if (k < 4 or start >= len or (
int) minimized_h > 2) {
 
 
  109    Digester(
const std::string &seq, 
unsigned k, 
size_t start = 0,
 
  111        : 
Digester(seq.c_str(), seq.size(), k, start, minimized_h) {}
 
 
  140            return roll_one_skip_over();
 
  142            return roll_one_write_over();
 
 
  155                                std::vector<uint32_t> &vec) = 0;
 
  167                   std::vector<std::pair<uint32_t, uint32_t>> &vec) = 0;
 
  177    size_t get_pos() { 
return offset + start - c_outs.size(); }
 
  209    virtual void new_seq(
const char *seq, 
size_t len, 
size_t start) {
 
  214        this->end = start + this->k;
 
  215        is_valid_hash = 
false;
 
 
  232    virtual void new_seq(
const std::string &seq, 
size_t pos) {
 
  233        new_seq(seq.c_str(), seq.size(), pos);
 
 
  255            append_seq_skip_over(seq, len);
 
  257            append_seq_write_over(seq, len);
 
 
  279            append_seq_skip_over(seq.c_str(), seq.size());
 
  281            append_seq_write_over(seq.c_str(), seq.size());
 
 
  299    std::array<bool, 256> actg{
 
  300        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
  301        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  302        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  303        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  304        0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  305        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  306        0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  307        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  308        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  309        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  310        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  311        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  312        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  313        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  314        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  315        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 
  327    bool is_ACTG(
char in) { 
return actg[in]; }
 
  341            return init_hash_skip_over();
 
  343            return init_hash_write_over();
 
  347    void append_seq_skip_over(
const char *seq, 
size_t len) {
 
  348        if (end < this->len) {
 
  349            throw NotRolledTillEndException();
 
  352        size_t ind = this->len - 1;
 
  369        if ((start != end || c_outs.size() == k) && c_outs.size() > 0) {
 
  375        std::vector<char> temp_vec;
 
  376        while (temp_vec.size() + c_outs.size() < k - 1 && ind >= start) {
 
  377            if (!is_ACTG(this->seq[ind]))
 
  380            temp_vec.push_back(this->seq[ind]);
 
  386        for (std::vector<char>::reverse_iterator rit = temp_vec.rbegin();
 
  387             rit != temp_vec.rend(); rit++) {
 
  388            c_outs.push_back(*rit);
 
  397        while (c_outs.size() < k && ind < len) {
 
  398            if (!is_ACTG(seq[ind])) {
 
  407            c_outs.push_back(seq[ind]);
 
  414        if (c_outs.size() == k) {
 
  415            std::string temp(c_outs.begin(), c_outs.end());
 
  418            fhash = base_forward_hash(temp.c_str(), k);
 
  419            rhash = base_reverse_hash(temp.c_str(), k);
 
  420            chash = nthash::canonical(fhash, rhash);
 
  421            is_valid_hash = 
true;
 
  427    void append_seq_write_over(
const char *seq, 
size_t len) {
 
  428        if (end < this->len) {
 
  429            throw NotRolledTillEndException();
 
  432        size_t ind = this->len - 1;
 
  434        if ((start != end || c_outs.size() == k) && c_outs.size() > 0) {
 
  440        std::vector<char> temp_vec;
 
  441        while (temp_vec.size() + c_outs.size() < k - 1 && ind >= start) {
 
  442            if (!is_ACTG(this->seq[ind])) {
 
  443                temp_vec.push_back(
'A');
 
  445                temp_vec.push_back(this->seq[ind]);
 
  452        for (std::vector<char>::reverse_iterator rit = temp_vec.rbegin();
 
  453             rit != temp_vec.rend(); rit++) {
 
  454            c_outs.push_back(*rit);
 
  463        while (c_outs.size() < k && ind < len) {
 
  464            if (!is_ACTG(seq[ind])) {
 
  465                c_outs.push_back(
'A');
 
  467                c_outs.push_back(seq[ind]);
 
  476        if (c_outs.size() == k) {
 
  477            std::string temp(c_outs.begin(), c_outs.end());
 
  480            fhash = base_forward_hash(temp.c_str(), k);
 
  481            rhash = base_reverse_hash(temp.c_str(), k);
 
  482            chash = nthash::canonical(fhash, rhash);
 
  483            is_valid_hash = 
true;
 
  489    bool init_hash_skip_over() {
 
  491        while (end - 1 < len) {
 
  493            for (
size_t i = start; i < end; i++) {
 
  494                if (!is_ACTG(seq[i])) {
 
  505            fhash = base_forward_hash(seq + start, k);
 
  506            rhash = base_reverse_hash(seq + start, k);
 
  507            chash = nthash::canonical(fhash, rhash);
 
  508            is_valid_hash = 
true;
 
  511        is_valid_hash = 
false;
 
  517    bool init_hash_write_over() {
 
  519        while (end - 1 < len) {
 
  520            std::string init_str;
 
  521            for (
size_t i = start; i < end; i++) {
 
  522                if (!is_ACTG(seq[i])) {
 
  523                    init_str.push_back(
'A');
 
  525                    init_str.push_back(seq[i]);
 
  530            fhash = base_forward_hash(init_str.c_str(), k);
 
  531            rhash = base_reverse_hash(init_str.c_str(), k);
 
  532            chash = nthash::canonical(fhash, rhash);
 
  533            is_valid_hash = 
true;
 
  536        is_valid_hash = 
false;
 
  540    bool roll_one_skip_over() {
 
  541        if (!is_valid_hash) {
 
  545            is_valid_hash = 
false;
 
  548        if (c_outs.size() > 0) {
 
  549            if (is_ACTG(seq[end])) {
 
  550                fhash = next_forward_hash(fhash, k, c_outs.front(), seq[end]);
 
  551                rhash = next_reverse_hash(rhash, k, c_outs.front(), seq[end]);
 
  554                chash = nthash::canonical(fhash, rhash);
 
  566            if (is_ACTG(seq[end])) {
 
  567                fhash = next_forward_hash(fhash, k, seq[start], seq[end]);
 
  568                rhash = next_reverse_hash(rhash, k, seq[start], seq[end]);
 
  571                chash = nthash::canonical(fhash, rhash);
 
  581    bool roll_one_write_over() {
 
  582        if (!is_valid_hash) {
 
  586            is_valid_hash = 
false;
 
  589        char next_char = is_ACTG(seq[end]) ? seq[end] : 
'A';
 
  590        if (c_outs.size() > 0) {
 
  591            fhash = next_forward_hash(fhash, k, c_outs.front(), next_char);
 
  592            rhash = next_reverse_hash(rhash, k, c_outs.front(), next_char);
 
  597            char out_char = is_ACTG(seq[start]) ? seq[start] : 
'A';
 
  598            fhash = next_forward_hash(fhash, k, out_char, next_char);
 
  599            rhash = next_reverse_hash(rhash, k, out_char, next_char);
 
  603        chash = nthash::canonical(fhash, rhash);
 
  638    std::deque<char> c_outs;
 
  645    bool is_valid_hash = 
false;