digest
|
Possible implementation for multi-threading the digestion of a single sequence. The key idea is that, by carefully choosing where each digester starts digesting, each k-mer is considered exactly once. For more details on a function, click "More..." to go to its full description, which is located in Modules. More...
Classes | |
class | BadThreadOutParams |
Exception thrown when invalid parameters are passed to the thread functions. More... | |
Functions | |
template<digest::BadCharPolicy P> | |
void | thread_mod (unsigned thread_count, std::vector< std::vector< uint32_t > > &vec, const char *seq, size_t len, unsigned k, uint32_t mod, uint32_t congruence=0, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
template<digest::BadCharPolicy P> | |
void | thread_mod (unsigned thread_count, std::vector< std::vector< uint32_t > > &vec, const std::string &seq, unsigned k, uint32_t mod, uint32_t congruence=0, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
same as the other thread_mod, except it can take a C++ string, and does not need to be provided the length of the string | |
template<digest::BadCharPolicy P> | |
void | thread_mod (unsigned thread_count, std::vector< std::vector< std::pair< uint32_t, uint32_t > > > &vec, const char *seq, size_t len, unsigned k, uint32_t mod, uint32_t congruence=0, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
same as other thread_mod that takes a c-string, except here vec is a vector of vectors of pairs of uint32_ts | |
template<digest::BadCharPolicy P> | |
void | thread_mod (unsigned thread_count, std::vector< std::vector< std::pair< uint32_t, uint32_t > > > &vec, const std::string &seq, unsigned k, uint32_t mod, uint32_t congruence=0, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
same as other thread_mod that takes a C++ string, except here vec is a vector of vectors of pairs of uint32_ts | |
template<digest::BadCharPolicy P, class T > | |
void | thread_wind (unsigned thread_count, std::vector< std::vector< uint32_t > > &vec, const char *seq, size_t len, unsigned k, uint32_t large_wind_kmer_am, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
template<digest::BadCharPolicy P, class T > | |
void | thread_wind (unsigned thread_count, std::vector< std::vector< uint32_t > > &vec, const std::string &seq, unsigned k, uint32_t large_wind_kmer_am, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
same as the other thread_wind, except it can take a C++ string, and does not need to be provided the length of the string | |
template<digest::BadCharPolicy P, class T > | |
void | thread_wind (unsigned thread_count, std::vector< std::vector< std::pair< uint32_t, uint32_t > > > &vec, const char *seq, size_t len, unsigned k, uint32_t large_wind_kmer_am, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
same as other thread_wind that takes a c-string, except here vec is a vector of vectors of pairs of uint32_ts | |
template<digest::BadCharPolicy P, class T > | |
void | thread_wind (unsigned thread_count, std::vector< std::vector< std::pair< uint32_t, uint32_t > > > &vec, const std::string &seq, unsigned k, uint32_t large_wind_kmer_am, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
same as other thread_wind that takes a C++ string, except here vec is a vector of vectors of pairs of uint32_ts | |
template<digest::BadCharPolicy P, class T > | |
void | thread_sync (unsigned thread_count, std::vector< std::vector< uint32_t > > &vec, const char *seq, size_t len, unsigned k, uint32_t large_wind_kmer_am, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
template<digest::BadCharPolicy P, class T > | |
void | thread_sync (unsigned thread_count, std::vector< std::vector< uint32_t > > &vec, const std::string &seq, unsigned k, uint32_t large_wind_kmer_am, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
same as the other thread_sync, except it can take a C++ string, and does not need to be provided the length of the string | |
template<digest::BadCharPolicy P, class T > | |
void | thread_sync (unsigned thread_count, std::vector< std::vector< std::pair< uint32_t, uint32_t > > > &vec, const char *seq, size_t len, unsigned k, uint32_t large_wind_kmer_am, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
same as other thread_sync that takes a c-string, except here vec is a vector of vectors of pairs of uint32_ts | |
template<digest::BadCharPolicy P, class T > | |
void | thread_sync (unsigned thread_count, std::vector< std::vector< std::pair< uint32_t, uint32_t > > > &vec, const std::string &seq, unsigned k, uint32_t large_wind_kmer_am, size_t start=0, digest::MinimizedHashType minimized_h=digest::MinimizedHashType::CANON) |
same as other thread_sync that takes a C++ string, except here vec is a vector of vectors of pairs of uint32_ts | |
Possible implementation for multi-threading the digestion of a single sequence. The key idea is that, by carefully choosing where each digester starts digesting, each k-mer is considered exactly once. For more details on a function, click "More..." to go to its full description, which is located in Modules.
void digest::thread_out::thread_mod | ( | unsigned | thread_count, |
std::vector< std::vector< std::pair< uint32_t, uint32_t > > > & | vec, | ||
const char * | seq, | ||
size_t | len, | ||
unsigned | k, | ||
uint32_t | mod, | ||
uint32_t | congruence = 0 , |
||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
same as other thread_mod that takes a c-string, except here vec is a vector of vectors of pairs of uint32_ts
vec | vec will contain both the index and the hash of minimizers. All other things previously stated about vec remain true |
void digest::thread_out::thread_mod | ( | unsigned | thread_count, |
std::vector< std::vector< std::pair< uint32_t, uint32_t > > > & | vec, | ||
const std::string & | seq, | ||
unsigned | k, | ||
uint32_t | mod, | ||
uint32_t | congruence = 0 , |
||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
same as other thread_mod that takes a C++ string, except here vec is a vector of vectors of pairs of uint32_ts
vec | vec will contain both the index and the hash of minimizers. All other things previously stated about vec remain true |
void digest::thread_out::thread_mod | ( | unsigned | thread_count, |
std::vector< std::vector< uint32_t > > & | vec, | ||
const char * | seq, | ||
size_t | len, | ||
unsigned | k, | ||
uint32_t | mod, | ||
uint32_t | congruence = 0 , |
||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
thread_count | the number of threads to use |
vec | a vector of vectors in which the minimizers will be placed. Each vector corresponds to one thread. The minimizers within each vector will be in ascending order by index, and the vectors themselves will also be in ascending order by index, i.e. all minimizers in vector_i will go before all minimizers in vector_(i+1). |
seq | char pointer pointing to the c-string of DNA sequence to be hashed. |
len | length of seq. |
k | k-mer size. |
mod | mod space to be used to calculate universal minimizers |
congruence | value we want minimizer hashes to be congruent to in the mod space |
start | 0-indexed position in seq to start hashing from. |
minimized_h | hash to be minimized, 0 for canonical, 1 for forward, 2 for reverse |
BadThreadOutParams |
void digest::thread_out::thread_mod | ( | unsigned | thread_count, |
std::vector< std::vector< uint32_t > > & | vec, | ||
const std::string & | seq, | ||
unsigned | k, | ||
uint32_t | mod, | ||
uint32_t | congruence = 0 , |
||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
same as the other thread_mod, except it can take a C++ string, and does not need to be provided the length of the string
seq | C++ string of DNA sequence to be hashed. |
void digest::thread_out::thread_sync | ( | unsigned | thread_count, |
std::vector< std::vector< std::pair< uint32_t, uint32_t > > > & | vec, | ||
const char * | seq, | ||
size_t | len, | ||
unsigned | k, | ||
uint32_t | large_wind_kmer_am, | ||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
same as other thread_sync that takes a c-string, except here vec is a vector of vectors of pairs of uint32_ts
vec | vec will contain both the index and the hash of minimizers. All other things previously stated about vec remain true |
void digest::thread_out::thread_sync | ( | unsigned | thread_count, |
std::vector< std::vector< std::pair< uint32_t, uint32_t > > > & | vec, | ||
const std::string & | seq, | ||
unsigned | k, | ||
uint32_t | large_wind_kmer_am, | ||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
same as other thread_sync that takes a C++ string, except here vec is a vector of vectors of pairs of uint32_ts
vec | vec will contain both the index and the hash of minimizers. All other things previously stated about vec remain true |
void digest::thread_out::thread_sync | ( | unsigned | thread_count, |
std::vector< std::vector< uint32_t > > & | vec, | ||
const char * | seq, | ||
size_t | len, | ||
unsigned | k, | ||
uint32_t | large_wind_kmer_am, | ||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
P | policy for dealing with non-ACTG characters |
T | min query data structure to use, refer to docs of the classes in the ds namespace for more info |
thread_count | the number of threads to use |
vec | a vector of vectors in which the minimizers will be placed. Each vector corresponds to one thread. The minimizers within each vector will be in ascending order by index, and the vectors themselves will also be in ascending order by index, i.e. all minimizers in vector_i will go before all minimizers in vector_(i+1). |
seq | char pointer pointing to the c-string of DNA sequence to be hashed. |
len | length of seq. |
k | k-mer size. |
large_wind_kmer_am | number of k-mers in the large window |
start | 0-indexed position in seq to start hashing from. |
minimized_h | hash to be minimized, 0 for canonical, 1 for forward, 2 for reverse |
BadThreadOutParams |
void digest::thread_out::thread_sync | ( | unsigned | thread_count, |
std::vector< std::vector< uint32_t > > & | vec, | ||
const std::string & | seq, | ||
unsigned | k, | ||
uint32_t | large_wind_kmer_am, | ||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
same as the other thread_sync, except it can take a C++ string, and does not need to be provided the length of the string
seq | C++ string of DNA sequence to be hashed. |
void digest::thread_out::thread_wind | ( | unsigned | thread_count, |
std::vector< std::vector< std::pair< uint32_t, uint32_t > > > & | vec, | ||
const char * | seq, | ||
size_t | len, | ||
unsigned | k, | ||
uint32_t | large_wind_kmer_am, | ||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
same as other thread_wind that takes a c-string, except here vec is a vector of vectors of pairs of uint32_ts
vec | vec will contain both the index and the hash of minimizers. All other things previously stated about vec remain true |
void digest::thread_out::thread_wind | ( | unsigned | thread_count, |
std::vector< std::vector< std::pair< uint32_t, uint32_t > > > & | vec, | ||
const std::string & | seq, | ||
unsigned | k, | ||
uint32_t | large_wind_kmer_am, | ||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
same as other thread_wind that takes a C++ string, except here vec is a vector of vectors of pairs of uint32_ts
vec | vec will contain both the index and the hash of minimizers. All other things previously stated about vec remain true |
void digest::thread_out::thread_wind | ( | unsigned | thread_count, |
std::vector< std::vector< uint32_t > > & | vec, | ||
const char * | seq, | ||
size_t | len, | ||
unsigned | k, | ||
uint32_t | large_wind_kmer_am, | ||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
P | policy for dealing with non-ACTG characters |
T | min query data structure to use, refer to docs of the classes in the ds namespace for more info |
thread_count | the number of threads to use |
vec | a vector of vectors in which the minimizers will be placed. Each vector corresponds to one thread. The minimizers within each vector will be in ascending order by index, and the vectors themselves will also be in ascending order by index, i.e. all minimizers in vector_i will go before all minimizers in vector_(i+1). |
seq | char pointer pointing to the c-string of DNA sequence to be hashed. |
len | length of seq. |
k | k-mer size. |
large_wind_kmer_am | number of k-mers in the large window |
start | 0-indexed position in seq to start hashing from. |
minimized_h | hash to be minimized, 0 for canonical, 1 for forward, 2 for reverse |
BadThreadOutParams |
void digest::thread_out::thread_wind | ( | unsigned | thread_count, |
std::vector< std::vector< uint32_t > > & | vec, | ||
const std::string & | seq, | ||
unsigned | k, | ||
uint32_t | large_wind_kmer_am, | ||
size_t | start = 0 , |
||
digest::MinimizedHashType | minimized_h = digest::MinimizedHashType::CANON |
||
) |
same as the other thread_wind, except it can take a C++ string, and does not need to be provided the length of the string
seq | C++ string of DNA sequence to be hashed. |