23#include "pasta/bit_vector/bit_vector.hpp"
24#include "pasta/bit_vector/support/find_l2_flat_with.hpp"
25#include "pasta/bit_vector/support/flat_rank.hpp"
26#include "pasta/bit_vector/support/l12_type.hpp"
33#if defined(__x86_64__)
34# include <emmintrin.h>
35# include <immintrin.h>
37#include "pasta/utils/debug_asserts.hpp"
40#include <tlx/container/simple_vector.hpp>
72 typename VectorType = BitVector>
88 using Array = tlx::SimpleVector<T, tlx::SimpleVectorMode::NoInitNoDestroy>;
92 std::vector<uint32_t> samples0_;
94 std::vector<uint32_t> samples1_;
124 [[nodiscard(
"select0 computed but not used")]]
size_t
128 size_t const sample_pos =
130 size_t l1_pos = samples0_[sample_pos];
134 while (l1_pos + 1 < l12_end &&
136 l12_[l1_pos + 1].l1() <
142 while (l1_pos + 1 < l12_end &&
l12_[l1_pos + 1].l1() < rank) {
145 rank -=
l12_[l1_pos].l1();
149#if defined(__x86_64__)
151 _mm_loadu_si128(
reinterpret_cast<__m128i const*
>(&
l12_[l1_pos]));
152 __m128i
const shuffle_mask = _mm_setr_epi8(10,
168 value = _mm_shuffle_epi8(value, shuffle_mask);
171 __m128i
const upper_values = _mm_srli_epi16(value, 4);
173 __m128i
const lower_mask = _mm_set1_epi16(uint16_t{0b0000111111111111});
176 __m128i
const lower_values = _mm_and_si128(value, lower_mask);
179 value = _mm_blend_epi16(upper_values, lower_values, 0b01010101);
182 __m128i
const max_ones =
187 std::numeric_limits<int16_t>::max(),
192 value = _mm_sub_epi16(max_ones, value);
198 value = _mm_insert_epi16(value, std::numeric_limits<int16_t>::max(), 4);
204 PASTA_ASSERT(rank <= std::numeric_limits<uint16_t>::max(),
205 "Rank is too large. This should not occur because in this "
206 "block the number of previous bits should reduce the "
207 "local rank further.");
209 cmp_value = _mm_set1_epi16(rank);
211 cmp_value = _mm_set1_epi16(rank - 1);
216 __m128i cmp_result = _mm_cmpgt_epi16(value, cmp_value);
222 uint32_t
const result = _mm_movemask_epi8(cmp_result);
226 l2_pos = (16 - std::popcount(result)) / 2;
229 l12_[l1_pos][l2_pos]);
231 rank -=
l12_[l1_pos][l2_pos];
235 auto tmp =
l12_[l1_pos].data >> 32;
238 ((tmp >> 12) & uint16_t(0b111111111111)) <
245 while (((tmp >> 12) & uint16_t(0b111111111111)) < rank && l2_pos < 7) {
252 (
l12_[l1_pos][l2_pos]);
254 rank -= (
l12_[l1_pos][l2_pos]);
258 auto tmp =
l12_[l1_pos].data >> 44;
260 ((tmp >> 36) & uint16_t(0b111111111111));
262 if (uint16_t
const right =
264 ((tmp >> 60) & uint16_t(0b111111111111));
266 if (uint16_t
const leaf =
268 ((tmp >> 72) & uint16_t(0b111111111111));
277 if (uint16_t
const leaf =
279 ((tmp >> 48) & uint16_t(0b111111111111));
289 if (uint16_t
const left =
291 ((tmp >> 12) & uint16_t(0b111111111111));
293 if (uint16_t
const leaf =
295 ((tmp >> 24) & uint16_t(0b111111111111));
304 if (uint16_t
const leaf =
306 (tmp & uint16_t(0b111111111111));
314 auto tmp =
l12_[l1_pos].data >> 44;
315 if (uint16_t
const mid = ((tmp >> 36) & uint16_t(0b111111111111));
317 if (uint16_t
const right = ((tmp >> 60) & uint16_t(0b111111111111));
319 if (uint16_t
const leaf = ((tmp >> 72) & uint16_t(0b111111111111));
328 if (uint16_t
const leaf = ((tmp >> 48) & uint16_t(0b111111111111));
338 if (uint16_t
const left = ((tmp >> 12) & uint16_t(0b111111111111));
340 if (uint16_t
const leaf = ((tmp >> 24) & uint16_t(0b111111111111));
349 if (uint16_t
const leaf = (tmp & uint16_t(0b111111111111));
361 "Using unsupported search method for l2 entries");
368 while ((
popcount = pasta::popcount_zeros<1>(
data_ + last_pos)) < rank) {
372 return (last_pos * 64) +
select(~
data_[last_pos], rank - 1);
380 [[nodiscard(
"select1 computed but not used")]]
size_t
384 size_t const sample_pos =
386 size_t l1_pos = samples1_[sample_pos];
388 while ((l1_pos + 1) < l12_end &&
l12_[l1_pos + 1].l1() < rank) {
391 rank -=
l12_[l1_pos].l1();
393 while (l1_pos + 1 < l12_end &&
395 l12_[l1_pos + 1].l1() <
403#if defined(__x86_64__)
405 _mm_loadu_si128(
reinterpret_cast<__m128i const*
>(&
l12_[l1_pos]));
406 __m128i
const shuffle_mask = _mm_setr_epi8(10,
422 value = _mm_shuffle_epi8(value, shuffle_mask);
425 __m128i
const upper_values = _mm_srli_epi16(value, 4);
427 __m128i
const lower_mask = _mm_set1_epi16(uint16_t{0b0000111111111111});
430 __m128i
const lower_values = _mm_and_si128(value, lower_mask);
433 value = _mm_blend_epi16(upper_values, lower_values, 0b01010101);
440 value = _mm_insert_epi16(value, std::numeric_limits<int16_t>::max(), 4);
442 __m128i
const max_ones =
447 std::numeric_limits<int16_t>::max(),
452 value = _mm_sub_epi16(max_ones, value);
457 PASTA_ASSERT(rank <= std::numeric_limits<uint16_t>::max(),
458 "Rank is too large. This should not occur because in this "
459 "block the number of previous bits should reduce the "
460 "local rank further.");
461 __m128i
const cmp_value = _mm_set1_epi16(rank - 1);
465 __m128i cmp_result = _mm_cmpgt_epi16(value, cmp_value);
471 uint32_t
const result = _mm_movemask_epi8(cmp_result);
475 l2_pos = (16 - std::popcount(result)) / 2;
477 rank -=
l12_[l1_pos][l2_pos];
480 l12_[l1_pos][l2_pos]);
484 auto tmp =
l12_[l1_pos].data >> 32;
486 while (((tmp >> 12) & uint16_t(0b111111111111)) < rank && l2_pos < 7) {
490 rank -= (
l12_[l1_pos][l2_pos]);
493 ((tmp >> 12) & uint16_t(0b111111111111)) <
500 (
l12_[l1_pos][l2_pos]);
504 auto tmp =
l12_[l1_pos].data >> 44;
505 if (uint16_t
const mid = ((tmp >> 36) & uint16_t(0b111111111111));
507 if (uint16_t
const right = ((tmp >> 60) & uint16_t(0b111111111111));
509 if (uint16_t
const leaf = ((tmp >> 72) & uint16_t(0b111111111111));
518 if (uint16_t
const leaf = ((tmp >> 48) & uint16_t(0b111111111111));
528 if (uint16_t
const left = ((tmp >> 12) & uint16_t(0b111111111111));
530 if (uint16_t
const leaf = ((tmp >> 24) & uint16_t(0b111111111111));
539 if (uint16_t
const leaf = (tmp & uint16_t(0b111111111111));
547 auto tmp =
l12_[l1_pos].data >> 44;
549 ((tmp >> 36) & uint16_t(0b111111111111));
551 if (uint16_t
const right =
553 ((tmp >> 60) & uint16_t(0b111111111111));
555 if (uint16_t
const leaf =
557 ((tmp >> 72) & uint16_t(0b111111111111));
566 if (uint16_t
const leaf =
568 ((tmp >> 48) & uint16_t(0b111111111111));
578 if (uint16_t
const left =
580 ((tmp >> 12) & uint16_t(0b111111111111));
582 if (uint16_t
const leaf =
584 ((tmp >> 24) & uint16_t(0b111111111111));
593 if (uint16_t
const leaf =
595 (tmp & uint16_t(0b111111111111));
607 "Using unsupported search method for l2 entries");
614 while ((
popcount = pasta::popcount<1>(
data_ + last_pos)) < rank) {
618 return (last_pos * 64) +
select(
data_[last_pos], rank - 1);
625 [[nodiscard(
"space usage computed but not used")]]
size_t
627 return samples0_.size() *
sizeof(uint32_t) +
628 samples1_.size() *
sizeof(uint32_t) +
sizeof(*
this);
634 size_t const l12_end =
l12_.size();
635 size_t next_sample0_value = 1;
636 size_t next_sample1_value = 1;
637 for (
size_t l12_pos = 0; l12_pos < l12_end; ++l12_pos) {
640 l12_[l12_pos].l1() >=
641 next_sample0_value) {
642 samples0_.push_back(l12_pos - 1);
645 if (
l12_[l12_pos].l1() >= next_sample1_value) {
646 samples1_.push_back(l12_pos - 1);
650 if (
l12_[l12_pos].l1() >= next_sample0_value) {
651 samples0_.push_back(l12_pos - 1);
655 l12_[l12_pos].l1() >=
656 next_sample1_value) {
657 samples1_.push_back(l12_pos - 1);
663 if (samples0_.size() == 0) [[unlikely]] {
664 samples0_.push_back(0);
666 samples0_.push_back(samples0_.back());
668 if (samples1_.size() == 0) [[unlikely]] {
669 samples1_.push_back(0);
671 samples1_.push_back(samples1_.back());
Select support for BitVector that can be used as an alternative to RankSelect for bit vectors up to l...
Definition flat_rank_select.hpp:73
size_t select0(size_t rank) const
Get position of specific zero, i.e., select.
Definition flat_rank_select.hpp:125
FlatRankSelect(FlatRankSelect &&)=default
Default move constructor.
size_t space_usage() const final
Estimate for the space usage.
Definition flat_rank_select.hpp:626
size_t select1(size_t rank) const
Get position of specific one, i.e., select.
Definition flat_rank_select.hpp:381
FlatRankSelect(VectorType &bv)
Constructor. Creates the auxiliary information for efficient rank and select queries.
Definition flat_rank_select.hpp:106
FlatRankSelect()=default
Default constructor w/o parameter.
~FlatRankSelect()=default
Destructor. Deleting manually created arrays.
FlatRankSelect & operator=(FlatRankSelect &&)=default
Default move assignment.
Rank support for BitVector that can be used as an alternative to Rank for bit vectors up to length 2^...
Definition flat_rank.hpp:82
tlx::SimpleVector< BigL12Type, tlx::SimpleVectorMode::NoInitNoDestroy > l12_
Definition flat_rank.hpp:94
size_t l12_end_
Definition flat_rank.hpp:96
size_t data_size_
Definition flat_rank.hpp:89
BitVector::RawDataConstAccess data_
Definition flat_rank.hpp:91
OptimizedFor
Enum used to specify which queries the rank (and select) data structures should be optimized for.
Definition optimized_for.hpp:40
constexpr bool use_intrinsics(FindL2FlatWith const find_with)
Helper function indicating whether intrinsic function should be used.
Definition find_l2_flat_with.hpp:70
constexpr bool use_linear_search(FindL2FlatWith const find_with)
Helper function indicating whether a linear search function should be used.
Definition find_l2_flat_with.hpp:48
FindL2FlatWith
Enum used to specify whether intrinsic functions should be used.
Definition find_l2_flat_with.hpp:35
constexpr bool optimize_one_or_dont_care(OptimizedFor const optimized_for)
Helper function indicating if queries should be optimized for one queries or the caller does not care...
Definition optimized_for.hpp:59
constexpr bool use_binary_search(FindL2FlatWith const find_with)
Helper function indicating whether a binary search function should be used.
Definition find_l2_flat_with.hpp:59
@ DONT_CARE
It does not matter (both types are called equally often).
uint64_t popcount(uint64_t const *const buffer)
Compute popcount of a specific number of 64-bit words.
Definition popcount.hpp:41
uint64_t select(uint64_t x, uint64_t k)
Select set bit in 64-bit word and return its position starting from the LSB (starting from the left).
Definition select.hpp:144
static constexpr size_t L2_WORD_SIZE
Number of 64-bit words covered by an L2-block.
Definition flat_rank.hpp:50
static constexpr size_t L1_BIT_SIZE
Bits covered by an L1-block.
Definition flat_rank.hpp:44
static constexpr size_t L2_BIT_SIZE
Bits covered by an L2-block.
Definition flat_rank.hpp:42
static constexpr size_t SELECT_SAMPLE_RATE
Sample rate of positions for faster select queries.
Definition flat_rank.hpp:57
static constexpr size_t L1_WORD_SIZE
Number of 64-bit words covered by an L1-block.
Definition flat_rank.hpp:52