23#include "pasta/bit_vector/bit_vector.hpp"
24#include "pasta/bit_vector/support/find_l2_flat_with.hpp"
25#include "pasta/bit_vector/support/flat_rank.hpp"
26#include "pasta/bit_vector/support/l12_type.hpp"
33#if defined(__x86_64__)
34# include <emmintrin.h>
35# include <immintrin.h>
37#include "pasta/utils/debug_asserts.hpp"
40#include <tlx/container/simple_vector.hpp>
72 typename VectorType = BitVector>
91 using Array = tlx::SimpleVector<T, tlx::SimpleVectorMode::NoInitNoDestroy>;
95 std::vector<uint32_t> samples0_;
97 std::vector<uint32_t> samples1_;
127 [[nodiscard(
"select0 computed but not used")]]
size_t
131 size_t const sample_pos =
133 size_t l1_pos = samples0_[sample_pos];
137 while (l1_pos + 1 < l12_end &&
139 l12_[l1_pos + 1].l1() <
145 while (l1_pos + 1 < l12_end &&
l12_[l1_pos + 1].l1() < rank) {
148 rank -=
l12_[l1_pos].l1();
152#if defined(__x86_64__)
154 _mm_loadu_si128(
reinterpret_cast<__m128i const*
>(&
l12_[l1_pos]));
155 __m128i
const shuffle_mask = _mm_setr_epi8(10,
171 value = _mm_shuffle_epi8(value, shuffle_mask);
174 __m128i
const upper_values = _mm_srli_epi16(value, 4);
176 __m128i
const lower_mask = _mm_set1_epi16(uint16_t{0b0000111111111111});
179 __m128i
const lower_values = _mm_and_si128(value, lower_mask);
182 value = _mm_blend_epi16(upper_values, lower_values, 0b01010101);
185 __m128i
const max_ones =
190 std::numeric_limits<int16_t>::max(),
195 value = _mm_sub_epi16(max_ones, value);
201 value = _mm_insert_epi16(value, std::numeric_limits<int16_t>::max(), 4);
207 PASTA_ASSERT(rank <= std::numeric_limits<uint16_t>::max(),
208 "Rank is too large. This should not occur because in this "
209 "block the number of previous bits should reduce the "
210 "local rank further.");
212 cmp_value = _mm_set1_epi16(rank);
214 cmp_value = _mm_set1_epi16(rank - 1);
219 __m128i cmp_result = _mm_cmpgt_epi16(value, cmp_value);
225 uint32_t
const result = _mm_movemask_epi8(cmp_result);
229 l2_pos = (16 - std::popcount(result)) / 2;
232 l12_[l1_pos][l2_pos]);
234 rank -=
l12_[l1_pos][l2_pos];
238 auto tmp =
l12_[l1_pos].data >> 32;
241 ((tmp >> 12) & uint16_t(0b111111111111)) <
248 while (((tmp >> 12) & uint16_t(0b111111111111)) < rank && l2_pos < 7) {
255 (
l12_[l1_pos][l2_pos]);
257 rank -= (
l12_[l1_pos][l2_pos]);
261 auto tmp =
l12_[l1_pos].data >> 44;
263 ((tmp >> 36) & uint16_t(0b111111111111));
265 if (uint16_t
const right =
267 ((tmp >> 60) & uint16_t(0b111111111111));
269 if (uint16_t
const leaf =
271 ((tmp >> 72) & uint16_t(0b111111111111));
280 if (uint16_t
const leaf =
282 ((tmp >> 48) & uint16_t(0b111111111111));
292 if (uint16_t
const left =
294 ((tmp >> 12) & uint16_t(0b111111111111));
296 if (uint16_t
const leaf =
298 ((tmp >> 24) & uint16_t(0b111111111111));
307 if (uint16_t
const leaf =
309 (tmp & uint16_t(0b111111111111));
317 auto tmp =
l12_[l1_pos].data >> 44;
318 if (uint16_t
const mid = ((tmp >> 36) & uint16_t(0b111111111111));
320 if (uint16_t
const right = ((tmp >> 60) & uint16_t(0b111111111111));
322 if (uint16_t
const leaf = ((tmp >> 72) & uint16_t(0b111111111111));
331 if (uint16_t
const leaf = ((tmp >> 48) & uint16_t(0b111111111111));
341 if (uint16_t
const left = ((tmp >> 12) & uint16_t(0b111111111111));
343 if (uint16_t
const leaf = ((tmp >> 24) & uint16_t(0b111111111111));
352 if (uint16_t
const leaf = (tmp & uint16_t(0b111111111111));
364 "Using unsupported search method for l2 entries");
383 [[nodiscard(
"select1 computed but not used")]]
size_t
387 size_t const sample_pos =
389 size_t l1_pos = samples1_[sample_pos];
391 while ((l1_pos + 1) < l12_end &&
l12_[l1_pos + 1].l1() < rank) {
394 rank -=
l12_[l1_pos].l1();
396 while (l1_pos + 1 < l12_end &&
398 l12_[l1_pos + 1].l1() <
406#if defined(__x86_64__)
408 _mm_loadu_si128(
reinterpret_cast<__m128i const*
>(&
l12_[l1_pos]));
409 __m128i
const shuffle_mask = _mm_setr_epi8(10,
425 value = _mm_shuffle_epi8(value, shuffle_mask);
428 __m128i
const upper_values = _mm_srli_epi16(value, 4);
430 __m128i
const lower_mask = _mm_set1_epi16(uint16_t{0b0000111111111111});
433 __m128i
const lower_values = _mm_and_si128(value, lower_mask);
436 value = _mm_blend_epi16(upper_values, lower_values, 0b01010101);
443 value = _mm_insert_epi16(value, std::numeric_limits<int16_t>::max(), 4);
445 __m128i
const max_ones =
450 std::numeric_limits<int16_t>::max(),
455 value = _mm_sub_epi16(max_ones, value);
460 PASTA_ASSERT(rank <= std::numeric_limits<uint16_t>::max(),
461 "Rank is too large. This should not occur because in this "
462 "block the number of previous bits should reduce the "
463 "local rank further.");
464 __m128i
const cmp_value = _mm_set1_epi16(rank - 1);
468 __m128i cmp_result = _mm_cmpgt_epi16(value, cmp_value);
474 uint32_t
const result = _mm_movemask_epi8(cmp_result);
478 l2_pos = (16 - std::popcount(result)) / 2;
480 rank -=
l12_[l1_pos][l2_pos];
483 l12_[l1_pos][l2_pos]);
487 auto tmp =
l12_[l1_pos].data >> 32;
489 while (((tmp >> 12) & uint16_t(0b111111111111)) < rank && l2_pos < 7) {
493 rank -= (
l12_[l1_pos][l2_pos]);
496 ((tmp >> 12) & uint16_t(0b111111111111)) <
503 (
l12_[l1_pos][l2_pos]);
507 auto tmp =
l12_[l1_pos].data >> 44;
508 if (uint16_t
const mid = ((tmp >> 36) & uint16_t(0b111111111111));
510 if (uint16_t
const right = ((tmp >> 60) & uint16_t(0b111111111111));
512 if (uint16_t
const leaf = ((tmp >> 72) & uint16_t(0b111111111111));
521 if (uint16_t
const leaf = ((tmp >> 48) & uint16_t(0b111111111111));
531 if (uint16_t
const left = ((tmp >> 12) & uint16_t(0b111111111111));
533 if (uint16_t
const leaf = ((tmp >> 24) & uint16_t(0b111111111111));
542 if (uint16_t
const leaf = (tmp & uint16_t(0b111111111111));
550 auto tmp =
l12_[l1_pos].data >> 44;
552 ((tmp >> 36) & uint16_t(0b111111111111));
554 if (uint16_t
const right =
556 ((tmp >> 60) & uint16_t(0b111111111111));
558 if (uint16_t
const leaf =
560 ((tmp >> 72) & uint16_t(0b111111111111));
569 if (uint16_t
const leaf =
571 ((tmp >> 48) & uint16_t(0b111111111111));
581 if (uint16_t
const left =
583 ((tmp >> 12) & uint16_t(0b111111111111));
585 if (uint16_t
const leaf =
587 ((tmp >> 24) & uint16_t(0b111111111111));
596 if (uint16_t
const leaf =
598 (tmp & uint16_t(0b111111111111));
610 "Using unsupported search method for l2 entries");
628 [[nodiscard(
"space usage computed but not used")]]
size_t
630 return samples0_.size() *
sizeof(uint32_t) +
631 samples1_.size() *
sizeof(uint32_t) +
sizeof(*
this);
637 size_t const l12_end =
l12_.size();
638 size_t next_sample0_value = 1;
639 size_t next_sample1_value = 1;
640 for (
size_t l12_pos = 0; l12_pos < l12_end; ++l12_pos) {
643 l12_[l12_pos].l1() >=
644 next_sample0_value) {
645 samples0_.push_back(l12_pos - 1);
648 if (
l12_[l12_pos].l1() >= next_sample1_value) {
649 samples1_.push_back(l12_pos - 1);
653 if (
l12_[l12_pos].l1() >= next_sample0_value) {
654 samples0_.push_back(l12_pos - 1);
658 l12_[l12_pos].l1() >=
659 next_sample1_value) {
660 samples1_.push_back(l12_pos - 1);
666 if (samples0_.size() == 0) [[unlikely]] {
667 samples0_.push_back(0);
669 samples0_.push_back(samples0_.back());
671 if (samples1_.size() == 0) [[unlikely]] {
672 samples1_.push_back(0);
674 samples1_.push_back(samples1_.back());
Select support for BitVector that can be used as an alternative to RankSelect for bit vectors up to l...
Definition flat_rank_select.hpp:73
size_t select0(size_t rank) const
Get position of specific zero, i.e., select.
Definition flat_rank_select.hpp:128
FlatRankSelect(FlatRankSelect &&)=default
Default move constructor.
size_t space_usage() const final
Estimate for the space usage.
Definition flat_rank_select.hpp:629
size_t select1(size_t rank) const
Get position of specific one, i.e., select.
Definition flat_rank_select.hpp:384
FlatRankSelect(VectorType &bv)
Constructor. Creates the auxiliary information for efficient rank and select queries.
Definition flat_rank_select.hpp:109
FlatRankSelect()=default
Default constructor w/o parameter.
~FlatRankSelect()=default
Destructor. Deleting manually created arrays.
FlatRankSelect & operator=(FlatRankSelect &&)=default
Default move assignment.
Rank support for BitVector that can be used as an alternative to Rank for bit vectors up to length 2^...
Definition flat_rank.hpp:82
tlx::SimpleVector< BigL12Type, tlx::SimpleVectorMode::NoInitNoDestroy > l12_
Definition flat_rank.hpp:97
BitVector::RawDataConstAccess data_access_
Definition flat_rank.hpp:95
size_t l12_end_
Definition flat_rank.hpp:99
uint64_t const * data_
Definition flat_rank.hpp:92
size_t data_size_
Definition flat_rank.hpp:89
OptimizedFor
Enum used to specify which queries the rank (and select) data structures should be optimized for.
Definition optimized_for.hpp:40
constexpr bool use_intrinsics(FindL2FlatWith const find_with)
Helper function indicating whether intrinsic function should be used.
Definition find_l2_flat_with.hpp:70
constexpr bool use_linear_search(FindL2FlatWith const find_with)
Helper function indicating whether a linear search function should be used.
Definition find_l2_flat_with.hpp:48
FindL2FlatWith
Enum used to specify whether intrinsic functions should be used.
Definition find_l2_flat_with.hpp:35
constexpr bool optimize_one_or_dont_care(OptimizedFor const optimized_for)
Helper function indicating if queries should be optimized for one queries or the caller does not care...
Definition optimized_for.hpp:59
constexpr bool use_binary_search(FindL2FlatWith const find_with)
Helper function indicating whether a binary search function should be used.
Definition find_l2_flat_with.hpp:59
@ DONT_CARE
It does not matter (both types are called equally often).
uint64_t popcount(uint64_t const *const buffer)
Compute popcount of a specific number of 64-bit words.
Definition popcount.hpp:41
uint64_t select(uint64_t x, uint64_t k)
Select set bit in 64-bit word and return its position starting from the LSB (starting from the left).
Definition select.hpp:144
static constexpr size_t L2_WORD_SIZE
Number of 64-bit words covered by an L2-block.
Definition flat_rank.hpp:50
static constexpr size_t L1_BIT_SIZE
Bits covered by an L1-block.
Definition flat_rank.hpp:44
static constexpr size_t L2_BIT_SIZE
Bits covered by an L2-block.
Definition flat_rank.hpp:42
static constexpr size_t SELECT_SAMPLE_RATE
Sample rate of positions for faster select queries.
Definition flat_rank.hpp:57
static constexpr size_t L1_WORD_SIZE
Number of 64-bit words covered by an L1-block.
Definition flat_rank.hpp:52