pasta_bit_vector  1.0.1
Bit Vector with Compact and Fast Rank and Select Support
Loading...
Searching...
No Matches
flat_rank.hpp
1/*******************************************************************************
2 * This file is part of pasta::bit_vector.
3 *
4 * Copyright (C) 2021 Florian Kurpicz <florian@kurpicz.org>
5 *
6 * pasta::bit_vector is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * pasta::bit_vector is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with pasta::bit_vector. If not, see <http://www.gnu.org/licenses/>.
18 *
19 ******************************************************************************/
20
21#pragma once
22
23#include "pasta/bit_vector/bit_vector.hpp"
24#include "pasta/bit_vector/support/find_l2_flat_with.hpp"
25#include "pasta/bit_vector/support/l12_type.hpp"
28
29#include <numeric>
30#include <pasta/utils/debug_asserts.hpp>
31#include <tlx/container/simple_vector.hpp>
32
33namespace pasta {
34
35/*!
36 * \ingroup pasta_bit_vector_configuration
37 * \brief Static configuration for \c FlatRank and
38 * \c FlatRankSelect
39 */
41 //! Bits covered by an L2-block.
42 static constexpr size_t L2_BIT_SIZE = 512;
43 //! Bits covered by an L1-block.
44 static constexpr size_t L1_BIT_SIZE = 8 * L2_BIT_SIZE;
45 //! Bits covered by an L0-block.
46 static constexpr size_t L0_BIT_SIZE =
47 static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) + 1;
48
49 //! Number of 64-bit words covered by an L2-block.
50 static constexpr size_t L2_WORD_SIZE = L2_BIT_SIZE / (sizeof(uint64_t) * 8);
51 //! Number of 64-bit words covered by an L1-block.
52 static constexpr size_t L1_WORD_SIZE = L1_BIT_SIZE / (sizeof(uint64_t) * 8);
53 //! Number of 64-bit words covered by an L0-block.
54 static constexpr size_t L0_WORD_SIZE = L0_BIT_SIZE / (sizeof(uint64_t) * 8);
55
56 //! Sample rate of positions for faster select queries.
57 static constexpr size_t SELECT_SAMPLE_RATE = 8192;
58}; // struct FlatRankSelectConfig
59
60//! \addtogroup pasta_bit_vector_rank
61//! \{
62
63/*!
64 * \brief %Rank support for \ref BitVector that can be used as an alternative
65 * to \ref Rank for bit vectors up to length 2^40.
66 *
67 * The rank support is an extended and engineered version of the popcount rank
68 * support by Zhou et al. \cite ZhouAK2013PopcountRankSelect. This flat rank
69 * support hoverer removes the highest utility array (L0) and also groups
70 * twice as many L2-blocks in a single L1-block. This allows us to store more
71 * information---most importantly the number of ones w.r.t. the beginning of
72 * the L1-block---in the L2-blocks. For the L1/2-block layout see
73 * \ref BigL12Type.
74 *
75 * \tparam OptimizedFor Compile time option to optimize data structure for
76 * either 0, 1, or no specific type of query.
77 * \tparam VectorType Type of the vector the rank data structure is constructed
78 * for, e.g., plain \c BitVector or a compressed bit vector.
79 */
80template <OptimizedFor optimized_for = OptimizedFor::DONT_CARE,
81 typename VectorType = BitVector>
82class FlatRank {
83 //! Friend class, using internal information l12_.
84 template <OptimizedFor o, FindL2FlatWith f, typename v>
85 friend class FlatRankSelect;
86
87protected:
88 //! Size of the bit vector the rank support is constructed for.
89 size_t data_size_;
90 //! Pointer to the data of the uncompressed bit vector. Can be invalid in
91 //! after compressable bit vector is compressed.
92 uint64_t const* data_;
93 //! Pointer to the data of the bit vector. This is mutable to enable caching
94 //! in the compressed bit vector variant.
95 typename VectorType::RawDataConstAccess mutable data_access_;
96 //! Array containing the information about the L1- and L2-blocks.
97 tlx::SimpleVector<BigL12Type, tlx::SimpleVectorMode::NoInitNoDestroy> l12_;
98 //! Number of actual existing BigL12-blocks (important for scanning)
99 size_t l12_end_ = 0;
100
101public:
102 //! Default constructor w/o parameter.
103 FlatRank() = default;
104
105 /*!
106 * \brief Constructor. Creates the auxiliary information for efficient rank
107 * queries.
108 * \param bv Vector of \c VectorType the rank structure is created for.
109 */
110 FlatRank(VectorType& bv)
111 : data_size_(bv.data().size()),
112 data_(bv.data().data()),
113 data_access_(),
114 l12_((data_size_ / FlatRankSelectConfig::L1_WORD_SIZE) + 1) {
115 init();
116 if constexpr (!std::is_same_v<VectorType, BitVector>) {
117 bv.compress();
118 data_access_ = bv.compresed_data();
119 } else {
121 }
122 }
123
124 /*!
125 * \brief Computes rank of zeros.
126 * \param index Index the rank of zeros is computed for.
127 * \return Number of zeros (rank) before position \c index.
128 */
129 [[nodiscard("rank0 computed but not used")]] size_t
130 rank0(size_t index) const {
131 return index - rank1(index);
132 }
133
134 /*!
135 * \brief Computes rank of ones.
136 * \param index Index the rank of ones is computed for.
137 * \return Number of ones (rank) before position \c index.
138 */
139 [[nodiscard("rank1 computed but not used")]] size_t
140 rank1(size_t index) const {
141 size_t offset = ((index / 512) * 8);
142 size_t const l1_pos = index / FlatRankSelectConfig::L1_BIT_SIZE;
143 size_t const l2_pos = ((index % FlatRankSelectConfig::L1_BIT_SIZE) /
145 size_t result = l12_[l1_pos].l1() + l12_[l1_pos][l2_pos];
146
147 // It is faster to not have a specialized rank0 function when
148 // optimized for zero queries, because there is no popcount for
149 // zero equivalent and for all popcounts in this code, the words
150 // would have to be bit-wise negated, which is more expensive than
151 // the computation below.
152 if constexpr (!optimize_one_or_dont_care(optimized_for)) {
153 result = ((l1_pos * FlatRankSelectConfig::L1_BIT_SIZE) +
155 result;
156 }
157
159 PASTA_ASSERT(index < 512,
160 "Trying to access bits that should be "
161 "covered in an L1-block");
162 for (size_t i = 0; i < index / 64; ++i) {
163 result += std::popcount(data_access_[offset++]);
164 }
165 if (index %= 64; index > 0) [[likely]] {
166 uint64_t const remaining = (data_access_[offset]) << (64 - index);
167 result += std::popcount(remaining);
168 }
169 return result;
170 }
171
172 /*!
173 * \brief Estimate for the space usage.
174 * \return Number of bytes used by this data structure.
175 */
176 [[nodiscard("space useage computed but not used")]] virtual size_t
177 space_usage() const {
178 return l12_.size() * sizeof(BigL12Type) + sizeof(*this);
179 }
180
181private:
182 //! Function used for initializing data structure to reduce LOCs of
183 //! constructor.
184 void init() {
185 uint64_t const* data = data_;
186 uint64_t const* const data_end = data_ + data_size_;
187
188 uint64_t l1_entry = 0ULL;
189 std::array<uint16_t, 7> l2_entries = {0, 0, 0, 0, 0, 0, 0};
190
191 while (data + 64 < data_end) {
192 if constexpr (optimize_one_or_dont_care(optimized_for)) {
193 l2_entries[0] = popcount<8>(data);
194 } else {
195 l2_entries[0] = popcount_zeros<8>(data);
196 }
197 data += 8;
198 for (size_t i = 1; i < 7; ++i) {
199 if constexpr (optimize_one_or_dont_care(optimized_for)) {
200 l2_entries[i] = l2_entries[i - 1] + popcount<8>(data);
201 } else {
202 l2_entries[i] = l2_entries[i - 1] + popcount_zeros<8>(data);
203 }
204 data += 8;
205 }
206 l12_[l12_end_++] = BigL12Type(l1_entry, l2_entries);
207 if constexpr (optimize_one_or_dont_care(optimized_for)) {
208 l1_entry += l2_entries.back() + popcount<8>(data);
209 } else {
210 l1_entry += l2_entries.back() + popcount_zeros<8>(data);
211 }
212 data += 8;
213 }
214 size_t l2_pos = 0;
215 l2_entries = {0, 0, 0, 0, 0, 0, 0};
216 while (data + 8 < data_end) {
217 if constexpr (optimize_one_or_dont_care(optimized_for)) {
218 l2_entries[l2_pos++] = popcount<8>(data);
219 } else {
220 l2_entries[l2_pos++] = popcount_zeros<8>(data);
221 }
222 data += 8;
223 }
224 if (l2_pos < 7) {
225 while (data < data_end) {
226 if constexpr (optimize_one_or_dont_care(optimized_for)) {
227 l2_entries[l2_pos] += popcount<1>(data++);
228 } else {
229 l2_entries[l2_pos] += popcount_zeros<1>(data++);
230 }
231 }
232 }
233 std::partial_sum(l2_entries.begin(), l2_entries.end(), l2_entries.begin());
234 l12_[l12_end_++] = BigL12Type(l1_entry, l2_entries);
235 }
236}; // class FlatRank
237
238//! \}
239
240} // namespace pasta
241
242/******************************************************************************/
Uncompressed, highly tuned, fixed size bit vector.
Definition bit_vector.hpp:178
Select support for BitVector that can be used as an alternative to RankSelect for bit vectors up to l...
Definition flat_rank_select.hpp:73
Rank support for BitVector that can be used as an alternative to Rank for bit vectors up to length 2^...
Definition flat_rank.hpp:82
virtual size_t space_usage() const
Estimate for the space usage.
Definition flat_rank.hpp:177
FlatRank()=default
Default constructor w/o parameter.
tlx::SimpleVector< BigL12Type, tlx::SimpleVectorMode::NoInitNoDestroy > l12_
Array containing the information about the L1- and L2-blocks.
Definition flat_rank.hpp:97
size_t rank1(size_t index) const
Computes rank of ones.
Definition flat_rank.hpp:140
VectorType::RawDataConstAccess data_access_
Definition flat_rank.hpp:95
size_t rank0(size_t index) const
Computes rank of zeros.
Definition flat_rank.hpp:130
FlatRank(VectorType &bv)
Constructor. Creates the auxiliary information for efficient rank queries.
Definition flat_rank.hpp:110
size_t l12_end_
Number of actual existing BigL12-blocks (important for scanning)
Definition flat_rank.hpp:99
uint64_t const * data_
Definition flat_rank.hpp:92
size_t data_size_
Size of the bit vector the rank support is constructed for.
Definition flat_rank.hpp:89
OptimizedFor
Enum used to specify which queries the rank (and select) data structures should be optimized for.
Definition optimized_for.hpp:40
constexpr bool optimize_one_or_dont_care(OptimizedFor const optimized_for)
Helper function indicating if queries should be optimized for one queries or the caller does not care...
Definition optimized_for.hpp:59
@ DONT_CARE
It does not matter (both types are called equally often).
Check that L12Type requires only 64 bits.
Definition l12_type.hpp:94
Static configuration for FlatRank and FlatRankSelect.
Definition flat_rank.hpp:40
static constexpr size_t L2_WORD_SIZE
Number of 64-bit words covered by an L2-block.
Definition flat_rank.hpp:50
static constexpr size_t L1_BIT_SIZE
Bits covered by an L1-block.
Definition flat_rank.hpp:44
static constexpr size_t L0_WORD_SIZE
Number of 64-bit words covered by an L0-block.
Definition flat_rank.hpp:54
static constexpr size_t L2_BIT_SIZE
Bits covered by an L2-block.
Definition flat_rank.hpp:42
static constexpr size_t SELECT_SAMPLE_RATE
Sample rate of positions for faster select queries.
Definition flat_rank.hpp:57
static constexpr size_t L0_BIT_SIZE
Bits covered by an L0-block.
Definition flat_rank.hpp:46
static constexpr size_t L1_WORD_SIZE
Number of 64-bit words covered by an L1-block.
Definition flat_rank.hpp:52