pasta_bit_vector  1.0.1
Bit Vector with Compact and Fast Rank and Select Support
Loading...
Searching...
No Matches
bit_vector.hpp
1/*******************************************************************************
2 * This file is part of pasta::bit_vector.
3 *
4 * Copyright (C) 2019-2021 Florian Kurpicz <florian@kurpicz.org>
5 *
6 * pasta::bit_vector is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * pasta::bit_vector is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with pasta::bit_vector. If not, see <http://www.gnu.org/licenses/>.
18 *
19 ******************************************************************************/
20
21#pragma once
22
23#include "pasta/bit_vector/support/find_l2_flat_with.hpp"
24#include "pasta/bit_vector/support/find_l2_wide_with.hpp"
26
27#include <algorithm>
28#include <cstddef>
29#include <cstdint>
30#include <iostream>
31#include <iterator>
32#include <span>
33#include <tlx/container/simple_vector.hpp>
34
35namespace pasta {
36
37/*!
38 * \brief Utility class used for the access operator of the \c BitVector.
39 *
40 * This utility class is created when the bit vector is accessed through the
41 * access operator. It cannot be created explicitly and cannot be assigned. It
42 * can only be cast to bool (for read access) or be assigned a bool (for write
43 * access).
44 */
45class BitAccess {
46 //! Forward declaration.
47 friend class BitVector;
48
49 //! 64-bit word the bit is contained in.
50 uint64_t* const data_;
51 //! Position of the bit within the 64-bit word.
52 size_t position_;
53
54public:
55 //! Deleted constructor.
56 BitAccess() = delete;
57 //! Avoiding implicit generation of the move constructor.
58 BitAccess(BitAccess&&) = delete;
59
60 //! Avoiding implicit copy assignment.
61 BitAccess& operator=(BitAccess const&) = delete;
62 //! Avoiding implicit move assignment.
64
65 /*!
66 * \brief Constructor setting 64-bit word and position of the bit in the
67 * word.
68 *
69 * This constructor can be used when a single bit in the bit vector has to
70 * be accessed and one does not want to extract the bit from the 64-bit word
71 * manually.
72 *
73 * \param data Pointer to the 64-bit word that contains the bit.
74 * \param position Position of the bit within the 64-bit word.
75 */
76 BitAccess(uint64_t* const data, size_t const position) noexcept
77 : data_(data),
78 position_(position) {}
79
80 /*!
81 * \brief User-defined conversion function to bool.
82 *
83 * Used for read access to a single bit.
84 *
85 */
86 operator bool() const noexcept {
87 uint8_t const offset = uint64_t(position_) & uint64_t(0b111111);
88 return (data_[position_ >> 6] >> offset) & 1ULL;
89 }
90
91 /*!
92 * \brief Assignment operator to set a bit within the bit vector.
93 * \param value Value the bit should be written to.
94 * \return This after the bit has been written.
95 */
96 BitAccess& operator=(bool const value) noexcept {
97 // https://graphics.stanford.edu/~seander/bithacks.html
98 // (ConditionalSetOrClearBitsWithoutBranching)
99 uint64_t const mask = 1ULL << (uint64_t(position_) & uint64_t(0b111111));
100 data_[position_ >> 6] = (data_[position_ >> 6] & ~mask) | (-value & mask);
101 return *this;
102 }
103
104 /*!
105 * \brief Computes the distance to another \c BitAccess instance.
106 *
107 * This function is only used within the iterator and usually should not be
108 * required in any other situation. It has **undefined** behavior if used
109 * for two instances from different \c BitVector.
110 * \param other The \c BitAccess instance the distance is computed to.
111 * \return The distance between \c this and the other \c BitAccess instance.
112 */
113 int64_t distance(BitAccess const& other) const noexcept {
114 return position_ - other.position_;
115 }
116
117 /*!
118 * \brief Comparison of two \c BitAccess instances for equality.
119 *
120 * This does not compare the value the \c BitAccess instance refers to but
121 * rather if the two instances refer to the same position in the same
122 * \c BitVector.
123 * \param a First \c BitAccess instance.
124 * \param b Second \c BitAccess instance.
125 * \return \c true if both instances refer to the same position in the same
126 * \c BitVector and \c false otherwise.
127 */
128 friend bool operator==(BitAccess const& a, BitAccess const& b) noexcept {
129 return a.position_ == b.position_ && a.data_ == b.data_;
130 }
131
132 /*!
133 * \brief Comparison of two \c BitAccess instances for inequality.
134 *
135 * This does not compare the value the \c BitAccess instance refers to but
136 * rather if the two instances refer to different position or to different
137 * \c BitVector.
138 * \param a First \c BitAccess instance.
139 * \param b Second \c BitAccess instance.
140 * \return \c true if both instances refer to different position or
141 * different \c BitVector and \c false otherwise.
142 */
143 friend bool operator!=(BitAccess const& a, BitAccess const& b) noexcept {
144 return a.position_ != b.position_ || a.data_ != b.data_;
145 }
146
147private:
148 /*!
149 * \brief The copy constructor is private and should not be used.
150 *
151 * We just use the copy constructor internally for the \c BitVector. There
152 * is no reason for the constructor to be called anywhere else.
153 */
154 BitAccess(BitAccess const&) = default;
155}; // class BitAccess
156
157//! \addtogroup pasta_bit_vector
158//! \{
159
160/*!\brief Uncompressed, highly tuned, fixed size bit vector
161 *
162 * The uncompressed bit vector can be used as a replacement for
163 * \c std::vector<bool>, when the size of the vector is known in advance. It
164 * provides an access operator.
165 *
166 * **Important:** If you plan on accessing the data directly, note that the
167 * bits are stored in reverse order in the 64-bit words. This saves an
168 * subtraction, when shifting the bits for access. To be more precise, the
169 * bits are stored as follows (for simplicity, we show it for 8-bit words):
170 *
171 * | 7 6 5 4 3 2 1 0 | 15 14 13 12 11 10 9 8 | 23 22 21 20 19 18 17 16 | ...
172 *
173 * \todo Add all required support to make bit vector work as a true (fixed
174 * replacement of \c std::vector<bool>).
175 * \todo Create dynamic sized bit vector (true replacement of
176 * \c std::vector<bool>).
177 */
179public:
180 //! Type that is used to store the raw data of the bit vector.
181 using RawDataType = uint64_t;
182 //! Pointer to the data type that is used to store the raw data of the bit
183 //! vector.
185 //! Type that can be used to access constant values of the raw data used to
186 //! represent the bit vector.
188
189private:
190 //! Size of the bit vector in bits.
191 size_t bit_size_ = 0;
192 //! Size of the underlying data used to store the bits.
193 size_t size_ = 0;
194 //! Array of 64-bit words used to store the content of the bit vector.
195 tlx::SimpleVector<RawDataType, tlx::SimpleVectorMode::NoInitNoDestroy> data_;
196 //! Pointer to the raw data of the bit vector.
197 RawDataPointer raw_data_ = nullptr;
198
199public:
200 /*!
201 * \brief Custom iterator for \c BitVector
202 */
203 struct Iterator {
204 //! Iterator category.
205 using iterator_category = std::forward_iterator_tag;
206 //! Difference type.
207 using differece_type = std::ptrdiff_t;
208 //! Value type is not bit but the \c BitAccess object used for access.
210 //! Pointer type.
212 //! Reference type.
214
215 /*!
216 * \brief Constructor. Creates Iterator pointing at a bit and holds
217 * pointer to data.
218 * \param data Pointer to the beginning of the \c BitVector.
219 * \param position Position the iterator is pointing at.
220 */
221 Iterator(uint64_t* const data, size_t const position) noexcept
222 : bit_access_(data, position) {}
223
224 /*!
225 * \brief Iterator is dereferenceable. Obtain value it is pointing at.
226 * \return Reference to the bit the iterator points at.
227 */
228 reference operator*() noexcept {
229 return bit_access_;
230 }
231
232 /*!
233 * \brief Iterator is dereferenceable. Obtain value it is pointing at.
234 * \return Pointer to the bit the iterator points at.
235 */
236 pointer operator->() noexcept {
237 return &bit_access_;
238 }
239
240 //! Prefix increment.
241 Iterator& operator++() noexcept {
242 ++bit_access_.position_;
243 return *this;
244 }
245
246 //! Postfix increment.
247 Iterator operator++(int32_t) noexcept {
248 auto tmp = *this;
249 ++(*this);
250 return tmp;
251 }
252
253 //! Iterator comparison equality.
254 friend bool operator==(Iterator const& a, Iterator const& b) noexcept {
255 return a.bit_access_ == b.bit_access_;
256 }
257
258 //! Iterator comparison inequality.
259 friend bool operator!=(Iterator const& a, Iterator const& b) noexcept {
260 return a.bit_access_ != b.bit_access_;
261 }
262
263 //! Iterator distance computation.
265 Iterator const& b) noexcept {
266 return a.bit_access_.distance(b.bit_access_);
267 }
268
269 private:
270 BitAccess bit_access_;
271 }; // struct BitVector::Iterator
272
273 //! Default empty constructor.
274 BitVector() = default;
275
276 //! Default move constructor.
277 BitVector(BitVector&&) = default;
278
279 //! Default copy constructor.
280 BitVector(BitVector const&) = default;
281
282 //! Deleted copy assignment, due to \c SimpleVector not supporting copy
283 //! assignment.
284 BitVector& operator=(BitVector const&) = delete;
285
286 //! Default move assignment.
288
289 /*!
290 * \brief Constructor. Creates a bit vector that holds a specific, fixed
291 * number of bits.
292 * \param size Number of bits the bit vector contains.
293 */
294 BitVector(size_t const size) noexcept
295 : bit_size_(size),
296 size_((bit_size_ >> 6) + 1),
297 data_(size_),
298 raw_data_(data_.data()) {}
299
300 /*!
301 * \brief Constructor. Creates a bit vector that holds a specific, fixed
302 * number of bits set to a default value.
303 * \param size Number of bits the bit vector contains.
304 * \param init_value Value all bits initially are set to. Either 0
305 * (\c false) or 1 (\c true).
306 */
307 BitVector(size_t const size, bool const init_value) noexcept
308 : BitVector(size) {
309 uint64_t const fill_value = init_value ? ~(0ULL) : 0ULL;
310 std::fill_n(raw_data_, size_, fill_value);
311 }
312
313 /*!
314 * \brief Access operator to read/write to a bit of the bit vector.
315 * \param index Index of the bit to be read/write to in the bit vector.
316 * \return \c BitAccess that allows to access to a single bit.
317 */
318 BitAccess operator[](size_t const index) noexcept {
319 return BitAccess(raw_data_, index);
320 }
321
322 /*!
323 * \brief Access operator to read to a bit of the bit vector.
324 * \param index Index of the bit to be read to in the bit vector.
325 * \return \c BitAccess that allows to read access to a single bit.
326 */
327 BitAccess operator[](size_t const index) const noexcept {
328 return BitAccess(raw_data_, index);
329 }
330
331 /*!
332 * \brief Resize the bit vector to contain \c size bits.
333 * \param size Number of bits the resized bit vector contains.
334 */
335 void resize(size_t const size) noexcept {
336 bit_size_ = size;
337 size_ = (bit_size_ >> 6) + 1;
338 data_.resize(size_);
339 raw_data_ = data_.data();
340 }
341
342 /*!
343 * \brief Resize the bit vector to contain \c size bits and fill new bits with
344 * default value. \param size Number of bits the resized bit vector contains.
345 * \param init_value Value all bits that are appended to the bit vector (if
346 * any) will have.
347 */
348 void resize(size_t const size, bool const init_value) noexcept {
349 size_t const old_bit_size = bit_size_;
350 bit_size_ = size;
351 size_ = (bit_size_ >> 6) + 1;
352 data_.resize(size_);
353 raw_data_ = data_.data();
354
355 if (old_bit_size < bit_size_) {
356 size_t max_bitwise = std::min(bit_size_, ((old_bit_size + 63) / 64) * 64);
357 for (size_t i = old_bit_size; i < max_bitwise; ++i) {
358 operator[](i) = init_value;
359 }
360 size_t const old_size = (old_bit_size + 63) / 64;
361 uint64_t const fill_value = init_value ? ~(0ULL) : 0ULL;
362 std::fill_n(raw_data_ + old_size, size_ - old_size, fill_value);
363 }
364 }
365
366 /*!
367 * \brief Get iterator representing the first element of the \c BitVector.
368 * \return Iterator representing the first element of the \c BitVector.
369 */
370 Iterator begin() noexcept {
371 return Iterator(raw_data_, 0);
372 }
373
374 /*!
375 * \brief Get iterator representing the end of the \c BitVector.
376 *
377 * The \c end() method returns an iterator that may refers to an invalid
378 * memory address. It should never be accessed directly.
379 * \return Iterator representing the end of the \c BitVector.
380 */
381 Iterator end() noexcept {
382 return Iterator(raw_data_, bit_size_);
383 }
384
385 /*!
386 * \brief Direct access to the raw data of the bit vector.
387 *
388 * Note that the raw data does not contain the bits from left to right. A
389 * detailed description can be found at the top of this file.
390 * \return \c std::span<uint64_t> pointing to the bit vector's raw data.
391 */
392 std::span<uint64_t> data() noexcept {
393 return std::span{raw_data_, size_};
394 }
395
396 /*!
397 * \brief Direct access to the raw data of the bit vector.
398 *
399 * Note that the raw data does not contain the bits from left to right. A
400 * detailed description can be found at the top of this file.
401 * \return \c std::span<uint64_t> pointing to the bit vector's raw data.
402 */
403 std::span<uint64_t> data() const noexcept {
404 return std::span{raw_data_, size_};
405 }
406
407 /*!
408 * \brief Direct access to one 64-bit element of the raw data of the bit
409 * vector.
410 *
411 * Note that the raw data does not contain the bits from left to right. A
412 * detailed description can be found at the top of this file.
413 * \param index Index of the 64-bit bit word that should be returned.
414 * \return index-th 64-bit word of the raw bit vector data.
415 */
416 uint64_t data(size_t const index) const noexcept {
417 return raw_data_[index];
418 }
419
420 /*!
421 * \brief Estimate for the space usage.
422 * \return Number of bytes used by this data structure.
423 */
424 [[nodiscard("space usage computed but not used")]] size_t
425 space_usage() const {
426 return (data_.size() * sizeof(RawDataType)) + sizeof(*this);
427 }
428
429 /*!
430 * \brief Get the size of the bit vector in bits.
431 * \return Size of the bit vector in bits.
432 */
433 size_t size() const noexcept {
434 return bit_size_;
435 }
436
437 //! formatted output of the \c BitVector
438 friend std::ostream& operator<<(std::ostream& os, BitVector const& bv) {
439 for (size_t i = 0; i < bv.bit_size_; ++i) {
440 os << (bv[i] ? "1" : "0");
441 }
442 return os;
443 }
444
445}; // class BitVector
446
447//! \}
448
449} // namespace pasta
450
451/******************************************************************************/
Utility class used for the access operator of the BitVector.
Definition bit_vector.hpp:45
BitAccess(BitAccess &&)=delete
Avoiding implicit generation of the move constructor.
int64_t distance(BitAccess const &other) const noexcept
Computes the distance to another BitAccess instance.
Definition bit_vector.hpp:113
BitAccess & operator=(BitAccess const &)=delete
Avoiding implicit copy assignment.
BitAccess & operator=(bool const value) noexcept
Assignment operator to set a bit within the bit vector.
Definition bit_vector.hpp:96
BitAccess()=delete
Deleted constructor.
BitAccess & operator=(BitAccess &&)=delete
Avoiding implicit move assignment.
friend bool operator==(BitAccess const &a, BitAccess const &b) noexcept
Comparison of two BitAccess instances for equality.
Definition bit_vector.hpp:128
BitAccess(uint64_t *const data, size_t const position) noexcept
Constructor setting 64-bit word and position of the bit in the word.
Definition bit_vector.hpp:76
friend bool operator!=(BitAccess const &a, BitAccess const &b) noexcept
Comparison of two BitAccess instances for inequality.
Definition bit_vector.hpp:143
Uncompressed, highly tuned, fixed size bit vector.
Definition bit_vector.hpp:178
Iterator begin() noexcept
Get iterator representing the first element of the BitVector.
Definition bit_vector.hpp:370
BitAccess operator[](size_t const index) const noexcept
Access operator to read to a bit of the bit vector.
Definition bit_vector.hpp:327
BitVector & operator=(BitVector const &)=delete
uint64_t RawDataType
Type that is used to store the raw data of the bit vector.
Definition bit_vector.hpp:181
BitVector()=default
Default empty constructor.
RawDataType const * RawDataConstAccess
Definition bit_vector.hpp:187
friend std::ostream & operator<<(std::ostream &os, BitVector const &bv)
formatted output of the BitVector
Definition bit_vector.hpp:438
std::span< uint64_t > data() const noexcept
Direct access to the raw data of the bit vector.
Definition bit_vector.hpp:403
RawDataType * RawDataPointer
Definition bit_vector.hpp:184
size_t size() const noexcept
Get the size of the bit vector in bits.
Definition bit_vector.hpp:433
BitVector(BitVector &&)=default
Default move constructor.
BitVector(size_t const size, bool const init_value) noexcept
Constructor. Creates a bit vector that holds a specific, fixed number of bits set to a default value.
Definition bit_vector.hpp:307
uint64_t data(size_t const index) const noexcept
Direct access to one 64-bit element of the raw data of the bit vector.
Definition bit_vector.hpp:416
BitVector(BitVector const &)=default
Default copy constructor.
Iterator end() noexcept
Get iterator representing the end of the BitVector.
Definition bit_vector.hpp:381
void resize(size_t const size) noexcept
Resize the bit vector to contain size bits.
Definition bit_vector.hpp:335
std::span< uint64_t > data() noexcept
Direct access to the raw data of the bit vector.
Definition bit_vector.hpp:392
void resize(size_t const size, bool const init_value) noexcept
Resize the bit vector to contain size bits and fill new bits with default value.
Definition bit_vector.hpp:348
BitVector & operator=(BitVector &&)=default
Default move assignment.
size_t space_usage() const
Estimate for the space usage.
Definition bit_vector.hpp:425
BitVector(size_t const size) noexcept
Constructor. Creates a bit vector that holds a specific, fixed number of bits.
Definition bit_vector.hpp:294
BitAccess operator[](size_t const index) noexcept
Access operator to read/write to a bit of the bit vector.
Definition bit_vector.hpp:318
Custom iterator for BitVector.
Definition bit_vector.hpp:203
friend bool operator==(Iterator const &a, Iterator const &b) noexcept
Iterator comparison equality.
Definition bit_vector.hpp:254
Iterator operator++(int32_t) noexcept
Postfix increment.
Definition bit_vector.hpp:247
Iterator & operator++() noexcept
Prefix increment.
Definition bit_vector.hpp:241
std::forward_iterator_tag iterator_category
Iterator category.
Definition bit_vector.hpp:205
Iterator(uint64_t *const data, size_t const position) noexcept
Constructor. Creates Iterator pointing at a bit and holds pointer to data.
Definition bit_vector.hpp:221
friend bool operator!=(Iterator const &a, Iterator const &b) noexcept
Iterator comparison inequality.
Definition bit_vector.hpp:259
pointer operator->() noexcept
Iterator is dereferenceable. Obtain value it is pointing at.
Definition bit_vector.hpp:236
friend differece_type operator-(Iterator const &a, Iterator const &b) noexcept
Iterator distance computation.
Definition bit_vector.hpp:264
std::ptrdiff_t differece_type
Difference type.
Definition bit_vector.hpp:207
reference operator*() noexcept
Iterator is dereferenceable. Obtain value it is pointing at.
Definition bit_vector.hpp:228