Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add scalar optimizations from CRoaring / arXiv:1709.07821 section 3 #127

Merged
merged 10 commits into from
Jan 12, 2022
49 changes: 22 additions & 27 deletions src/bitmap/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ const ARRAY_LIMIT: u64 = 4096;
#[derive(PartialEq, Clone)]
pub struct Container {
pub key: u16,
pub len: u64,
pub store: Store,
}

Expand All @@ -22,14 +21,17 @@ pub struct Iter<'a> {

impl Container {
pub fn new(key: u16) -> Container {
Container { key, len: 0, store: Store::Array(Vec::new()) }
Container { key, store: Store::new() }
}
}

impl Container {
pub fn len(&self) -> u64 {
self.store.len()
}

pub fn insert(&mut self, index: u16) -> bool {
if self.store.insert(index) {
self.len += 1;
self.ensure_correct_store();
true
} else {
Expand All @@ -39,7 +41,6 @@ impl Container {

pub fn insert_range(&mut self, range: RangeInclusive<u16>) -> u64 {
let inserted = self.store.insert_range(range);
self.len += inserted;
self.ensure_correct_store();
inserted
}
Expand All @@ -49,7 +50,6 @@ impl Container {
/// Returns whether the `index` was effectively pushed.
pub fn push(&mut self, index: u16) -> bool {
if self.store.push(index) {
self.len += 1;
self.ensure_correct_store();
true
} else {
Expand All @@ -59,7 +59,6 @@ impl Container {

pub fn remove(&mut self, index: u16) -> bool {
if self.store.remove(index) {
self.len -= 1;
self.ensure_correct_store();
true
} else {
Expand All @@ -69,7 +68,6 @@ impl Container {

pub fn remove_range(&mut self, range: RangeInclusive<u16>) -> u64 {
let result = self.store.remove_range(range);
self.len -= result;
self.ensure_correct_store();
result
}
Expand All @@ -83,7 +81,7 @@ impl Container {
}

pub fn is_subset(&self, other: &Self) -> bool {
self.len <= other.len && self.store.is_subset(&other.store)
self.len() <= other.len() && self.store.is_subset(&other.store)
}

pub fn min(&self) -> Option<u16> {
Expand All @@ -95,14 +93,18 @@ impl Container {
}

fn ensure_correct_store(&mut self) {
let new_store = match (&self.store, self.len) {
(store @ &Store::Bitmap(..), len) if len <= ARRAY_LIMIT => Some(store.to_array()),
(store @ &Store::Array(..), len) if len > ARRAY_LIMIT => Some(store.to_bitmap()),
_ => None,
match &self.store {
Store::Bitmap(ref bits) => {
if bits.len() <= ARRAY_LIMIT {
self.store = Store::Array(bits.to_array_store())
}
}
Store::Array(ref vec) => {
if vec.len() as u64 > ARRAY_LIMIT {
self.store = Store::Bitmap(vec.to_bitmap_store())
}
}
};
if let Some(new_store) = new_store {
self.store = new_store;
}
}
}

Expand All @@ -111,7 +113,7 @@ impl BitOr<&Container> for &Container {

fn bitor(self, rhs: &Container) -> Container {
let store = BitOr::bitor(&self.store, &rhs.store);
let mut container = Container { key: self.key, len: store.len(), store };
let mut container = Container { key: self.key, store };
container.ensure_correct_store();
container
}
Expand All @@ -120,15 +122,13 @@ impl BitOr<&Container> for &Container {
impl BitOrAssign<Container> for Container {
fn bitor_assign(&mut self, rhs: Container) {
BitOrAssign::bitor_assign(&mut self.store, rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}

impl BitOrAssign<&Container> for Container {
fn bitor_assign(&mut self, rhs: &Container) {
BitOrAssign::bitor_assign(&mut self.store, &rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}
Expand All @@ -138,7 +138,7 @@ impl BitAnd<&Container> for &Container {

fn bitand(self, rhs: &Container) -> Container {
let store = BitAnd::bitand(&self.store, &rhs.store);
let mut container = Container { key: self.key, len: store.len(), store };
let mut container = Container { key: self.key, store };
container.ensure_correct_store();
container
}
Expand All @@ -147,15 +147,13 @@ impl BitAnd<&Container> for &Container {
impl BitAndAssign<Container> for Container {
fn bitand_assign(&mut self, rhs: Container) {
BitAndAssign::bitand_assign(&mut self.store, rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}

impl BitAndAssign<&Container> for Container {
fn bitand_assign(&mut self, rhs: &Container) {
BitAndAssign::bitand_assign(&mut self.store, &rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}
Expand All @@ -165,7 +163,7 @@ impl Sub<&Container> for &Container {

fn sub(self, rhs: &Container) -> Container {
let store = Sub::sub(&self.store, &rhs.store);
let mut container = Container { key: self.key, len: store.len(), store };
let mut container = Container { key: self.key, store };
container.ensure_correct_store();
container
}
Expand All @@ -174,7 +172,6 @@ impl Sub<&Container> for &Container {
impl SubAssign<&Container> for Container {
fn sub_assign(&mut self, rhs: &Container) {
SubAssign::sub_assign(&mut self.store, &rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}
Expand All @@ -184,7 +181,7 @@ impl BitXor<&Container> for &Container {

fn bitxor(self, rhs: &Container) -> Container {
let store = BitXor::bitxor(&self.store, &rhs.store);
let mut container = Container { key: self.key, len: store.len(), store };
let mut container = Container { key: self.key, store };
container.ensure_correct_store();
container
}
Expand All @@ -193,15 +190,13 @@ impl BitXor<&Container> for &Container {
impl BitXorAssign<Container> for Container {
fn bitxor_assign(&mut self, rhs: Container) {
BitXorAssign::bitxor_assign(&mut self.store, rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}

impl BitXorAssign<&Container> for Container {
fn bitxor_assign(&mut self, rhs: &Container) {
BitXorAssign::bitxor_assign(&mut self.store, &rhs.store);
self.len = self.store.len();
self.ensure_correct_store();
}
}
Expand Down Expand Up @@ -236,6 +231,6 @@ impl<'a> Iterator for Iter<'a> {

impl fmt::Debug for Container {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
format!("Container<{:?} @ {:?}>", self.len, self.key).fmt(formatter)
format!("Container<{:?} @ {:?}>", self.len(), self.key).fmt(formatter)
}
}
8 changes: 4 additions & 4 deletions src/bitmap/inherent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ impl RoaringBitmap {
match self.containers.binary_search_by_key(&key, |c| c.key) {
Ok(loc) => {
if self.containers[loc].remove(index) {
if self.containers[loc].len == 0 {
if self.containers[loc].len() == 0 {
self.containers.remove(loc);
}
true
Expand Down Expand Up @@ -214,7 +214,7 @@ impl RoaringBitmap {
let a = if key == start_container_key { start_index } else { 0 };
let b = if key == end_container_key { end_index } else { u16::MAX };
removed += self.containers[index].remove_range(a..=b);
if self.containers[index].len == 0 {
if self.containers[index].len() == 0 {
self.containers.remove(index);
continue;
}
Expand Down Expand Up @@ -297,7 +297,7 @@ impl RoaringBitmap {
/// assert_eq!(rb.len(), 2);
/// ```
pub fn len(&self) -> u64 {
self.containers.iter().map(|container| container.len).sum()
self.containers.iter().map(|container| container.len()).sum()
}

/// Returns the minimum value in the set (if the set is non-empty).
Expand Down Expand Up @@ -447,7 +447,7 @@ mod tests {
let inserted = b.insert_range(u16::MAX as u32 + 1..=u16::MAX as u32 + 1);
assert_eq!(inserted, 1);

assert_eq!(b.containers[0].len, 1);
assert_eq!(b.containers[0].len(), 1);
assert_eq!(b.containers.len(), 1);

let removed = b.remove_range(u16::MAX as u32 + 1..=u16::MAX as u32 + 1);
Expand Down
4 changes: 2 additions & 2 deletions src/bitmap/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ pub struct IntoIter {

impl Iter<'_> {
fn new(containers: &[Container]) -> Iter {
let size_hint = containers.iter().map(|c| c.len).sum();
let size_hint = containers.iter().map(|c| c.len()).sum();
Iter { inner: containers.iter().flat_map(identity as _), size_hint }
}
}

impl IntoIter {
fn new(containers: Vec<Container>) -> IntoIter {
let size_hint = containers.iter().map(|c| c.len).sum();
let size_hint = containers.iter().map(|c| c.len()).sum();
IntoIter { inner: containers.into_iter().flat_map(identity as _), size_hint }
}
}
Expand Down
16 changes: 8 additions & 8 deletions src/bitmap/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ impl BitAnd<&RoaringBitmap> for &RoaringBitmap {
for pair in Pairs::new(&self.containers, &rhs.containers) {
if let (Some(lhs), Some(rhs)) = pair {
let container = BitAnd::bitand(lhs, rhs);
if container.len != 0 {
if container.len() != 0 {
containers.push(container);
}
}
Expand All @@ -301,7 +301,7 @@ impl BitAndAssign<RoaringBitmap> for RoaringBitmap {
let rhs_cont = &mut rhs.containers[loc];
let rhs_cont = mem::replace(rhs_cont, Container::new(rhs_cont.key));
BitAndAssign::bitand_assign(cont, rhs_cont);
cont.len != 0
cont.len() != 0
}
Err(_) => false,
}
Expand All @@ -317,7 +317,7 @@ impl BitAndAssign<&RoaringBitmap> for RoaringBitmap {
match rhs.containers.binary_search_by_key(&key, |c| c.key) {
Ok(loc) => {
BitAndAssign::bitand_assign(cont, &rhs.containers[loc]);
cont.len != 0
cont.len() != 0
}
Err(_) => false,
}
Expand Down Expand Up @@ -367,7 +367,7 @@ impl Sub<&RoaringBitmap> for &RoaringBitmap {
(None, Some(_)) => (),
(Some(lhs), Some(rhs)) => {
let container = Sub::sub(lhs, rhs);
if container.len != 0 {
if container.len() != 0 {
containers.push(container);
}
}
Expand All @@ -393,7 +393,7 @@ impl SubAssign<&RoaringBitmap> for RoaringBitmap {
match rhs.containers.binary_search_by_key(&cont.key, |c| c.key) {
Ok(loc) => {
SubAssign::sub_assign(cont, &rhs.containers[loc]);
cont.len != 0
cont.len() != 0
}
Err(_) => true,
}
Expand Down Expand Up @@ -443,7 +443,7 @@ impl BitXor<&RoaringBitmap> for &RoaringBitmap {
(None, Some(rhs)) => containers.push(rhs.clone()),
(Some(lhs), Some(rhs)) => {
let container = BitXor::bitxor(lhs, rhs);
if container.len != 0 {
if container.len() != 0 {
containers.push(container);
}
}
Expand All @@ -462,7 +462,7 @@ impl BitXorAssign<RoaringBitmap> for RoaringBitmap {
match pair {
(Some(mut lhs), Some(rhs)) => {
BitXorAssign::bitxor_assign(&mut lhs, rhs);
if lhs.len != 0 {
if lhs.len() != 0 {
self.containers.push(lhs);
}
}
Expand All @@ -481,7 +481,7 @@ impl BitXorAssign<&RoaringBitmap> for RoaringBitmap {
match pair {
(Some(mut lhs), Some(rhs)) => {
BitXorAssign::bitxor_assign(&mut lhs, rhs);
if lhs.len != 0 {
if lhs.len() != 0 {
self.containers.push(lhs);
}
}
Expand Down
18 changes: 9 additions & 9 deletions src/bitmap/serialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::io;

use super::container::Container;
use super::store::Store;
use crate::bitmap::store::{ArrayStore, BitmapStore, Store};
use crate::RoaringBitmap;

const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
Expand Down Expand Up @@ -32,7 +32,7 @@ impl RoaringBitmap {
.containers
.iter()
.map(|container| match container.store {
Store::Array(ref values) => 8 + values.len() * 2,
Store::Array(ref values) => 8 + values.len() as usize * 2,
Store::Bitmap(..) => 8 + 8 * 1024,
})
.sum();
Expand Down Expand Up @@ -64,7 +64,7 @@ impl RoaringBitmap {

for container in &self.containers {
writer.write_u16::<LittleEndian>(container.key)?;
writer.write_u16::<LittleEndian>((container.len - 1) as u16)?;
writer.write_u16::<LittleEndian>((container.len() - 1) as u16)?;
}

let mut offset = 8 + 8 * self.containers.len() as u32;
Expand All @@ -83,12 +83,12 @@ impl RoaringBitmap {
for container in &self.containers {
match container.store {
Store::Array(ref values) => {
for &value in values {
for &value in values.iter() {
writer.write_u16::<LittleEndian>(value)?;
}
}
Store::Bitmap(ref values) => {
for &value in values.iter() {
Store::Bitmap(ref bits) => {
for &value in bits.as_array() {
writer.write_u64::<LittleEndian>(value)?;
}
}
Expand Down Expand Up @@ -152,15 +152,15 @@ impl RoaringBitmap {
let mut values = vec![0; len as usize];
reader.read_exact(cast_slice_mut(&mut values))?;
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
Store::Array(values)
Store::Array(ArrayStore::from_vec_unchecked(values))
} else {
let mut values = Box::new([0; 1024]);
reader.read_exact(cast_slice_mut(&mut values[..]))?;
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
Store::Bitmap(values)
Store::Bitmap(BitmapStore::from_unchecked(len, values))
};

containers.push(Container { key, len, store });
containers.push(Container { key, store });
}

Ok(RoaringBitmap { containers })
Expand Down
Loading