Skip to content

Commit

Permalink
Optimize take kernel for BinaryViewArray and StringViewArray (#…
Browse files Browse the repository at this point in the history
…6168)

* improve speed of view take kernel

* ArrayData -> new_unchecked

* Update arrow-select/src/take.rs

Co-authored-by: Andrew Lamb <[email protected]>

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
a10y and alamb authored Aug 2, 2024
1 parent ede5a64 commit 0c3732f
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 5 deletions.
9 changes: 4 additions & 5 deletions arrow-select/src/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -487,11 +487,10 @@ fn take_byte_view<T: ByteViewType, IndexType: ArrowPrimitiveType>(
) -> Result<GenericByteViewArray<T>, ArrowError> {
let new_views = take_native(array.views(), indices);
let new_nulls = take_nulls(array.nulls(), indices);
Ok(GenericByteViewArray::new(
new_views,
array.data_buffers().to_vec(),
new_nulls,
))
// Safety: array.views was valid, and take_native copies only valid values, and verifies bounds
Ok(unsafe {
GenericByteViewArray::new_unchecked(new_views, array.data_buffers().to_vec(), new_nulls)
})
}

/// `take` implementation for list arrays
Expand Down
36 changes: 36 additions & 0 deletions arrow/benches/take_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,42 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| bench_take(&values, &indices))
});

let values = create_string_view_array(512, 0.0);
let indices = create_random_index(512, 0.0);
c.bench_function("take stringview 512", |b| {
b.iter(|| bench_take(&values, &indices))
});

let values = create_string_view_array(1024, 0.0);
let indices = create_random_index(1024, 0.0);
c.bench_function("take stringview 1024", |b| {
b.iter(|| bench_take(&values, &indices))
});

let values = create_string_view_array(512, 0.0);
let indices = create_random_index(512, 0.5);
c.bench_function("take stringview null indices 512", |b| {
b.iter(|| bench_take(&values, &indices))
});

let values = create_string_view_array(1024, 0.0);
let indices = create_random_index(1024, 0.5);
c.bench_function("take stringview null indices 1024", |b| {
b.iter(|| bench_take(&values, &indices))
});

let values = create_string_view_array(1024, 0.5);
let indices = create_random_index(1024, 0.0);
c.bench_function("take stringview null values 1024", |b| {
b.iter(|| bench_take(&values, &indices))
});

let values = create_string_view_array(1024, 0.5);
let indices = create_random_index(1024, 0.5);
c.bench_function("take stringview null values null indices 1024", |b| {
b.iter(|| bench_take(&values, &indices))
});

let values = create_primitive_run_array::<Int32Type, Int32Type>(1024, 512);
let indices = create_random_index(1024, 0.0);
c.bench_function(
Expand Down
28 changes: 28 additions & 0 deletions arrow/src/util/bench_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,34 @@ pub fn create_string_array_with_len<Offset: OffsetSizeTrait>(
.collect()
}

/// Builds a random (but fixed-seeded) [`StringViewArray`] holding `size` entries,
/// using `null_density` to decide how many of them are null.
///
/// This is a convenience wrapper that forwards to
/// `create_string_view_array_with_max_len` with a string-length bound of 400.
///
/// See `create_string_array` above for more details.
pub fn create_string_view_array(size: usize, null_density: f32) -> StringViewArray {
    // Length bound handed to the generator for every non-null string.
    let max_str_len = 400;
    create_string_view_array_with_max_len(size, null_density, max_str_len)
}

/// Generates a random (but fixed-seeded) [`StringViewArray`] with `size` slots.
///
/// For every slot an `f32` is drawn from the RNG; the slot is null when that draw is
/// below `null_density`. Otherwise the slot holds a random alphanumeric string whose
/// length is sampled from `0..max_str_len`, so `max_str_len` is an exclusive bound.
fn create_string_view_array_with_max_len(
    size: usize,
    null_density: f32,
    max_str_len: usize,
) -> StringViewArray {
    let rng = &mut seedable_rng();

    // Produces one slot per call. The order of RNG draws (null check, then length,
    // then characters) determines the generated data, so it must not be reordered.
    let next_slot = || -> Option<String> {
        if rng.gen::<f32>() < null_density {
            return None;
        }
        let len = rng.gen_range(0..max_str_len);
        let bytes: Vec<u8> = rng.sample_iter(&Alphanumeric).take(len).collect();
        // `Alphanumeric` is expected to yield ASCII bytes only, so this conversion
        // should never fail.
        Some(String::from_utf8(bytes).unwrap())
    };

    std::iter::repeat_with(next_slot).take(size).collect()
}

/// Creates a random (but fixed-seeded) array of a given size, null density and length
pub fn create_string_view_array_with_len(
size: usize,
Expand Down

0 comments on commit 0c3732f

Please sign in to comment.