Skip to content

Commit

Permalink
optimize split_files by using drain.
Browse files Browse the repository at this point in the history
  • Loading branch information
Rachelint committed Aug 6, 2024
1 parent b28adeb commit 49ca5cb
Showing 1 changed file with 16 additions and 4 deletions.
20 changes: 16 additions & 4 deletions datafusion/core/src/datasource/listing/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,22 @@ pub fn split_files(

// effectively this is div with rounding up instead of truncating
let chunk_size = (partitioned_files.len() + n - 1) / n;
- partitioned_files
- .chunks_mut(chunk_size)
- .map(|c| c.iter_mut().map(mem::take).collect())
- .collect()
+ let mut chunks = Vec::with_capacity(n);
+ let mut current_chunk = Vec::with_capacity(chunk_size);
+ for file in partitioned_files.drain(..) {
+ current_chunk.push(file);
+ if current_chunk.len() == chunk_size {
+ let full_chunk =
+ mem::replace(&mut current_chunk, Vec::with_capacity(chunk_size));
+ chunks.push(full_chunk);
+ }
+ }
+
+ if !current_chunk.is_empty() {
+ chunks.push(current_chunk)
+ }
+
+ chunks
}

struct Partition {
Expand Down

0 comments on commit 49ca5cb

Please sign in to comment.