diff --git a/src/dreduce.jl b/src/dreduce.jl index 7bee309565..c7472222d3 100644 --- a/src/dreduce.jl +++ b/src/dreduce.jl @@ -19,11 +19,11 @@ See also: [Parallel processing tutorial](@ref tutorial-parallel), # Keyword Arguments - `pool::AbstractWorkerPool`: Passed to `Distributed.remotecall`. -- `basesize::Integer = length(array) ÷ nworkers()`: A size of chunk in +- `basesize::Integer = ⌈length(array) / nworkers()⌉`: A size of chunk in `array` that is processed by each worker. A smaller size may be required when computation time for processing each item can fluctuate a lot. -- `threads_basesize::Integer = basesize ÷ nthreads()`: A size of chunk +- `threads_basesize::Integer = ⌈basesize / nthreads()⌉`: A size of chunk in `array` that is processed by each task in each worker process. The default setting assumes that the number of threads used in all workers are the same. For heterogeneous setup where each worker @@ -50,8 +50,8 @@ See [`dreduce`](@ref) and [`transduce`](@ref). function dtransduce( xform::Transducer, step, init, coll; simd::SIMDFlag = Val(false), - basesize::Integer = max(1, length(coll) ÷ Distributed.nworkers()), - threads_basesize::Integer = max(1, basesize ÷ Threads.nthreads()), + basesize::Integer = max(1, cld(length(coll), Distributed.nworkers())), + threads_basesize::Integer = max(1, cld(basesize, Threads.nthreads())), pool::Distributed.AbstractWorkerPool = Distributed.default_worker_pool(), _remote_reduce = _transduce_assoc_nocomplete, ) diff --git a/src/reduce.jl b/src/reduce.jl index 1a1c5fa962..ef4bce22de 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -17,7 +17,7 @@ See also: [Parallel processing tutorial](@ref tutorial-parallel), [`foldl`](@ref), [`dreduce`](@ref). # Keyword Arguments -- `basesize::Integer = length(reducible) ÷ nthreads()`: A size of +- `basesize::Integer = ⌈length(reducible) / nthreads()⌉`: A size of chunk in `reducible` that is processed by each worker. 
A smaller size may be required when: * computation time for processing each item fluctuates a lot @@ -135,7 +135,7 @@ function transduce_assoc( init, coll; simd::SIMDFlag = Val(false), - basesize::Integer = length(coll) ÷ Threads.nthreads(), + basesize::Integer = cld(length(coll), Threads.nthreads()), stoppable::Union{Bool,Nothing} = nothing, ) rf = maybe_usesimd(Reduction(xform, step), simd) @@ -424,7 +424,7 @@ function tcopy( ::typeof(Map(identity)), T::Type{<:AbstractSet}, array::PartitionableArray; - basesize::Integer = max(1, length(array) ÷ Threads.nthreads()), + basesize::Integer = max(1, cld(length(array), Threads.nthreads())), kwargs..., ) @argcheck basesize >= 1