From dc77ee28ca32ca3e02c993546a6fcc25e87c6edc Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 14 Mar 2020 20:57:39 -0700 Subject: [PATCH] Use ceiling division --- src/dreduce.jl | 8 ++++---- src/reduce.jl | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/dreduce.jl b/src/dreduce.jl index 7bee309565..c7472222d3 100644 --- a/src/dreduce.jl +++ b/src/dreduce.jl @@ -19,11 +19,11 @@ See also: [Parallel processing tutorial](@ref tutorial-parallel), # Keyword Arguments - `pool::AbstractWorkerPool`: Passed to `Distributed.remotecall`. -- `basesize::Integer = length(array) ÷ nworkers()`: A size of chunk in +- `basesize::Integer = ⌈length(array) / nworkers()⌉`: A size of chunk in `array` that is processed by each worker. A smaller size may be required when computation time for processing each item can fluctuate a lot. -- `threads_basesize::Integer = basesize ÷ nthreads()`: A size of chunk +- `threads_basesize::Integer = ⌈basesize / nthreads()⌉`: A size of chunk in `array` that is processed by each task in each worker process. The default setting assumes that the number of threads used in all workers are the same. For heterogeneous setup where each worker @@ -50,8 +50,8 @@ See [`dreduce`](@ref) and [`transduce`](@ref). function dtransduce( xform::Transducer, step, init, coll; simd::SIMDFlag = Val(false), - basesize::Integer = max(1, length(coll) ÷ Distributed.nworkers()), - threads_basesize::Integer = max(1, basesize ÷ Threads.nthreads()), + basesize::Integer = max(1, cld(length(coll), Distributed.nworkers())), + threads_basesize::Integer = max(1, cld(basesize, Threads.nthreads())), pool::Distributed.AbstractWorkerPool = Distributed.default_worker_pool(), _remote_reduce = _transduce_assoc_nocomplete, ) diff --git a/src/reduce.jl b/src/reduce.jl index 1a1c5fa962..ef4bce22de 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -17,7 +17,7 @@ See also: [Parallel processing tutorial](@ref tutorial-parallel), [`foldl`](@ref), [`dreduce`](@ref). 
# Keyword Arguments -- `basesize::Integer = length(reducible) ÷ nthreads()`: A size of +- `basesize::Integer = ⌈length(reducible) / nthreads()⌉`: A size of chunk in `reducible` that is processed by each worker. A smaller size may be required when: * computation time for processing each item fluctuates a lot @@ -135,7 +135,7 @@ function transduce_assoc( init, coll; simd::SIMDFlag = Val(false), - basesize::Integer = length(coll) ÷ Threads.nthreads(), + basesize::Integer = cld(length(coll), Threads.nthreads()), stoppable::Union{Bool,Nothing} = nothing, ) rf = maybe_usesimd(Reduction(xform, step), simd) @@ -424,7 +424,7 @@ function tcopy( ::typeof(Map(identity)), T::Type{<:AbstractSet}, array::PartitionableArray; - basesize::Integer = max(1, length(array) ÷ Threads.nthreads()), + basesize::Integer = max(1, cld(length(array), Threads.nthreads())), kwargs..., ) @argcheck basesize >= 1