Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to StatsBase v0.34 #59

Merged
merged 4 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions .github/workflows/CompatHelper.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Workflow that runs CompatHelper on a schedule or on manual dispatch.
name: CompatHelper
on:
  schedule:
    # Cron expression: minute 0, hour 0, every day.
    - cron: 0 0 * * *
  workflow_dispatch:
# The job is granted write access to repository contents and pull requests.
permissions:
  contents: write
  pull-requests: write
jobs:
  CompatHelper:
    runs-on: ubuntu-latest
    steps:
      # Probe for an existing Julia binary; a failure here is tolerated so the
      # next step can decide whether to install one.
      - name: Check if Julia is already available in the PATH
        id: julia_in_path
        run: which julia
        continue-on-error: true
      - name: Install Julia, but only if it is not already available in the PATH
        uses: julia-actions/setup-julia@v1
        with:
          version: '1'
          arch: ${{ runner.arch }}
        if: steps.julia_in_path.outcome != 'success'
      # JULIA_PKG_SERVER is blanked before the General registry is added.
      - name: "Add the General registry via Git"
        run: |
          import Pkg
          ENV["JULIA_PKG_SERVER"] = ""
          Pkg.Registry.add("General")
        shell: julia --color=yes {0}
      - name: "Install CompatHelper"
        run: |
          import Pkg
          name = "CompatHelper"
          uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
          version = "3"
          Pkg.add(; name, uuid, version)
        shell: julia --color=yes {0}
      - name: "Run CompatHelper"
        run: |
          import CompatHelper
          CompatHelper.main()
        shell: julia --color=yes {0}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
          # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "MLBase"
uuid = "f0e99cf1-93fa-52ec-9ecc-5026115318e0"
version = "0.9.1"
version = "0.9.2"

[deps]
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
Expand All @@ -11,7 +11,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
[compat]
IterTools = "1"
Reexport = "1"
StatsBase = "0.33"
StatsBase = "0.33, 0.34"
julia = "1"

[extras]
Expand Down
1 change: 0 additions & 1 deletion src/MLBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ module MLBase
import Base: length, show, keys, precision, length, getindex
import Base: iterate
import Base.Order: lt, Ordering, ForwardOrdering, ReverseOrdering, Forward, Reverse
import StatsBase: RealVector, IntegerVector, RealMatrix, IntegerMatrix, RealArray
import IterTools: product

export
Expand Down
38 changes: 19 additions & 19 deletions src/classification.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# classify

function classify(x::RealVector, ord::Ordering)
function classify(x::AbstractVector{<:Real}, ord::Ordering)
n = length(x)
v = x[1]
k::Int = 1
Expand All @@ -18,9 +18,9 @@ function classify(x::RealVector, ord::Ordering)
return k
end

classify(x::RealVector) = classify(x, Forward)
classify(x::AbstractVector{<:Real}) = classify(x, Forward)

function classify!(r::IntegerVector, x::RealMatrix, ord::Ordering)
function classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}, ord::Ordering)
m = size(x, 1)
n = size(x, 2)
length(r) == n || throw(DimensionMismatch("Mismatched length of r."))
Expand All @@ -30,15 +30,15 @@ function classify!(r::IntegerVector, x::RealMatrix, ord::Ordering)
return r
end

classify!(r::IntegerVector, x::RealMatrix) = classify!(r, x, Forward)
classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}) = classify!(r, x, Forward)

# - this one throws a deprecation
classify(x::RealMatrix, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, ord)
classify(x::RealMatrix) = classify(x, Forward)
classify(x::AbstractMatrix{<:Real}, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, ord)
classify(x::AbstractMatrix{<:Real}) = classify(x, Forward)

# classify with score(s)

function classify_withscore(x::RealVector, ord::Ordering)
function classify_withscore(x::AbstractVector{<:Real}, ord::Ordering)
n = length(x)
v = x[1]
k::Int = 1
Expand All @@ -52,9 +52,9 @@ function classify_withscore(x::RealVector, ord::Ordering)
return (k, v)
end

classify_withscore(x::RealVector) = classify_withscore(x, Forward)
classify_withscore(x::AbstractVector{<:Real}) = classify_withscore(x, Forward)

function classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix, ord::Ordering)
function classify_withscores!(r::AbstractVector{<:Integer}, s::AbstractVector{<:Real}, x::AbstractMatrix{<:Real}, ord::Ordering)
m = size(x, 1)
n = size(x, 2)
length(r) == n || throw(DimensionMismatch("Mismatched length of r."))
Expand All @@ -66,27 +66,27 @@ function classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix, or
return (r, s)
end

classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix) =
classify_withscores!(r::AbstractVector{<:Integer}, s::AbstractVector{<:Real}, x::AbstractMatrix{<:Real}) =
classify_withscores!(r, s, x, Forward)

function classify_withscores(x::RealMatrix{T}, ord::Ordering) where T<:Real
# Allocating variant of `classify_withscores!`.
#
# Creates an `Int` vector `r` and a vector `s` with the element type of `x`,
# each with one entry per column of `x`, and passes them together with `x`
# and `ord` to `classify_withscores!`, whose result is returned.
#
# The element type is bound directly as `AbstractMatrix{T}`; the former
# spelling `AbstractMatrix{<:Real}{T}` applied a parameter to an already
# constrained type, which obscured the signature.
function classify_withscores(x::AbstractMatrix{T}, ord::Ordering) where T<:Real
    n = size(x, 2)
    r = Vector{Int}(undef, n)
    s = Vector{T}(undef, n)
    return classify_withscores!(r, s, x, ord)
end

classify_withscores(x::RealMatrix{T}) where {T<:Real} = classify_withscores(x, Forward)
# Convenience method: same as `classify_withscores(x, ord)` with the `Forward`
# ordering. The unused type parameter was dropped; the accepted argument types
# are unchanged (any `AbstractMatrix` with `Real` elements).
classify_withscores(x::AbstractMatrix{<:Real}) = classify_withscores(x, Forward)


# classify with threshold

classify(x::RealVector, t::Real, ord::Ordering) =
# Thresholded classification of a single score vector.
#
# Obtains the selected index and its score from `classify_withscore(x, ord)`.
# When that score is ordered before `t` under `ord`, the result is 0;
# otherwise it is the selected index.
function classify(x::AbstractVector{<:Real}, t::Real, ord::Ordering)
    best, score = classify_withscore(x, ord)
    return ifelse(lt(ord, score, t), 0, best)
end

classify(x::RealVector, t::Real) = classify(x, t, Forward)
classify(x::AbstractVector{<:Real}, t::Real) = classify(x, t, Forward)

function classify!(r::IntegerVector, x::RealMatrix, t::Real, ord::Ordering)
function classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}, t::Real, ord::Ordering)
m = size(x, 1)
n = size(x, 2)
length(r) == n || throw(DimensionMismatch("Mismatched length of r."))
Expand All @@ -96,10 +96,10 @@ function classify!(r::IntegerVector, x::RealMatrix, t::Real, ord::Ordering)
return r
end

classify!(r::IntegerVector, x::RealMatrix, t::Real) = classify!(r, x, t, Forward)
classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}, t::Real) = classify!(r, x, t, Forward)

classify(x::RealMatrix, t::Real, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, t, ord)
classify(x::RealMatrix, t::Real) = classify(x, t, Forward)
classify(x::AbstractMatrix{<:Real}, t::Real, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, t, ord)
classify(x::AbstractMatrix{<:Real}, t::Real) = classify(x, t, Forward)


## label map
Expand Down Expand Up @@ -154,7 +154,7 @@ labeldecode(lmap::LabelMap{T}, ys::AbstractArray{Int}) where {T} =

## group labels

function groupindices(k::Int, xs::IntegerVector; warning::Bool=true)
function groupindices(k::Int, xs::AbstractVector{<:Integer}; warning::Bool=true)
gs = Array{Vector{Int}}(undef, k)
for i = 1:k
gs[i] = Int[]
Expand Down
66 changes: 33 additions & 33 deletions src/perfeval.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

## correctrate & errorrate

correctrate(gt::IntegerVector, r::IntegerVector) = counteq(gt, r) / length(gt)
errorrate(gt::IntegerVector, r::IntegerVector) = countne(gt, r) / length(gt)
correctrate(gt::AbstractVector{<:Integer}, r::AbstractVector{<:Integer}) = counteq(gt, r) / length(gt)
errorrate(gt::AbstractVector{<:Integer}, r::AbstractVector{<:Integer}) = countne(gt, r) / length(gt)

## confusion matrix

function confusmat(k::Integer, gts::IntegerVector, preds::IntegerVector)
function confusmat(k::Integer, gts::AbstractVector{<:Integer}, preds::AbstractVector{<:Integer})
n = length(gts)
length(preds) == n || throw(DimensionMismatch("Inconsistent lengths."))
R = zeros(Int, k, k)
Expand All @@ -21,7 +21,7 @@ end

## counthits & hitrate

function counthits(gt::IntegerVector, rklst::IntegerMatrix, k::Integer)
function counthits(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, k::Integer)
n = length(gt)
size(rklst, 2) == n || throw(DimensionMismatch("Input dimensions mismatch."))
m = min(size(rklst, 1), Int(k))
Expand All @@ -40,7 +40,7 @@ function counthits(gt::IntegerVector, rklst::IntegerMatrix, k::Integer)
return cnt::Int
end

function counthits(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector)
function counthits(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, ks::AbstractVector{<:Integer})
n = length(gt)
size(rklst, 2) == n || throw(DimensionMismatch("Input dimensions mismatch."))
issorted(ks) || throw(DimensionMismatch("ks must be sorted."))
Expand All @@ -67,10 +67,10 @@ function counthits(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector)
end


hitrate(gt::IntegerVector, rklst::IntegerMatrix, k::Integer) =
hitrate(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, k::Integer) =
(counthits(gt, rklst, k) / length(gt))::Float64

function hitrates(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector)
function hitrates(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, ks::AbstractVector{<:Integer})
n = length(gt)
h = counthits(gt, rklst, ks)
nk = length(ks)
Expand Down Expand Up @@ -124,7 +124,7 @@ f1score(x::ROCNums) = (tp2 = x.tp + x.tp; tp2 / (tp2 + x.fp + x.fn) )
_ispos(x::Bool) = x
_ispos(x::Real) = x > zero(x)

function _roc(gt::IntegerVector, pr)
function _roc(gt::AbstractVector{<:Integer}, pr)
len = length(gt)
length(pr) == len || throw(DimensionMismatch("Inconsistent lengths."))

Expand Down Expand Up @@ -159,14 +159,14 @@ function _roc(gt::IntegerVector, pr)
end

# compute roc numbers based on prediction
roc(gt::IntegerVector, pr::IntegerVector) = _roc(gt, pr)
roc(gt::AbstractVector{<:Integer}, pr::AbstractVector{<:Integer}) = _roc(gt, pr)

##
# BinaryThresPredVec imitates a vector:
#
# v[i] := scores[i] < thres ? 0 : 1
#
struct BinaryThresPredVec{ScoreVec <: RealVector,
struct BinaryThresPredVec{ScoreVec <: AbstractVector{<:Real},
T <: Real,
Ord <: Ordering}
scores::ScoreVec
Expand All @@ -178,19 +178,19 @@ length(v::BinaryThresPredVec) = length(v.scores)
getindex(v::BinaryThresPredVec, i::Integer) = !lt(v.ord, v.scores[i], v.thres)

# compute roc numbers based on scores & threshold
roc(gt::IntegerVector, scores::RealVector, t::Real, ord::Ordering) =
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, t::Real, ord::Ordering) =
_roc(gt, BinaryThresPredVec(scores, t, ord))

roc(gt::IntegerVector, scores::RealVector, thres::Real) =
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, thres::Real) =
roc(gt, scores, thres, Forward)

##
# ThresPredVec imitates a vector:
#
# v[i] := scores[i] < thres ? 0 : preds[i]
#
struct ThresPredVec{PredVec <: IntegerVector,
ScoreVec <: RealVector,
struct ThresPredVec{PredVec <: AbstractVector{<:Integer},
ScoreVec <: AbstractVector{<:Real},
T <: Real,
Ord <: Ordering}

Expand All @@ -201,7 +201,7 @@ struct ThresPredVec{PredVec <: IntegerVector,
end

function ThresPredVec(
preds::PVec, scores::SVec, thres::T, ord::Ord) where {PVec<:IntegerVector,SVec<:RealVector,T<:Real,Ord<:Ordering}
preds::PVec, scores::SVec, thres::T, ord::Ord) where {PVec<:AbstractVector{<:Integer},SVec<:AbstractVector{<:Real},T<:Real,Ord<:Ordering}
n = length(preds)
length(scores) == n || throw(DimensionMismatch("Inconsistent lengths."))
ThresPredVec{PVec,SVec,T,Ord}(preds, scores, thres, ord)
Expand All @@ -211,10 +211,10 @@ length(v::ThresPredVec) = length(v.preds)
getindex(v::ThresPredVec, i::Integer) = ifelse(lt(v.ord, v.scores[i], v.thres), 0, v.preds[i])

# compute roc numbers based on predictions & scores & threshold
roc(gt::IntegerVector, preds::Tuple{PV,SV}, t::Real, ord::Ordering) where {PV<:IntegerVector,SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, t::Real, ord::Ordering) where {PV<:AbstractVector{<:Integer},SV<:AbstractVector{<:Real}} =
_roc(gt, ThresPredVec(preds..., t, ord))

roc(gt::IntegerVector, preds::Tuple{PV,SV}, thres::Real) where {PV<:IntegerVector,SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, thres::Real) where {PV<:AbstractVector{<:Integer},SV<:AbstractVector{<:Real}} =
roc(gt, preds, thres, Forward)


Expand All @@ -226,7 +226,7 @@ roc(gt::IntegerVector, preds::Tuple{PV,SV}, thres::Real) where {PV<:IntegerVecto
# threshold[i] <= x < threshold[i+1] --> i+1
# x >= threshold[n] --> n+1
#
function find_thresbin(x::Real, thresholds::RealVector, ord::Ordering)
function find_thresbin(x::Real, thresholds::AbstractVector{<:Real}, ord::Ordering)
n = length(thresholds)
r = 1
if !lt(ord, x, thresholds[1])
Expand All @@ -244,16 +244,16 @@ function find_thresbin(x::Real, thresholds::RealVector, ord::Ordering)
return r::Int
end

find_thresbin(x::Real, thresholds::RealVector) = find_thresbin(x, thresholds, Forward)
find_thresbin(x::Real, thresholds::AbstractVector{<:Real}) = find_thresbin(x, thresholds, Forward)

lin_thresholds(scores::RealArray, n::Integer, ord::ForwardOrdering) =
# Linearly spaced thresholds for the forward ordering.
#
# Takes the minimum and maximum of `scores` and returns the range that runs
# from the minimum to the maximum in steps of (max - min) / (n - 1).
function lin_thresholds(scores::AbstractArray{<:Real}, n::Integer, ord::ForwardOrdering)
    lo, hi = extrema(scores)
    delta = (hi - lo) / (n - 1)
    return lo:delta:hi
end

lin_thresholds(scores::RealArray, n::Integer, ord::ReverseOrdering{ForwardOrdering}) =
# Linearly spaced thresholds for the reversed forward ordering.
#
# Takes the minimum and maximum of `scores` and returns the descending range
# that runs from the maximum to the minimum in steps of (min - max) / (n - 1).
function lin_thresholds(scores::AbstractArray{<:Real}, n::Integer, ord::ReverseOrdering{ForwardOrdering})
    lo, hi = extrema(scores)
    delta = (lo - hi) / (n - 1)
    return hi:delta:lo
end

# roc for binary predictions
function roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector, ord::Ordering)
function roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, thresholds::AbstractVector{<:Real}, ord::Ordering)
issorted(thresholds, ord) || error("thresholds must be sorted w.r.t. the given ordering.")

ns = length(scores)
Expand Down Expand Up @@ -291,19 +291,19 @@ function roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector, ord:
return r
end

roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector) = roc(gt, scores, thresholds, Forward)
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, thresholds::AbstractVector{<:Real}) = roc(gt, scores, thresholds, Forward)

roc(gt::IntegerVector, scores::RealVector, n::Integer, ord::Ordering) =
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, n::Integer, ord::Ordering) =
roc(gt, scores, lin_thresholds(scores, n, ord), ord)

roc(gt::IntegerVector, scores::RealVector, n::Integer) = roc(gt, scores, n, Forward)
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, n::Integer) = roc(gt, scores, n, Forward)

roc(gt::IntegerVector, scores::RealVector, ord::Ordering) = roc(gt, scores, 100, ord)
roc(gt::IntegerVector, scores::RealVector) = roc(gt, scores, Forward)
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, ord::Ordering) = roc(gt, scores, 100, ord)
roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}) = roc(gt, scores, Forward)

# roc for multi-way predictions
function roc(
gt::IntegerVector, preds::Tuple{PV,SV}, thresholds::RealVector, ord::Ordering) where {PV<:IntegerVector,SV<:RealVector}
gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, thresholds::AbstractVector{<:Real}, ord::Ordering) where {PV<:AbstractVector{<:Integer},SV<:AbstractVector{<:Real}}

issorted(thresholds, ord) || error("thresholds must be sorted w.r.t. the given ordering.")
pr::PV = preds[1]
Expand Down Expand Up @@ -354,17 +354,17 @@ function roc(
return r
end

roc(gt::IntegerVector, preds::Tuple{PV,SV}, thresholds::RealVector) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, thresholds::AbstractVector{<:Real}) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, thresholds, Forward)

roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, n::Integer, ord::Ordering) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, lin_thresholds(preds[2],n,ord), ord)

roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, n::Integer) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, n, Forward)

roc(gt::IntegerVector, preds::Tuple{PV,SV}, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, ord::Ordering) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, 100, ord)

roc(gt::IntegerVector, preds::Tuple{PV,SV}) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} =
roc(gt, preds, Forward)
Loading
Loading