Add methods for calling multi-response gaussian family (#66)
Fixes #65
BenjaminDoran authored Sep 10, 2024
1 parent 47da7a0 commit 4167f0c
Showing 19 changed files with 1,111 additions and 10 deletions.
6 changes: 4 additions & 2 deletions Project.toml
@@ -1,6 +1,6 @@
 name = "GLMNet"
 uuid = "8d5ece8b-de18-5317-b113-243142960cc6"
-version = "0.7.2"
+version = "0.7.3"

 [deps]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
@@ -22,6 +22,8 @@ julia = "1.6"
 [extras]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+

 [targets]
-test = ["Test", "CategoricalArrays"]
+test = ["Test", "CategoricalArrays", "CSV"]
3 changes: 2 additions & 1 deletion README.md
@@ -145,7 +145,7 @@ julia> vline!([lambdamin(iris_cv)])

 ## Fitting models

-`glmnet` has two required parameters: the n x m predictor matrix `X` and the dependent variable `y`. It additionally accepts an optional third argument, `family`, which can be used to specify a generalized linear model. Currently, `Normal()` (least squares, default), `Binomial()` (logistic), `Poisson()` , `Multinomial()`, `CoxPH()` (Cox model) are supported.
+`glmnet` has two required parameters: the n x m predictor matrix `X` and the dependent variable `y`. It additionally accepts an optional third argument, `family`, which can be used to specify a generalized linear model. Currently, `Normal()` (least squares, default), `MvNormal()` (multi-response Gaussian), `Binomial()` (logistic), `Poisson()`, `Multinomial()`, and `CoxPH()` (Cox model) are supported.

 - For linear and Poisson models, `y` is a numerical vector of length n.
 - For logistic models, `y` is either a string vector of length n or a n x 2 matrix, where the first column is the count of negative responses for each row in `X` and the second column is the count of positive responses.
@@ -166,6 +166,7 @@ julia> vline!([lambdamin(iris_cv)])
 - `lambda`: The λ values to consider. By default, this is determined from `nlambda` and `lambda_min_ratio`.
 - `tol`: Convergence criterion, with the default value of `1e-7`.
 - `standardize`: Whether to standardize predictors so that they are in the same units, with the default value of `true`. Beta values are always presented on the original scale.
+- `standardize_response` (only for `MvNormal()`): Whether to standardize the response variables so that they are in the same units. Defaults to `false`.
 - `intercept`: Whether to fit an intercept term. The intercept is always unpenalized. The default value is `true`.
 - `maxit`: The maximum number of iterations of the cyclic coordinate descent algorithm. If convergence is not achieved, a warning is returned. The default value is `1e6`.
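For a quick feel of the API documented above, here is a minimal sketch of fitting the new family (made-up data; `X`, `B`, and `Y` are illustrative stand-ins, not taken from the README):

using GLMNet, Random

Random.seed!(0)
X = randn(100, 5)                    # n = 100 observations, m = 5 predictors
B = [2.0 0.0; 0.0 -1.5; 1.0 1.0; 0.0 0.0; 0.5 0.0]
Y = X * B .+ 0.1 .* randn(100, 2)    # n x k matrix of k = 2 responses

# For MvNormal, y is an n x k numerical matrix rather than a vector
path = glmnet(X, Y, MvNormal(); standardize_response = false)
size(path.betas)                     # (predictors, responses, lambdas)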
9 changes: 5 additions & 4 deletions src/GLMNet.jl
@@ -235,7 +235,7 @@ macro validate_and_init()
         if !isempty(lambda)
             # user-specified lambda values
             nlambda == 100 || error("cannot specify both lambda and nlambda")
-            lambda_min_ratio == (length(y) < size(X, 2) ? 1e-2 : 1e-4) ||
+            lambda_min_ratio == (size(y, 1) < size(X, 2) ? 1e-2 : 1e-4) ||
                 error("cannot specify both lambda and lambda_min_ratio")
             nlambda = length(lambda)
             lambda_min_ratio = 2.0
@@ -514,6 +514,7 @@ function show(io::IO, cv::GLMNetCrossValidation)
 end

 include("Multinomial.jl")
+include("MultiResponseNet.jl")
 include("CoxNet.jl")

 function glmnetcv(X::AbstractMatrix, y::Union{AbstractVector{<:Number},AbstractMatrix{<:Number}},
@@ -534,10 +534,10 @@ function glmnetcv(X::AbstractMatrix, y::Union{AbstractVector{<:Number},AbstractM
         y = convert(Array{Float64}, y)
     end
     # Fit full model once to determine parameters
-    offsets = (offsets != nothing) ? offsets : isa(family, Multinomial) ? y*0.0 : zeros(size(X, 1))
+    offsets = (offsets != nothing) ? offsets : isa(family, Union{Multinomial, MvNormal}) ? y*0.0 : zeros(size(X, 1))


-    if isa(family, Normal)
+    if isa(family, Union{Normal, MvNormal})
         path = glmnet(X, y, family; weights = weights, kw...)
     else
         path = glmnet(X, y, family; weights = weights, offsets = offsets, kw...)
@@ -560,7 +561,7 @@ function glmnetcv(X::AbstractMatrix, y::Union{AbstractVector{<:Number},AbstractM
         f = folds .== i
         holdoutidx = findall(f)
         modelidx = findall(!,f)
-        if isa(family, Normal)
+        if isa(family, Union{Normal, MvNormal})
             g = glmnet!(X[modelidx, :], isa(y, AbstractVector) ? y[modelidx] : y[modelidx, :], family;
                 weights=weights[modelidx], lambda=path.lambda, kw...)
         else
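A hedged sketch of the cross-validation path these branches enable (hypothetical data; `MvNormal` is now routed through the same offset-free branch as `Normal`):

using GLMNet, Random

Random.seed!(1)
X = randn(120, 6)
Y = X[:, 1:2] .+ 0.1 .* randn(120, 2)   # two responses

cv = glmnetcv(X, Y, MvNormal(); nfolds = 5)
lambdamin(cv)   # λ with the lowest mean cross-validated loss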
188 changes: 188 additions & 0 deletions src/MultiResponseNet.jl
@@ -0,0 +1,188 @@
import Distributions.MvNormal

# Zero-argument constructor used purely as a family marker for dispatch;
# the placeholder mean and covariance are never used in fitting.
MvNormal() = MvNormal([0, 0], [1 0; 0 1])
modeltype(::MvNormal) = "MvNormal"


function predict(path::GLMNetPath{<:MvNormal}, X::AbstractMatrix,
                 model::Union{Int, AbstractVector{Int}}=1:length(path.lambda);
                 outtype = :link, offsets = zeros(size(X, 1), size(path.betas, 2)))
    nresp = size(path.betas, 2)
    out = zeros(Float64, size(X, 1), nresp, length(model))
    for i = 1:length(model)
        out[:, :, i] = repeat(path.a0[:, model[i]]', size(X, 1)) + X * path.betas[:, :, model[i]] + offsets
    end
    if outtype != :link
        # Apply the inverse link elementwise, once for the whole array
        out = exp.(out)
    end
    if length(model) == 1
        return out[:, :, 1]
    else
        return out
    end
end

nactive(g::GLMNetPath{<:MvNormal}, b::AbstractVector{Int}=1:size(g.betas, 3)) =
    [nactive(g.betas, j, dims=2) for j in b]

function show(io::IO, g::GLMNetPath{<:MvNormal})
    println(io, "$(modeltype(g.family)) GLMNet Solution Path ($(size(g.betas, 3)) solutions for $(size(g.betas, 1)) predictors in $(g.npasses) passes):")
    print(io, DataFrame(df=nactive(g), pct_dev=g.dev_ratio, λ=g.lambda))
end

struct MultiMSE <: Loss
    y::Matrix{Float64}
end
loss(l::MultiMSE, i, mu) = sum(abs2.(l.y[i, :] .- mu))

devloss(::MvNormal, y) = MultiMSE(y)

function loss(path::GLMNetPath{<:MvNormal}, X::AbstractMatrix{Float64},
              y::Union{AbstractVector{Float64}, AbstractMatrix{Float64}},
              weights::AbstractVector{Float64}=ones(size(y, 1)),
              lossfun::Loss=devloss(path.family, y),
              model::Union{Int, AbstractVector{Int}}=1:length(path.lambda);
              offsets = zeros(size(X, 1), size(path.betas, 2)))
    validate_x_y_weights(X, y, weights)
    mu = predict(path, X; offsets = offsets)
    devs = zeros(size(mu, 3))
    for j = 1:size(mu, 3), i = 1:size(mu, 1)
        devs[j] += loss(lossfun, i, vec(mu[i, :, j])) * weights[i]
    end
    devs / sum(weights)
end

loss(path::GLMNetPath{<:MvNormal}, X::AbstractMatrix, y::Union{AbstractVector, AbstractMatrix},
     weights::AbstractVector=ones(size(y, 1)), va...; kw...) =
    loss(path, convert(Matrix{Float64}, X),
         convert(Array{Float64}, y),
         convert(Vector{Float64}, weights), va...; kw...)



macro check_and_return_mvnormal()
    esc(quote
        check_jerr(jerr[], maxit, pmax)
        lmu = lmu_ref[]
        # first lambda is infinity; changed to entry point
        if isempty(lambda) && length(alm) > 2
            alm[1] = exp(2*log(alm[2]) - log(alm[3]))
        end
        GLMNetPath(family, a0[:, 1:lmu], ca[sortperm(ia), :, 1:lmu],
                   null_dev[], fdev[1:lmu], alm[1:lmu], Int(nlp[]))
    end)
end

# change of parameters from elnet to multelnet
# ka,parm,no,ni,nr, x,y,w,jd, vp,cl,ne,nx, nlam,flmin,ulam,thr, isd, ,intr,maxit,lmu, a0,ca,ia,nin, rsq,alm,nlp,jerr
# parm,no,ni,nr, x,y,w,jd, vp,cl,ne,nx, nlam,flmin,ulam,thr, isd,jsd,intr,maxit,lmu, a0,ca,ia,nin, rsq,alm,nlp,jerr
# multi-response normal
function glmnet!(X::Matrix{Float64}, y::Matrix{Float64},
                 family::MvNormal=MvNormal();
                 weights::Vector{Float64}=ones(size(y, 1)),
                 naivealgorithm::Bool=(size(X, 2) >= 500), alpha::Real=1.0,
                 penalty_factor::Vector{Float64}=ones(size(X, 2)),
                 constraints::Array{Float64, 2}=[x for x in (-Inf, Inf), y in 1:size(X, 2)],
                 dfmax::Int=size(X, 2), pmax::Int=min(dfmax*2+20, size(X, 2)), nlambda::Int=100,
                 lambda_min_ratio::Real=(size(y, 1) < size(X, 2) ? 1e-2 : 1e-4),
                 lambda::Vector{Float64}=Float64[], tol::Real=1e-7, standardize::Bool=true,
                 standardize_response::Bool=false,
                 intercept::Bool=true, maxit::Int=1000000)

    @validate_and_init_multi
    standardize_response = Int32(standardize_response)

    # Compute null deviance
    yw = y .* repeat(weights, 1, size(y, 2))
    mu = mean(y, dims=1)
    if intercept == 0
        mu = fill(intercept, 1, size(y, 2))
    end
    # Sum of squared error (weighted by observation weights)
    null_dev[] = sum(weights) .* mean(abs2.(yw .- mu))

    ccall((:multelnet_, libglmnet), Cvoid,
          (Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{Int32},
           Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int32},
           Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32},
           Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Float64},
           Ref{Int32}, Ref{Int32}, Ref{Int32}, Ref{Int32}, Ref{Int32},
           Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32},
           Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32}),
          alpha, nobs, nvars, nresp,
          X, y, weights, jd,
          penalty_factor, constraints, dfmax, pmax,
          nlambda, lambda_min_ratio, lambda, tol,
          standardize, standardize_response, intercept, maxit, lmu_ref,
          a0, ca, ia, nin,
          fdev, alm, nlp, jerr)

    @check_and_return_mvnormal
end



# multi-response sparse normal
function glmnet!(X::AbstractSparseMatrix{Float64}, y::Matrix{Float64},
                 family::MvNormal=MvNormal();
                 weights::Vector{Float64}=ones(size(y, 1)),
                 naivealgorithm::Bool=(size(X, 2) >= 500), alpha::Real=1.0,
                 penalty_factor::Vector{Float64}=ones(size(X, 2)),
                 constraints::Array{Float64, 2}=[x for x in (-Inf, Inf), y in 1:size(X, 2)],
                 dfmax::Int=size(X, 2), pmax::Int=min(dfmax*2+20, size(X, 2)), nlambda::Int=100,
                 lambda_min_ratio::Real=(size(y, 1) < size(X, 2) ? 1e-2 : 1e-4),
                 lambda::Vector{Float64}=Float64[], tol::Real=1e-7, standardize::Bool=true,
                 standardize_response::Bool=false,
                 intercept::Bool=true, maxit::Int=1000000)

    @validate_and_init_multi
    standardize_response = Int32(standardize_response)

    # Compute null deviance
    yw = y .* repeat(weights, 1, size(y, 2))
    mu = mean(y, dims=1)
    if intercept == 0
        mu = fill(intercept, 1, size(y, 2))
    end
    # Sum of squared error (weighted by observation weights)
    null_dev[] = sum(weights) .* mean(abs2.(yw .- mu))

    ccall((:multspelnet_, libglmnet), Cvoid,
          (Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{Int32},
           Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Int32},
           Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32},
           Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Float64},
           Ref{Int32}, Ref{Int32}, Ref{Int32}, Ref{Int32}, Ref{Int32},
           Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32},
           Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32}),
          alpha, nobs, nvars, nresp,
          X.nzval, X.colptr, X.rowval, y, weights, jd,
          penalty_factor, constraints, dfmax, pmax,
          nlambda, lambda_min_ratio, lambda, tol,
          standardize, standardize_response, intercept, maxit, lmu_ref,
          a0, ca, ia, nin,
          fdev, alm, nlp, jerr)

    @check_and_return_mvnormal
end

glmnet(X::Matrix{Float64}, y::Matrix{Float64}, family::MvNormal; kw...) =
    glmnet!(copy(X), copy(y), family; kw...)
glmnet(X::SparseMatrixCSC{Float64,Int32}, y::Matrix{Float64}, family::MvNormal; kw...) =
    glmnet!(copy(X), copy(y), family; kw...)
glmnet(X::AbstractMatrix, y::AbstractMatrix{<:Number}, family::MvNormal; kw...) =
    glmnet(convert(Matrix{Float64}, X), convert(Matrix{Float64}, y), family; kw...)
glmnet(X::SparseMatrixCSC, y::AbstractMatrix{<:Number}, family::MvNormal; kw...) =
    glmnet(convert(SparseMatrixCSC{Float64,Int32}, X), convert(Matrix{Float64}, y), family; kw...)
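A short usage sketch for the methods defined in this file (illustrative data; the `exp` branch of `predict` is only reached when `outtype != :link`):

using GLMNet, Random

Random.seed!(2)
X = randn(60, 4)
Y = hcat(X[:, 1], X[:, 2] .- X[:, 3]) .+ 0.1 .* randn(60, 2)

path = glmnet(X, Y, MvNormal())

Yhat = predict(path, X, 10)   # linear predictions at the 10th λ: a 60×2 matrix
devs = loss(path, X, Y)       # mean weighted squared error at every λ on the path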
6 changes: 3 additions & 3 deletions src/Multinomial.jl
@@ -62,14 +62,14 @@ loss(path::GLMNetPath{<:Multinomial}, X::AbstractMatrix, y::Union{AbstractVector

 # Get number of active predictors for a model in X
 # nin can be > non-zero predictors under some circumstances...
-nactive(X::Array{Float64, 3}, b::Int) = sum(sum(X[:,:,b] .!= 0., dims=1) .> 0)
+nactive(X::Array{Float64, 3}, b::Int; dims=1) = sum(sum(X[:,:,b] .!= 0., dims=dims) .> 0)

 nactive(X::Array{Float64, 3}, b::AbstractVector{Int}=1:size(X, 3)) =
     [nactive(X, j) for j in b]


 function show(io::IO, g::GLMNetPath{<:Multinomial})
-    println(io, "$(modeltype(g.family)) GLMNet Solution Path ($(size(g.betas, 2)) solutions for $(size(g.betas, 1)) predictors in $(g.npasses) passes):")
+    println(io, "$(modeltype(g.family)) GLMNet Solution Path ($(size(g.betas, 3)) solutions for $(size(g.betas, 1)) predictors in $(g.npasses) passes):")
     print(io, DataFrame(df=nactive(g.betas), pct_dev=g.dev_ratio, λ=g.lambda))
 end

@@ -86,7 +86,7 @@ macro validate_and_init_multi()
         if !isempty(lambda)
             # user-specified lambda values
             nlambda == 100 || error("cannot specify both lambda and nlambda")
-            lambda_min_ratio == (length(y) < size(X, 2) ? 1e-2 : 1e-4) ||
+            lambda_min_ratio == (size(y, 1) < size(X, 2) ? 1e-2 : 1e-4) ||
                 error("cannot specify both lambda and lambda_min_ratio")
             nlambda = length(lambda)
             lambda_min_ratio = 2.0
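To see what the new `dims` keyword changes, a tiny illustration (made-up coefficient array, not package data):

B = zeros(4, 2, 1)    # 4 predictors, 2 responses, 1 λ
B[1, 1, 1] = 0.7
B[1, 2, 1] = 0.3

# dims=1 (Multinomial default): responses with any nonzero coefficient
sum(sum(B[:, :, 1] .!= 0.0, dims=1) .> 0)   # == 2 (both responses touched)

# dims=2 (used by MvNormal): predictors active in at least one response
sum(sum(B[:, :, 1] .!= 0.0, dims=2) .> 0)   # == 1 (only predictor 1 is active)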

2 comments on commit 4167f0c

@JackDunnNZ (Collaborator)
@JuliaRegistrator
Registration pull request created: JuliaRegistries/General/114950

Tip: Release Notes

Did you know you can add release notes too? Just add markdown-formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates, e.g.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here, just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.7.3 -m "<description of version>" 4167f0cfafe52b40dbd889cddc123a3832d03431
git push origin v0.7.3
