Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow simulating from GLMM distributions that we can't fit #787

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/MixedModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using BSplineKit: interpolate
using Compat: @compat
using DataAPI: DataAPI, levels, refpool, refarray, refvalue
using Distributions: Distributions, Bernoulli, Binomial, Chisq, Distribution, Gamma
using Distributions: InverseGaussian, Normal, Poisson, ccdf
using Distributions: Geometric, InverseGaussian, Normal, Poisson, ccdf
using GLM: GLM, GeneralizedLinearModel, IdentityLink, InverseLink, LinearModel
using GLM: Link, LogLink, LogitLink, ProbitLink, SqrtLink
using GLM: canonicallink, glm, linkinv, dispersion, dispersion_parameter
Expand Down Expand Up @@ -55,6 +55,7 @@ export @formula,
Grouping,
Gamma,
GeneralizedLinearMixedModel,
Geometric,
HelmertCoding,
HypothesisCoding,
IdentityLink,
Expand Down
20 changes: 15 additions & 5 deletions src/generalizedlinearmixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,20 @@
)
end

_distwarn(::T) where T <: Union{Bernoulli,Binomial,Poisson} = nothing
function _distwarn(::T) where T <: Union{Gamma,InverseGaussian,Normal}
@warn """Results for families with a dispersion parameter are not reliable.
It is best to avoid trying to fit such models in MixedModels until
the authors gain a better understanding of those cases."""
return nothing
end

function _distwarn(::Geometric)
@warn "The results for geometric family models have not been confirmed to be reliable."
end

_distwarn(::T) where {T <: Distribution} = error("$(nameof(T)) distribution is not supported")

function GeneralizedLinearMixedModel(
f::FormulaTerm,
tbl::Tables.ColumnTable,
Expand All @@ -400,11 +414,7 @@
ArgumentError("use LinearMixedModel for Normal distribution with IdentityLink")
)

if !any(isa(d, dist) for dist in (Bernoulli, Binomial, Poisson))
@warn """Results for families with a dispersion parameter are not reliable.
It is best to avoid trying to fit such models in MixedModels until
the authors gain a better understanding of those cases."""
end
_distwarn(d)

LMM = LinearMixedModel(f, tbl; contrasts, wts, amalgamate)
y = copy(LMM.y)
Expand Down Expand Up @@ -719,7 +729,7 @@
io::IO, ::MIME"text/plain", m::GeneralizedLinearMixedModel{T,D}
) where {T,D}
if m.optsum.feval < 0
@warn("Model has not been fit")

Check warning on line 732 in src/generalizedlinearmixedmodel.jl

View workflow job for this annotation

GitHub Actions / Documentation

Model has not been fit
return nothing
end
nAGQ = m.LMM.optsum.nAGQ
Expand Down
39 changes: 28 additions & 11 deletions src/simulate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,34 @@ Note that `n` is the `n` parameter for the Binomial distribution,
*not* the number of draws from the RNG. It is then used to change the
random draw (an integer in [0, n]) into a probability (a float in [0,1]).
"""
function _rand(rng::AbstractRNG, d::Distribution, location, scale=NaN, n=1)
if !ismissing(scale)
throw(ArgumentError("Families with a dispersion parameter not yet supported"))
end
function _rand(rng::AbstractRNG, ::Binomial, location, scale=missing, n=1)
dist = Binomial(Int(n), location)
return rand(rng, dist) / n
end

if d isa Binomial
dist = Binomial(Int(n), location)
else
dist = typeof(d)(location)
end
function _rand(rng::AbstractRNG, ::T, location, scale=missing, n=1) where T <: Union{Bernoulli,Poisson}
dist = T(location)
return rand(rng, dist)
end

return rand(rng, dist) / n
function _rand(rng::AbstractRNG, ::Geometric, location, scale=missing, n=1)
dist = Geometric(location)
return rand(rng, dist)
end

function _rand(rng::AbstractRNG, ::T, location, scale, n=1) where T <: Union{Gamma,InverseGaussian}
@warn "Gamma and Inverse Gaussian are not supported in fitting and poorly tested in simulation" maxwarn=1
dist = T(location, scale)
return rand(rng, dist)
end

function _rand(rng::AbstractRNG, ::Normal, location, scale=missing, n=1)
# skip constructing a distribution
return scale * randn(rng) + location
end

function _rand(::AbstractRNG, ::T, ::Any, scale=missing, n=1) where T <: Distribution
throw(ArgumentError("$(nameof(T)) distribution is not currently supported"))
end

function simulate!(m::MixedModel{T}; β=fixef(m), σ=m.σ, θ=T[]) where {T}
Expand Down Expand Up @@ -275,7 +291,8 @@ function _simulate!(
# families with a dispersion parameter
mul!(η, fullrankx(lm), β, one(T), one(T))

μ = resp === nothing ? linkinv.(Link(m), η) : GLM.updateμ!(resp, η).mu
@info "" resp
μ = isnothing(resp) ? GLM.linkfun.(Link(m), η) : GLM.updateμ!(resp, η).mu

# convert to the distribution / add in noise
@inbounds for (idx, val) in enumerate(μ)
Expand Down
47 changes: 44 additions & 3 deletions test/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,51 @@ end
gm2sim = refit!(simulate!(StableRNG(42), deepcopy(gm2)); fast=true, progress=false)
@test isapprox(gm2.β, gm2sim.β; atol=norm(stderror(gm2)))
end

@testset "Geometric" begin
n = 1000
data = (; y=fill(1, n), g=rand('A':'G', n))
data = (; y=rand(StableRNG(42), Geometric(0.5), n),
g=rand('A':'G', n))
@test_logs (:warn, r"geometric") MixedModel(@formula(y ~ 1 + (1|g)), data, Geometric())
m = @suppress MixedModel(@formula(y ~ 1 + (1|g)), data, Geometric())
v = simulate(StableRNG(42), m; β=[exp(0.5)], θ=[0])
gmean = mean(rand(StableRNG(42), Geometric(), n))
@test mean(v) ≈ gmean atol=0.005

simulate!(StableRNG(42), m; β=[exp(0.5)], θ=[0])
v = response(m)
@test mean(v) ≈ gmean atol=0.005
end

@testset "_rand with dispersion" begin
@test_throws ArgumentError MixedModels._rand(StableRNG(42), Normal(), 1, 1, 1)
@test_throws ArgumentError MixedModels._rand(StableRNG(42), Gamma(), 1, 1, 1)
@test_throws ArgumentError MixedModels._rand(StableRNG(42), InverseGaussian(), 1, 1, 1)
@test_throws ArgumentError MixedModels._rand(StableRNG(42), NegativeBinomial(), 1, 1)

n = 1000
data = (; y=fill(1, n), g=rand('A':'G', n))
m = @suppress MixedModel(@formula(y ~ 1 + (1|g)), data, Normal(), LogLink())
simulate!(StableRNG(42), m; β=[1], θ=[0], σ=1)
v = response(m)
# inverse link is exp
@test mean(v) ≈ exp(1) atol=0.05

m = @suppress MixedModel(@formula(y ~ 1 + (1|g)), data, Gamma())
# with StableRNG(42) we actually get a few negative values?!?
simulate!(StableRNG(43), m; β=[1], θ=[0], σ=1)
v = response(m)
# inverse link of the reciprocal link is the reciprocal, which is just 1 here
@test mean(v) ≈ 1 atol=0.05
# m0 = glm(fill(1, n, 1), v, Gamma())
# @test only(coef(m0)) ≈ 1 atol=0.05

m = @suppress MixedModel(@formula(y ~ 1 + (1|g)), data, InverseGaussian())
# with StableRNG(42) we actually get a few negative values?!?
simulate!(StableRNG(43), m; β=[1], θ=[0], σ=1)
v = response(m)
# canonical link is InverseSquareLink(), but 1 is a fixed point under that link
@test mean(v) ≈ 1 atol=0.05
# m0 = glm(fill(1, n, 1), v, InverseGaussian())
# @test only(coef(m0)) ≈ 1 atol=0.05
end
end

Expand Down
Loading