From 3fe7a5b6989a2a52974fb2d08a83430df8bd5618 Mon Sep 17 00:00:00 2001 From: Jason Chow Date: Fri, 8 Nov 2024 22:11:53 -0800 Subject: [PATCH] derivativeGP gpu support (#444) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Add gpu support for derivative GP. I noticed that this model isn’t actually like a normal model that can show up in a live experiment with a config, but we should still make it work for GPU. I did most of that but it did require some pretty arcane shenanigans with overriding GPyTorch’s underlying handling of train_inputs. This in turn made me do some arcane mypy stuff. Differential Revision: D65515631 --- aepsych/config.py | 2 +- aepsych/likelihoods/bernoulli.py | 8 +++--- aepsych/likelihoods/semi_p.py | 6 ++-- aepsych/means/constant_partial_grad.py | 2 +- aepsych/models/base.py | 12 +++++--- aepsych/models/derivative_gp.py | 5 +++- aepsych/plotting.py | 1 - tests_gpu/models/test_derivative_gp.py | 39 ++++++++++++++++++++++++++ 8 files changed, 60 insertions(+), 15 deletions(-) create mode 100644 tests_gpu/models/test_derivative_gp.py diff --git a/aepsych/config.py b/aepsych/config.py index afbc323be..e7c34698f 100644 --- a/aepsych/config.py +++ b/aepsych/config.py @@ -16,13 +16,13 @@ Callable, ClassVar, Dict, - Dict, List, Mapping, Optional, Sequence, TypeVar, ) + import botorch import gpytorch import numpy as np diff --git a/aepsych/likelihoods/bernoulli.py b/aepsych/likelihoods/bernoulli.py index 7f6fc66ac..8a71c2fdd 100644 --- a/aepsych/likelihoods/bernoulli.py +++ b/aepsych/likelihoods/bernoulli.py @@ -19,7 +19,7 @@ class BernoulliObjectiveLikelihood(_OneDimensionalLikelihood): def __init__(self, objective: Callable) -> None: """Initialize BernoulliObjectiveLikelihood. - + Args: objective (Callable): Objective function that maps function samples to probabilities.""" super().__init__() @@ -42,13 +42,13 @@ def forward( @classmethod def from_config(cls, config: Config) -> "BernoulliObjectiveLikelihood": """Create an instance from a configuration object. - + Args: config (Config): Configuration object. - + Returns: BernoulliObjectiveLikelihood: BernoulliObjectiveLikelihood instance. """ objective_cls = config.getobj(cls.__name__, "objective") objective = objective_cls.from_config(config) - return cls(objective=objective) \ No newline at end of file + return cls(objective=objective) diff --git a/aepsych/likelihoods/semi_p.py b/aepsych/likelihoods/semi_p.py index f6a337488..2d9bf9b9b 100644 --- a/aepsych/likelihoods/semi_p.py +++ b/aepsych/likelihoods/semi_p.py @@ -111,10 +111,10 @@ def expected_log_prob( # modified, TODO fixme upstream (cc @bletham) def log_prob_lambda(function_samples: torch.Tensor) -> torch.Tensor: """Lambda function to compute the log probability. - + Args: function_samples (torch.Tensor): Function samples. - + Returns: torch.Tensor: Log probability. 
""" @@ -142,4 +142,4 @@ def from_config(cls, config: Config) -> "LinearBernoulliLikelihood": else: objective = objective - return cls(objective=objective) \ No newline at end of file + return cls(objective=objective) diff --git a/aepsych/means/constant_partial_grad.py b/aepsych/means/constant_partial_grad.py index ead7ee6ed..e0af2c29a 100644 --- a/aepsych/means/constant_partial_grad.py +++ b/aepsych/means/constant_partial_grad.py @@ -26,6 +26,6 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: idx = input[..., -1].to(dtype=torch.long) > 0 mean_fit = super(ConstantMeanPartialObsGrad, self).forward(input[..., ~idx, :]) sz = mean_fit.shape[:-1] + torch.Size([input.shape[-2]]) - mean = torch.zeros(sz) + mean = torch.zeros(sz).to(input) mean[~idx] = mean_fit return mean diff --git a/aepsych/models/base.py b/aepsych/models/base.py index 67f2af75a..0490a20da 100644 --- a/aepsych/models/base.py +++ b/aepsych/models/base.py @@ -116,7 +116,7 @@ class AEPsychMixin(GPyTorchModel): extremum_solver = "Nelder-Mead" outcome_types: List[str] = [] - train_inputs: Optional[Tuple[torch.Tensor]] + train_inputs: Optional[Tuple[torch.Tensor, ...]] train_targets: Optional[torch.Tensor] @property @@ -398,7 +398,7 @@ def p_below_threshold( class AEPsychModelDeviceMixin(AEPsychMixin): - _train_inputs: Optional[Tuple[torch.Tensor]] + _train_inputs: Optional[Tuple[torch.Tensor, ...]] _train_targets: Optional[torch.Tensor] def set_train_data(self, inputs=None, targets=None, strict=False): @@ -423,13 +423,17 @@ def device(self) -> torch.device: return next(self.parameters()).device @property - def train_inputs(self) -> Optional[Tuple[torch.Tensor]]: + def train_inputs(self) -> Optional[Tuple[torch.Tensor, ...]]: if self._train_inputs is None: return None # makes sure the tensors are on the right device, move in place + _train_inputs = [] for input in self._train_inputs: - input.to(self.device) + _train_inputs.append(input.to(self.device)) + + _tuple_inputs: Tuple[torch.Tensor, ...] = tuple(_train_inputs) + self._train_inputs = _tuple_inputs return self._train_inputs diff --git a/aepsych/models/derivative_gp.py b/aepsych/models/derivative_gp.py index b338f5a7a..3b5a92292 100644 --- a/aepsych/models/derivative_gp.py +++ b/aepsych/models/derivative_gp.py @@ -13,6 +13,7 @@ import torch from aepsych.kernels.rbf_partial_grad import RBFKernelPartialObsGrad from aepsych.means.constant_partial_grad import ConstantMeanPartialObsGrad +from aepsych.models.base import AEPsychModelDeviceMixin from botorch.models.gpytorch import GPyTorchModel from gpytorch.distributions import MultivariateNormal from gpytorch.kernels import Kernel @@ -22,7 +23,9 @@ from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy -class MixedDerivativeVariationalGP(gpytorch.models.ApproximateGP, GPyTorchModel): +class MixedDerivativeVariationalGP( + gpytorch.models.ApproximateGP, AEPsychModelDeviceMixin, GPyTorchModel +): """A variational GP with mixed derivative observations. For more on GPs with derivative observations, see e.g. Riihimaki & Vehtari 2010. 
diff --git a/aepsych/plotting.py b/aepsych/plotting.py index bb4cab779..12f0e60f5 100644 --- a/aepsych/plotting.py +++ b/aepsych/plotting.py @@ -10,7 +10,6 @@ import matplotlib.pyplot as plt import numpy as np - import torch from aepsych.strategy import Strategy from aepsych.utils import get_lse_contour, get_lse_interval, make_scaled_sobol diff --git a/tests_gpu/models/test_derivative_gp.py b/tests_gpu/models/test_derivative_gp.py new file mode 100644 index 000000000..200ef62eb --- /dev/null +++ b/tests_gpu/models/test_derivative_gp.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +import torch +from aepsych import Config, SequentialStrategy +from aepsych.models.derivative_gp import MixedDerivativeVariationalGP +from botorch.fit import fit_gpytorch_mll +from botorch.utils.testing import BotorchTestCase +from gpytorch.likelihoods import BernoulliLikelihood +from gpytorch.mlls.variational_elbo import VariationalELBO + + +class TestDerivativeGP(BotorchTestCase): + def test_MixedDerivativeVariationalGP_gpu(self): + train_x = torch.cat( + (torch.tensor([1.0, 2.0, 3.0, 4.0]).unsqueeze(1), torch.zeros(4, 1)), dim=1 + ) + train_y = torch.tensor([1.0, 2.0, 3.0, 4.0]) + m = MixedDerivativeVariationalGP( + train_x=train_x, + train_y=train_y, + inducing_points=train_x, + fixed_prior_mean=0.5, + ).cuda() + + self.assertEqual(m.mean_module.constant.item(), 0.5) + self.assertEqual( + m.covar_module.base_kernel.raw_lengthscale.shape, torch.Size([1, 1]) + ) + mll = VariationalELBO( + likelihood=BernoulliLikelihood(), model=m, num_data=train_y.numel() + ).cuda() + mll = fit_gpytorch_mll(mll) + test_x = torch.tensor([[1.0, 0], [3.0, 1.0]]).cuda() + m(test_x)
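
Note (illustrative only, not part of the patch): the train_inputs handling described in the summary boils down to a small pattern that can be sketched on its own. The LazyDeviceInputs class below is a hypothetical stand-in, not aepsych or GPyTorch code; it only shows why the property rebuilds and re-caches the tuple: torch.Tensor.to() returns a new tensor rather than moving the original in place, so iterating over the cached inputs without reassigning them would leave them on the CPU.

import torch
from typing import Optional, Tuple


class LazyDeviceInputs(torch.nn.Module):
    """Hypothetical sketch of a device-aware train_inputs property."""

    def __init__(self, *inputs: torch.Tensor) -> None:
        super().__init__()
        # A parameter gives the module a well-defined device after .cuda()/.to().
        self.dummy = torch.nn.Parameter(torch.zeros(1))
        # Plain attributes are NOT moved by .cuda(), hence the property below.
        self._train_inputs: Optional[Tuple[torch.Tensor, ...]] = tuple(inputs)

    @property
    def device(self) -> torch.device:
        return next(self.parameters()).device

    @property
    def train_inputs(self) -> Optional[Tuple[torch.Tensor, ...]]:
        if self._train_inputs is None:
            return None
        # .to() is not in-place, so rebuild the tuple on the current device
        # and cache it back so later accesses do not repeat the copy.
        self._train_inputs = tuple(t.to(self.device) for t in self._train_inputs)
        return self._train_inputs


if torch.cuda.is_available():
    holder = LazyDeviceInputs(torch.rand(4, 2)).cuda()
    print(holder.train_inputs[0].device)  # cuda:0

The caching step is the design point: without reassigning the result of .to(), moving the model to the GPU would silently leave the stored training inputs on the CPU, which is the mismatch the patch guards against.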