From d9d4effcfc52b93299a0d81bfdc17462b32b523f Mon Sep 17 00:00:00 2001 From: Yousif Alsaffar <95584811+yalsaffar@users.noreply.github.com> Date: Tue, 19 Nov 2024 18:47:40 -0800 Subject: [PATCH] Improve Docstrings in `aepsych/models` (#423) Summary: Improves documentation in `aepsych/models` for better clarity and consistency. - Adds missing docstrings to functions and methods across all models. - Updates existing docstrings with refined type hints and a unified structure. Pull Request resolved: https://github.com/facebookresearch/aepsych/pull/423 Reviewed By: crasanders Differential Revision: D65858741 Pulled By: JasonKChow fbshipit-source-id: e10d788a0d271838157a6dc421052369ff02ff9e --- aepsych/models/base.py | 121 ++++++++++++++++----- aepsych/models/derivative_gp.py | 2 +- aepsych/models/gp_classification.py | 43 ++++++-- aepsych/models/gp_regression.py | 34 +++--- aepsych/models/monotonic_projection_gp.py | 40 +++++++ aepsych/models/monotonic_rejection_gp.py | 111 ++++++++++++------- aepsych/models/multitask_regression.py | 49 +++++++-- aepsych/models/ordinal_gp.py | 27 ++++- aepsych/models/pairwise_probit.py | 84 ++++++++++++++- aepsych/models/semi_p.py | 125 ++++++++++++++++++---- aepsych/models/utils.py | 103 +++++++++++++++--- 11 files changed, 599 insertions(+), 140 deletions(-) diff --git a/aepsych/models/base.py b/aepsych/models/base.py index 7a00b14b5..18ab9e939 100644 --- a/aepsych/models/base.py +++ b/aepsych/models/base.py @@ -10,7 +10,7 @@ import time from collections.abc import Iterable from copy import deepcopy -from typing import Any, Dict, List, Mapping, Optional, Protocol, Tuple, Union +from typing import Any, Callable, Dict, List, Mapping, Optional, Protocol, Tuple, Union import gpytorch import numpy as np @@ -91,7 +91,7 @@ def _get_extremum( extremum_type: str, locked_dims: Optional[Mapping[int, List[float]]], n_samples=1000, - ) -> Tuple[float, np.ndarray]: + ) -> Tuple[float, torch.Tensor]: pass def dim_grid(self, gridsize: int = 30) -> torch.Tensor: @@ -131,14 +131,17 @@ def get_max( max_time: Optional[float] = None, ) -> Tuple[float, torch.Tensor]: """Return the maximum of the modeled function, subject to constraints + Args: - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the - inverse is along a slice of the full surface. + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the + inverse is along a slice of the full surface. Defaults to None. probability_space (bool): Is y (and therefore the returned nearest_y) in probability space instead of latent function space? Defaults to False. - n_samples int: number of coarse grid points to sample for optimization estimate. + n_samples (int): number of coarse grid points to sample for optimization estimate. + max_time (float, optional): Maximum time to spend optimizing. Defaults to None. + Returns: - Tuple[float, np.ndarray]: Tuple containing the max and its location (argmax). + Tuple[float, torch.Tensor]: Tuple containing the max and its location (argmax). """ locked_dims = locked_dims or {} _, _arg = get_extremum( @@ -160,11 +163,13 @@ def get_min( ) -> Tuple[float, torch.Tensor]: """Return the minimum of the modeled function, subject to constraints Args: - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the inverse is along a slice of the full surface. 
probability_space (bool): Is y (and therefore the returned nearest_y) in probability space instead of latent function space? Defaults to False. - n_samples int: number of coarse grid points to sample for optimization estimate. + n_samples (int): number of coarse grid points to sample for optimization estimate. + max_time (float, optional): Maximum time to spend optimizing. Defaults to None. + Returns: Tuple[float, torch.Tensor]: Tuple containing the min and its location (argmin). """ @@ -191,12 +196,17 @@ def inv_query( """Query the model inverse. Return nearest x such that f(x) = queried y, and also return the value of f at that point. + Args: y (float): Points at which to find the inverse. - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the inverse is along a slice of the full surface. probability_space (bool): Is y (and therefore the returned nearest_y) in probability space instead of latent function space? Defaults to False. + n_samples (int): number of coarse grid points to sample for optimization estimate. Defaults to 1000. + max_time (float, optional): Maximum time to spend optimizing. Defaults to None. + weights (torch.Tensor, optional): Weights for the optimization. Defaults to None. + Returns: Tuple[float, torch.Tensor]: Tuple containing the value of f nearest to queried y and the x position of this value. @@ -220,7 +230,7 @@ def inv_query( def get_jnd( self: ModelProtocol, - grid: Optional[Union[np.ndarray, torch.Tensor]] = None, + grid: Optional[torch.Tensor] = None, cred_level: Optional[float] = None, intensity_dim: int = -1, confsamps: int = 500, @@ -239,20 +249,17 @@ def get_jnd( Both definitions are equivalent for linear psychometric functions. Args: - grid (Optional[np.ndarray], optional): Mesh grid over which to find the JND. - Defaults to a square grid of size as determined by aepsych.utils.dim_grid + grid (torch.Tensor, optional): Mesh grid over which to find the JND. + Defaults to a square grid of size as determined by aepsych.utils.dim_grid. cred_level (float, optional): Credible level for computing an interval. Defaults to None, computing no interval. - intensity_dim (int, optional): Dimension over which to compute the JND. + intensity_dim (int): Dimension over which to compute the JND. Defaults to -1. - confsamps (int, optional): Number of posterior samples to use for + confsamps (int): Number of posterior samples to use for computing the credible interval. Defaults to 500. - method (str, optional): "taylor" or "step" method (see docstring). + method (str): "taylor" or "step" method (see docstring). Defaults to "step". - Raises: - RuntimeError: for passing an unknown method. - Returns: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]: either the mean JND, or a median, lower, upper tuple of the JND posterior. @@ -316,6 +323,12 @@ def dim_grid( gridsize: int = 30, slice_dims: Optional[Mapping[int, float]] = None, ) -> torch.Tensor: + """Generate a grid based on lower, upper, and dim. + + Args: + gridsize (int): Number of points in each dimension. Defaults to 30. + slice_dims (Mapping[int, float], optional): Dimensions to fix at a certain value. Defaults to None. + """ return dim_grid(self.lb, self.ub, gridsize, slice_dims) def set_train_data( @@ -325,9 +338,13 @@ def set_train_data( strict: bool = False, ): """ - :param torch.Tensor inputs: The new training inputs. - :param torch.Tensor targets: The new training targets. 
- :param bool strict: (default False, ignored). Here for compatibility with + Set the training data for the model. + + Args: + inputs (torch.Tensor, optional): The new training inputs. + targets (torch.Tensor, optional): The new training targets. + strict (bool): Default is False. Ignored, just for compatibility. + input transformers. TODO: actually use this arg or change input transforms to not require it. """ @@ -356,9 +373,16 @@ def _fit_mll( self, mll: MarginalLogLikelihood, optimizer_kwargs: Optional[Dict[str, Any]] = None, - optimizer=fit_gpytorch_mll_scipy, + optimizer: Callable = fit_gpytorch_mll_scipy, **kwargs, ) -> None: + """Fits the model by maximizing the marginal log likelihood. + + Args: + mll (MarginalLogLikelihood): Marginal log likelihood object. + optimizer_kwargs (Dict[str, Any], optional): Keyword arguments for the optimizer. + optimizer (Callable): Optimizer to use. Defaults to fit_gpytorch_mll_scipy. + """ self.train() train_x, train_y = mll.model.train_inputs[0], mll.model.train_targets optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs.copy() @@ -385,8 +409,19 @@ def _fit_mll( return res def p_below_threshold( - self, x, f_thresh - ) -> torch.Tensor: # Return a tensor instead of NumPy array + self, + x: torch.Tensor, + f_thresh: torch.Tensor + ) -> torch.Tensor: + """Compute the probability that the latent function is below a threshold. + + Args: + x (torch.Tensor): Points at which to evaluate the probability. + f_thresh (torch.Tensor): Threshold value. + + Returns: + torch.Tensor: Probability that the latent function is below the threshold. + """ f, var = self.predict(x) f_thresh = f_thresh.reshape(-1, 1) f = f.reshape(1, -1) @@ -400,11 +435,14 @@ class AEPsychModelDeviceMixin(AEPsychMixin): _train_inputs: Optional[Tuple[torch.Tensor]] _train_targets: Optional[torch.Tensor] - def set_train_data(self, inputs=None, targets=None, strict=False): - """ - :param torch.Tensor inputs: The new training inputs. - :param torch.Tensor targets: The new training targets. - :param bool strict: (default False, ignored). Here for compatibility with + def set_train_data(self, inputs: Optional[torch.Tensor] = None, targets: Optional[torch.Tensor] = None, strict: bool = False) -> None: + """Set the training data for the model. + + Args: + inputs (torch.Tensor, optional): The new training inputs X. + targets (torch.Tensor, optional): The new training targets Y. + strict (bool): Whether to strictly enforce the device of the inputs and targets. + input transformers. TODO: actually use this arg or change input transforms to not require it. """ @@ -417,12 +455,22 @@ def set_train_data(self, inputs=None, targets=None, strict=False): @property def device(self) -> torch.device: + """Get the device of the model. + + Returns: + torch.device: Device of the model. + """ # We assume all models have some parameters and all models will only use one device # notice that this has no setting, don't let users set device, use .to(). return next(self.parameters()).device @property def train_inputs(self) -> Optional[Tuple[torch.Tensor]]: + """Get the training inputs. + + Returns: + Optional[Tuple[torch.Tensor]]: Training inputs. + """ if self._train_inputs is None: return None @@ -434,6 +482,11 @@ def train_inputs(self) -> Optional[Tuple[torch.Tensor]]: @train_inputs.setter def train_inputs(self, train_inputs: Optional[Tuple[torch.Tensor]]) -> None: + """Set the training inputs. + + Args: + train_inputs (Tuple[torch.Tensor]): Training inputs. 
+ """ if train_inputs is None: self._train_inputs = None else: @@ -446,6 +499,11 @@ def train_inputs(self, train_inputs: Optional[Tuple[torch.Tensor]]) -> None: @property def train_targets(self) -> Optional[torch.Tensor]: + """Get the training targets. + + Returns: + Optional[torch.Tensor]: Training targets. + """ if self._train_targets is None: return None @@ -456,6 +514,11 @@ def train_targets(self) -> Optional[torch.Tensor]: @train_targets.setter def train_targets(self, train_targets: Optional[torch.Tensor]) -> None: + """Set the training targets. + + Args: + train_targets (torch.Tensor, optional): Training targets. + """ if train_targets is None: self._train_targets = None else: diff --git a/aepsych/models/derivative_gp.py b/aepsych/models/derivative_gp.py index b338f5a7a..08691bd43 100644 --- a/aepsych/models/derivative_gp.py +++ b/aepsych/models/derivative_gp.py @@ -50,7 +50,7 @@ def __init__( is an observation of df/dx_i. train_y (torch.Tensor): Training y points inducing_points (torch.Tensor): Inducing points to use - scales (Union[torch.Tensor, float], optional): Typical scale of each dimension + scales (Union[torch.Tensor, float]): Typical scale of each dimension of input space (this is used to set the lengthscale prior). Defaults to 1.0. mean_module (Mean, optional): A mean class that supports derivative diff --git a/aepsych/models/gp_classification.py b/aepsych/models/gp_classification.py index bdd8fdf0d..325fdd8cc 100644 --- a/aepsych/models/gp_classification.py +++ b/aepsych/models/gp_classification.py @@ -62,8 +62,8 @@ def __init__( """Initialize the GP Classification model Args: - lb torch.Tensor: Lower bounds of the parameters. - ub torch.Tensor: Upper bounds of the parameters. + lb (torch.Tensor): Lower bounds of the parameters. + ub (torch.Tensor): Upper bounds of the parameters. dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size of lb and ub. mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior. @@ -140,7 +140,7 @@ def __init__( @classmethod def from_config(cls, config: Config) -> GPClassificationModel: - """Alternate constructor for GPClassification model. + """Alternate constructor for GPClassification model from a configuration. This is used when we recursively build a full sampling strategy from a configuration. TODO: document how this works in some tutorial. @@ -196,6 +196,7 @@ def from_config(cls, config: Config) -> GPClassificationModel: ) def _reset_hyperparameters(self) -> None: + """Reset hyperparameters to their initial values.""" # warmstart_hyperparams affects hyperparams but not the variational strat, # so we keep the old variational strat (which is only refreshed # if warmstart_induc=False). @@ -270,7 +271,7 @@ def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor: Args: x (torch.Tensor): Points at which to sample. - num_samples (int, optional): Number of samples to return. Defaults to None. + num_samples (int): Number of samples to return. kwargs are ignored Returns: @@ -286,7 +287,7 @@ def predict( Args: x (torch.Tensor): Points at which to predict from the model. - probability_space (bool, optional): Return outputs in units of + probability_space (bool): Return outputs in units of response probability instead of latent function value. Defaults to False. 
Returns: @@ -324,10 +325,23 @@ def predict( return promote_0d(fmean), promote_0d(fvar) def predict_probability(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """Query the model for posterior mean and variance in probability space. + + Args: + x (torch.Tensor): Points at which to predict from the model. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at queries points. + """ return self.predict(x, probability_space=True) def update(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs): - """Perform a warm-start update of the model from previous fit.""" + """Perform a warm-start update of the model from previous fit. + + Args: + train_x (torch.Tensor): Inputs. + train_y (torch.Tensor): Responses. + """ return self.fit( train_x, train_y, warmstart_hyperparams=True, warmstart_induc=True, **kwargs ) @@ -349,6 +363,23 @@ def __init__( inducing_point_method: str = "auto", optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: + """Initialize the GP Beta Regression model + + Args: + lb (torch.Tensor): Lower bounds of the parameters. + ub (torch.Tensor): Upper bounds of the parameters. + dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size + of lb and ub. Defaults to None. + mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior. Defaults to None. + covar_module (gpytorch.kernels.Kernel, optional): GP covariance kernel class. Defaults to scaled RBF with a + gamma prior. + likelihood (gpytorch.likelihood.Likelihood, optional): The likelihood function to use. If None defaults to + Beta likelihood. + inducing_size (int, optional): Number of inducing points. Defaults to 100. + max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None, + there is no limit to the fitting time. Defaults to None. + inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto". + """ if likelihood is None: likelihood = BetaLikelihood() super().__init__( diff --git a/aepsych/models/gp_regression.py b/aepsych/models/gp_regression.py index 6f954229f..a3be95cc6 100644 --- a/aepsych/models/gp_regression.py +++ b/aepsych/models/gp_regression.py @@ -7,7 +7,7 @@ from __future__ import annotations from copy import deepcopy -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple import gpytorch import numpy as np @@ -33,8 +33,8 @@ class GPRegressionModel(AEPsychModelDeviceMixin, ExactGP): def __init__( self, - lb: Union[np.ndarray, torch.Tensor], - ub: Union[np.ndarray, torch.Tensor], + lb: torch.Tensor, + ub: torch.Tensor, dim: Optional[int] = None, mean_module: Optional[gpytorch.means.Mean] = None, covar_module: Optional[gpytorch.kernels.Kernel] = None, @@ -45,8 +45,8 @@ def __init__( """Initialize the GP regression model Args: - lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters. - ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters. + lb (torch.Tensor): Lower bounds of the parameters. + ub (torch.Tensor): Upper bounds of the parameters. dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size of lb and ub. mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior. 
@@ -88,6 +88,14 @@ def __init__( @classmethod def construct_inputs(cls, config: Config) -> Dict: + """Construct inputs for the GP regression model from configuration. + + Args: + config (Config): A configuration containing keys/values matching this class. + + Returns: + Dict: Dictionary of inputs for the GP regression model. + """ classname = cls.__name__ lb = config.gettensor(classname, "lb") @@ -133,7 +141,7 @@ def from_config(cls, config: Config) -> GPRegressionModel: from a configuration. TODO: document how this works in some tutorial. Args: - config (Config): A configuration containing keys/values matching this class + config (Config): A configuration containing keys/values matching this class. Returns: GPRegressionModel: Configured class instance. @@ -159,8 +167,7 @@ def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor: Args: x (torch.Tensor): Points at which to sample. - num_samples (int, optional): Number of samples to return. Defaults to None. - kwargs are ignored + num_samples (int): Number of samples to return. Returns: torch.Tensor: Posterior samples [num_samples x dim] @@ -168,7 +175,12 @@ def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor: return self.posterior(x).rsample(torch.Size([num_samples])).detach().squeeze() def update(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs): - """Perform a warm-start update of the model from previous fit.""" + """Perform a warm-start update of the model from previous fit. + + Args: + train_x (torch.Tensor): Inputs. + train_y (torch.Tensor): Responses. + """ return self.fit(train_x, train_y, **kwargs) def predict(self, x: torch.Tensor, **kwargs) -> Tuple[torch.Tensor, torch.Tensor]: @@ -176,11 +188,9 @@ def predict(self, x: torch.Tensor, **kwargs) -> Tuple[torch.Tensor, torch.Tensor Args: x (torch.Tensor): Points at which to predict from the model. - probability_space (bool, optional): Return outputs in units of - response probability instead of latent function value. Defaults to False. Returns: - Tuple[np.ndarray, np.ndarray]: Posterior mean and variance at queries points. + Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at queries points. """ with torch.no_grad(): post = self.posterior(x) diff --git a/aepsych/models/monotonic_projection_gp.py b/aepsych/models/monotonic_projection_gp.py index f4c1b21f7..4e1573788 100644 --- a/aepsych/models/monotonic_projection_gp.py +++ b/aepsych/models/monotonic_projection_gp.py @@ -107,6 +107,27 @@ def __init__( inducing_point_method: str = "auto", optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: + """Initialize the MonotonicProjectionGP model. + + Args: + lb (torch.Tensor): Lower bounds of the parameters. + ub (torch.Tensor): Upper bounds of the parameters. + monotonic_dims (List[int]): A list of the dimensions on which monotonicity should + be enforced. + monotonic_grid_size (int): The size of the grid, s, in 1. above. Defaults to 20. + min_f_val (float, optional): If provided, maintains this minimum in the projection in 5. Defaults to None. + dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size + of lb and ub. Defaults to None. + mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior. Defaults to None. + covar_module (gpytorch.kernels.Kernel, optional): GP covariance kernel class. Defaults to scaled RBF with a + gamma prior. Defaults to None. + likelihood (Likelihood, optional): The likelihood function to use. 
If None defaults to + Gaussian likelihood. Defaults to None. + inducing_size (int, optional): The number of inducing points to use. Defaults to None. + max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None, + there is no limit to the fitting time. Defaults to None. + inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto". + """ assert len(monotonic_dims) > 0 self.monotonic_dims = [int(d) for d in monotonic_dims] self.mon_grid_size = monotonic_grid_size @@ -130,6 +151,16 @@ def posterior( observation_noise: Union[bool, torch.Tensor] = False, **kwargs: Any, ) -> GPyTorchPosterior: + """Compute the posterior at X, projecting to enforce monotonicity. + + Args: + X (torch.Tensor): The input points at which to compute the posterior. + observation_noise (Union[bool, torch.Tensor]): Whether or not to include the observation noise in the + posterior. Defaults to False. + + Returns: + GPyTorchPosterior: The posterior at X. + """ # Augment X with monotonicity grid points, for each monotonic dim n, d = X.shape # Require no batch dimensions m = len(self.monotonic_dims) @@ -170,6 +201,15 @@ def posterior( return GPyTorchPosterior(mvn_proj) def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor: + """Sample from the model. + + Args: + x (torch.Tensor): The input points at which to sample. + num_samples (int): The number of samples to draw. + + Returns: + torch.Tensor: The samples at x. + """ samps = super().sample(x=x, num_samples=num_samples) if self.min_f_val is not None: samps = samps.clamp(min=self.min_f_val) diff --git a/aepsych/models/monotonic_rejection_gp.py b/aepsych/models/monotonic_rejection_gp.py index 23ad9bc4a..b670136c7 100644 --- a/aepsych/models/monotonic_rejection_gp.py +++ b/aepsych/models/monotonic_rejection_gp.py @@ -8,7 +8,7 @@ from __future__ import annotations import warnings -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple import gpytorch import numpy as np @@ -29,7 +29,6 @@ from gpytorch.models import ApproximateGP from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy from scipy.stats import norm -from torch import Tensor class MonotonicRejectionGP(AEPsychMixin, ApproximateGP): @@ -52,8 +51,8 @@ class MonotonicRejectionGP(AEPsychMixin, ApproximateGP): def __init__( self, monotonic_idxs: Sequence[int], - lb: Union[np.ndarray, torch.Tensor], - ub: Union[np.ndarray, torch.Tensor], + lb: torch.Tensor, + ub: torch.Tensor, dim: Optional[int] = None, mean_module: Optional[Mean] = None, covar_module: Optional[Kernel] = None, @@ -68,21 +67,22 @@ def __init__( """Initialize MonotonicRejectionGP. Args: - likelihood (str): Link function and likelihood. Can be 'probit-bernoulli' or - 'identity-gaussian'. - monotonic_idxs (List[int]): List of which columns of x should be given monotonicity + monotonic_idxs (Sequence[int]): List of which columns of x should be given monotonicity constraints. - fixed_prior_mean (Optional[float], optional): Fixed prior mean. If classification, should be the prior + lb (torch.Tensor): Lower bounds of the parameters. + ub (torch.Tensor): Upper bounds of the parameters. + dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size. + covar_module (Kernel, optional): Covariance kernel to use. Default is scaled RBF. + mean_module (Mean, optional): Mean module to use. Default is constant mean. 
+ likelihood (str, optional): Link function and likelihood. Can be 'probit-bernoulli' or + 'identity-gaussian'. + fixed_prior_mean (float, optional): Fixed prior mean. If classification, should be the prior classification probability (not the latent function value). Defaults to None. - covar_module (Optional[Kernel], optional): Covariance kernel to use (default: scaled RBF). - mean_module (Optional[Mean], optional): Mean module to use (default: constant mean). - num_induc (int, optional): Number of inducing points for variational GP.]. Defaults to 25. - num_samples (int, optional): Number of samples for estimating posterior on preDict or + num_induc (int): Number of inducing points for variational GP.]. Defaults to 25. + num_samples (int): Number of samples for estimating posterior on preDict or acquisition function evaluation. Defaults to 250. - num_rejection_samples (int, optional): Number of samples used for rejection sampling. Defaults to 4096. - acqf (MonotonicMCAcquisition, optional): Acquisition function to use for querying points. Defaults to MonotonicMCLSE. - objective (Optional[MCAcquisitionObjective], optional): Transformation of GP to apply before computing acquisition function. Defaults to identity transform for gaussian likelihood, probit transform for probit-bernoulli. - extra_acqf_args (Optional[Dict[str, object]], optional): Additional arguments to pass into the acquisition function. Defaults to None. + num_rejection_samples (int): Number of samples used for rejection sampling. Defaults to 4096. + inducing_point_method (str): Method for selecting inducing points. Defaults to "auto". optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during fitting. Assumes we are using L-BFGS-B. """ @@ -152,12 +152,12 @@ def __init__( {"options": optimizer_options} if optimizer_options else {"options": {}} ) - def fit(self, train_x: Tensor, train_y: Tensor, **kwargs) -> None: + def fit(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs) -> None: """Fit the model Args: - train_x (Tensor): Training x points - train_y (Tensor): Training y points. Should be (n x 1). + train_x (torch.Tensor): Training x points + train_y (torch.Tensor): Training y points. Should be (n x 1). """ self.set_train_data(train_x, train_y) @@ -172,11 +172,19 @@ def fit(self, train_x: Tensor, train_y: Tensor, **kwargs) -> None: def _set_model( self, - train_x: Tensor, - train_y: Tensor, - model_state_dict: Optional[Dict[str, Tensor]] = None, - likelihood_state_dict: Optional[Dict[str, Tensor]] = None, + train_x: torch.Tensor, + train_y: torch.Tensor, + model_state_dict: Optional[Dict[str, torch.Tensor]] = None, + likelihood_state_dict: Optional[Dict[str, torch.Tensor]] = None, ) -> None: + """Sets the model with the given data and state dicts. + + Args: + train_x (torch.Tensor): Training x points + train_y (torch.Tensor): Training y points. Should be (n x 1). 
+ model_state_dict (Dict[str, torch.Tensor], optional): State dict for the model + likelihood_state_dict (Dict[str, torch.Tensor], optional): State dict for the likelihood + """ train_x_aug = self._augment_with_deriv_index(train_x, 0) self.set_train_data(train_x_aug, train_y) # Set model parameters @@ -191,16 +199,16 @@ def _set_model( ) mll = fit_gpytorch_mll(mll, optimizer_kwargs=self.optimizer_options) - def update(self, train_x: Tensor, train_y: Tensor, warmstart: bool = True) -> None: + def update(self, train_x: torch.Tensor, train_y: torch.Tensor, warmstart: bool = True) -> None: """ Update the model with new data. Expects the full set of data, not the incremental new data. Args: - train_x (Tensor): Train X. - train_y (Tensor): Train Y. Should be (n x 1). - warmstart (bool): If True, warm-start model fitting with current parameters. + train_x (torch.Tensor): Train X. + train_y (torch.Tensor): Train Y. Should be (n x 1). + warmstart (bool): If True, warm-start model fitting with current parameters. Defaults to True. """ if warmstart: model_state_dict = self.state_dict() @@ -217,15 +225,16 @@ def update(self, train_x: Tensor, train_y: Tensor, warmstart: bool = True) -> No def sample( self, - x: Tensor, + x: torch.Tensor, num_samples: Optional[int] = None, num_rejection_samples: Optional[int] = None, ) -> torch.Tensor: """Sample from monotonic GP Args: - x (Tensor): tensor of n points at which to sample - num_samples (int, optional): how many points to sample (default: self.num_samples) + x (torch.Tensor): tensor of n points at which to sample + num_samples (int, optional): how many points to sample. Default is self.num_samples. + num_rejection_samples (int): how many samples to use for rejection sampling. Default is self.num_rejection_samples. Returns: a Tensor of shape [n_samp, n] """ @@ -263,14 +272,16 @@ def sample( return samples_f def predict( - self, x: Tensor, probability_space: bool = False - ) -> Tuple[Tensor, Tensor]: + self, x: torch.Tensor, probability_space: bool = False + ) -> Tuple[torch.Tensor, torch.Tensor]: """Predict Args: - x: tensor of n points at which to predict. + x (torch.Tensor): tensor of n points at which to predict. + probability_space (bool): whether to return in probability space. Defaults to False. - Returns: tuple (f, var) where f is (n,) and var is (n,) + Returns: + Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points. """ samples_f = self.sample(x) mean = torch.mean(samples_f, dim=0).squeeze() @@ -285,17 +296,35 @@ def predict( return mean, variance def predict_probability( - self, x: Union[torch.Tensor, np.ndarray] + self, x: torch.Tensor ) -> Tuple[torch.Tensor, torch.Tensor]: + """Predict in probability space + + Args: + x (torch.Tensor): Points at which to predict. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points. 
+ """ return self.predict(x, probability_space=True) - def _augment_with_deriv_index(self, x: Tensor, indx) -> Tensor: + def _augment_with_deriv_index(self, x: torch.Tensor, indx: int) -> torch.Tensor: + """Augment input with derivative index + + Args: + x (torch.Tensor): Input tensor + indx (int): Derivative index + + Returns: + torch.Tensor: Augmented tensor + """ return torch.cat( (x, indx * torch.ones(x.shape[0], 1)), dim=1, ) - def _get_deriv_constraint_points(self) -> Tensor: + def _get_deriv_constraint_points(self) -> torch.Tensor: + """Get derivative constraint points""" deriv_cp = torch.tensor([]) for i in self.monotonic_idxs: induc_i = self._augment_with_deriv_index(self.inducing_points, i + 1) @@ -304,6 +333,14 @@ def _get_deriv_constraint_points(self) -> Tensor: @classmethod def from_config(cls, config: Config) -> MonotonicRejectionGP: + """ Alternate constructor for MonotonicRejectionGP + + Args: + config (Config): a configuration containing keys/values matching this class + + Returns: + MonotonicRejectionGP: configured class instance + """ classname = cls.__name__ num_induc = config.gettensor(classname, "num_induc", fallback=25) num_samples = config.gettensor(classname, "num_samples", fallback=250) diff --git a/aepsych/models/multitask_regression.py b/aepsych/models/multitask_regression.py index aab0b396c..5e2b0faa0 100644 --- a/aepsych/models/multitask_regression.py +++ b/aepsych/models/multitask_regression.py @@ -46,13 +46,13 @@ def __init__( """Initialize multitask GPR model. Args: - num_outputs (int, optional): Number of tasks (outputs). Defaults to 2. - rank (int, optional): Rank of cross-task covariance. Lower rank is a simpler model. + num_outputs (int): Number of tasks (outputs). Defaults to 2. + rank (int): Rank of cross-task covariance. Lower rank is a simpler model. Should be less than or equal to num_outputs. Defaults to 1. - mean_module (Optional[gpytorch.means.Mean], optional): GP mean. Defaults to a constant mean. - covar_module (Optional[gpytorch.kernels.Kernel], optional): GP kernel module. + mean_module (gpytorch.means.Mean, optional): GP mean. Defaults to a constant mean. + covar_module (gpytorch.kernels.Kernel, optional): GP kernel module. Defaults to scaled RBF kernel. - likelihood (Optional[gpytorch.likelihoods.Likelihood], optional): Likelihood + likelihood (gpytorch.likelihoods.Likelihood, optional): Likelihood (should be a multitask-compatible likelihood). Defaults to multitask Gaussian likelihood. """ self._num_outputs = num_outputs @@ -79,12 +79,25 @@ def __init__( def forward( self, x: torch.Tensor ) -> gpytorch.distributions.MultitaskMultivariateNormal: + """ Evaluate GP. + + Args: + x (torch.Tensor): Tensor of points at which GP should be evaluated. + + Returns: + gpytorch.distributions.MultitaskMultivariateNormal: Distribution object + holding the mean and covariance at x.""" mean_x = self.mean_module(x) covar_x = self.covar_module(x) return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x) @classmethod def construct_inputs(cls, config: Config): + """Construct inputs for the Multitask GPR model from configuration. + + Args: + config (Config): A configuration containing keys/values matching this class. + """ classname = cls.__name__ args = super().construct_inputs(config) args["num_outputs"] = config.getint(classname, "num_outputs", fallback=2) @@ -97,7 +110,6 @@ class IndependentMultitaskGPRModel(GPRegressionModel): fitting a batch of independent GPRegression models. 
It wraps the GPyTorch tutorial here https://docs.gpytorch.ai/en/stable/examples/03_Multitask_Exact_GPs/Batch_Independent_Multioutput_GP.html with AEPsych API and convenience fitting / prediction methods. - """ _num_outputs = 1 @@ -117,11 +129,11 @@ def __init__( """Initialize independent multitask GPR model. Args: - num_outputs (int, optional): Number of tasks (outputs). Defaults to 2. - mean_module (Optional[gpytorch.means.Mean], optional): GP mean. Defaults to a constant mean. - covar_module (Optional[gpytorch.kernels.Kernel], optional): GP kernel module. + num_outputs (int): Number of tasks (outputs). Defaults to 2. + mean_module (gpytorch.means.Mean, optional): GP mean. Defaults to a constant mean. + covar_module (gpytorch.kernels.Kernel, optional): GP kernel module. Defaults to scaled RBF kernel. - likelihood (Optional[gpytorch.likelihoods.Likelihood], optional): Likelihood + likelihood (gpytorch.likelihoods.Likelihood, optional): Likelihood (should be a multitask-compatible likelihood). Defaults to multitask Gaussian likelihood. """ @@ -152,6 +164,15 @@ def __init__( def forward( self, x: torch.Tensor ) -> gpytorch.distributions.MultitaskMultivariateNormal: + """ Evaluate GP. + + Args: + x (torch.Tensor): Tensor of points at which GP should be evaluated. + + Returns: + gpytorch.distributions.MultitaskMultivariateNormal: Distribution object + holding the mean and covariance at x. + """ base_mvn = super().forward(x) # do transforms return gpytorch.distributions.MultitaskMultivariateNormal.from_batch_mvn( base_mvn @@ -159,6 +180,14 @@ def forward( @classmethod def get_config_args(cls, config: Config) -> Dict[str, Any]: + """Get configuration arguments for the model. + + Args: + config (Config): A configuration containing keys/values matching this class. + + Returns: + Dict[str, Any]: Dictionary of configuration arguments. + """ classname = cls.__name__ args = super().get_config_args(config) args["num_outputs"] = config.getint(classname, "num_outputs", fallback=2) diff --git a/aepsych/models/ordinal_gp.py b/aepsych/models/ordinal_gp.py index c33ed60c2..1ccdc532f 100644 --- a/aepsych/models/ordinal_gp.py +++ b/aepsych/models/ordinal_gp.py @@ -5,8 +5,6 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. -from typing import Optional, Union - import gpytorch import torch from aepsych.likelihoods import OrdinalLikelihood @@ -27,6 +25,12 @@ class OrdinalGPModel(GPClassificationModel): outcome_type = "ordinal" def __init__(self, likelihood=None, *args, **kwargs): + """Initialize the OrdinalGPModel + + Args: + likelihood (Likelihood): The likelihood function to use. If None defaults to + Ordinal likelihood. 
+ """ covar_module = kwargs.pop("covar_module", None) dim = kwargs.get("dim") if covar_module is None: @@ -52,11 +56,28 @@ def __init__(self, likelihood=None, *args, **kwargs): **kwargs, ) - def predict_probs(self, xgrid: torch.Tensor) -> torch.Tensor: + def predict_probs(self, xgrid:torch.Tensor) -> torch.Tensor: + """Predict probabilities of each ordinal level at xgrid + + Args: + xgrid (torch.Tensor): Tensor of input points to predict at + + Returns: + torch.Tensor: Tensor of probabilities of each ordinal level at xgrid + """ fmean, fvar = self.predict(xgrid) return self.calculate_probs(fmean, fvar) def calculate_probs(self, fmean: torch.Tensor, fvar: torch.Tensor) -> torch.Tensor: + """Calculate probabilities of each ordinal level given a mean and variance + + Args: + fmean (torch.Tensor): Mean of the latent function + fvar (torch.Tensor): Variance of the latent function + + Returns: + torch.Tensor: Tensor of probabilities of each ordinal level + """ fsd = torch.sqrt(1 + fvar) probs = torch.zeros(*fmean.size(), self.likelihood.n_levels) diff --git a/aepsych/models/pairwise_probit.py b/aepsych/models/pairwise_probit.py index ee5ce0d3c..3cb589417 100644 --- a/aepsych/models/pairwise_probit.py +++ b/aepsych/models/pairwise_probit.py @@ -30,9 +30,18 @@ class PairwiseProbitModel(PairwiseGP, AEPsychMixin): def _pairs_to_comparisons( self, x: torch.Tensor, y: torch.Tensor ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Takes x, y structured as pairs and judgments and - returns pairs and comparisons as PairwiseGP requires + """Convert pairs of points and their judgements to comparisons. + + Args: + x (torch.Tensor): Tensor of shape (n, d, 2) where n is the number of pairs and d is the dimensionality of the + parameter space. + y (torch.Tensor): Tensor of shape (n,) where n is the number of pairs. Each element is 0 if the first point + in the pair is preferred, and 1 if the second point is preferred. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: A tuple of tensors. The first tensor is of shape (n, d) and contains the + unique points in the pairs. The second tensor is of shape (n, 2) and contains the indices of the unique + points in the first tensor that correspond to the points in the pairs. """ # This needs to take a unique over the feature dim by flattening # over pairs but not instances/batches. This is actually tensor @@ -65,6 +74,17 @@ def __init__( max_fit_time: Optional[float] = None, optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: + """Initialize the PairwiseProbitModel + + Args: + lb (torch.Tensor): Lower bounds of the parameters. + ub (torch.Tensor): Upper bounds of the parameters. + dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size + of lb and ub. Defaults to None. + covar_module (gpytorch.kernels.Kernel, optional): GP covariance kernel class. Defaults to scaled RBF with a + gamma prior. Defaults to None. + max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. Defaults to None. + """ self.lb, self.ub, dim = _process_bounds(lb, ub, dim) self.max_fit_time = max_fit_time @@ -104,12 +124,20 @@ def fit( optimizer_kwargs: Optional[Dict[str, Any]] = None, **kwargs, ) -> None: + """Fit the model to the training data. + + Args: + train_x (torch.Tensor): Trainin x points. + train_y (torch.Tensor): Training y points. + optimizer_kwargs (Dict[str, Any], optional): Keyword arguments to pass to the optimizer. Defaults to None. 
+ """ if optimizer_kwargs is not None: if not "optimizer_kwargs" in optimizer_kwargs: optimizer_kwargs = optimizer_kwargs.copy() optimizer_kwargs.update(self.optimizer_options) else: optimizer_kwargs = {"options": self.optimizer_options} + self.train() mll = PairwiseLaplaceMarginalLogLikelihood(self.likelihood, self) datapoints, comparisons = self._pairs_to_comparisons(train_x, train_y) @@ -138,7 +166,13 @@ def fit( def update( self, train_x: torch.Tensor, train_y: torch.Tensor, warmstart: bool = True ) -> None: - """Perform a warm-start update of the model from previous fit.""" + """Perform a warm-start update of the model from previous fit. + + Args: + train_x (torch.Tensor): Train X. + train_y (torch.Tensor): Train Y. + warmstart (bool): If True, warm-start model fitting with current parameters. Defaults to True. + """ self.fit(train_x, train_y) def predict( @@ -148,6 +182,17 @@ def predict( num_samples: int = 1000, rereference: str = "x_min", ) -> Tuple[torch.Tensor, torch.Tensor]: + """Query the model for posterior mean and variance. + + Args: + x (torch.Tensor): Points at which to predict from the model. + probability_space (bool): Return outputs in units of response probability instead of latent function value. Defaults to False. + num_samples (int): Number of samples to return. Defaults to 1000. + rereference (str): How to sample. Options are "x_min", "x_max", "f_min", "f_max". Defaults to "x_min". + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at queries points. + """ if rereference is not None: samps = self.sample(x, num_samples, rereference) fmean, fvar = samps.mean(0).squeeze(), samps.var(0).squeeze() @@ -170,6 +215,17 @@ def predict_probability( num_samples: int = 1000, rereference: str = "x_min", ) -> Tuple[torch.Tensor, torch.Tensor]: + """Query the model for posterior mean and variance in probability space. + + Args: + x (torch.Tensor): Points at which to predict from the model. + probability_space (bool): Return outputs in units of response probability instead of latent function value. Defaults to False. + num_samples (int): Number of samples to return. Defaults to 1000. + rereference (str): How to sample. Options are "x_min", "x_max", "f_min", "f_max". Defaults to "x_min". + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at queries points. + """ return self.predict( x, probability_space=True, num_samples=num_samples, rereference=rereference ) @@ -177,6 +233,16 @@ def predict_probability( def sample( self, x: torch.Tensor, num_samples: int, rereference: str = "x_min" ) -> torch.Tensor: + """Sample from the model model posterior. + + Args: + x (torch.Tensor): Points at which to sample. + num_samples (int): Number of samples to return. + rereference (str): How to sample. Options are "x_min", "x_max", "f_min", "f_max". Defaults to "x_min". + + Returns: + torch.Tensor: Posterior samples [num_samples x dim] + """ if len(x.shape) < 2: x = x.reshape(-1, 1) if rereference is None: @@ -204,7 +270,15 @@ def sample( return -samps + samps_ref @classmethod - def from_config(cls, config: Config) -> "PairwiseProbitModel": + def from_config(cls, config: Config) -> 'PairwiseProbitModel': + """Initialize the model from a config object. + + Args: + config (Config): a configuration containing keys/values matching this class + + Returns: + PairwiseProbitModel: Configured class instance. 
+ """ classname = cls.__name__ mean_covar_factory = config.getobj( diff --git a/aepsych/models/semi_p.py b/aepsych/models/semi_p.py index 2d5f20f00..310569cf0 100644 --- a/aepsych/models/semi_p.py +++ b/aepsych/models/semi_p.py @@ -8,7 +8,7 @@ from __future__ import annotations from copy import deepcopy -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple import gpytorch import numpy as np @@ -28,7 +28,6 @@ from gpytorch.likelihoods import BernoulliLikelihood, Likelihood from gpytorch.means import ConstantMean, ZeroMean from gpytorch.priors import GammaPrior -from torch import Tensor from torch.distributions import Normal # TODO: Implement a covar factory and analytic method for getting the lse @@ -46,6 +45,16 @@ def _hadamard_mvn_approx( MVN approximation to the hadamard product of GPs (from the SemiP paper, extending the zero-mean results in https://mathoverflow.net/questions/293955/normal-approximation-to-the-pointwise-hadamard-schur-product-of-two-multivariat) + + Args: + x_intensity (torch.Tensor): The intensity dimension + slope_mean (torch.Tensor): The mean of the slope GP + slope_cov (torch.Tensor): The covariance of the slope GP + offset_mean (torch.Tensor): The mean of the offset GP + offset_cov (torch.Tensor): The covariance of the offset GP + + Returns: + Tuple[torch.Tensor, torch.Tensor]: The mean and covariance of the approximated MVN """ offset_mean = offset_mean + x_intensity @@ -65,6 +74,14 @@ def _hadamard_mvn_approx( def semi_p_posterior_transform(posterior: GPyTorchPosterior) -> GPyTorchPosterior: + """Transform a posterior from a SemiP model to a Hadamard model. + + Args: + posterior (GPyTorchPosterior): The posterior to transform + + Returns: + GPyTorchPosterior: The transformed posterior. + """ batch_mean = posterior.mvn.mean batch_cov = posterior.mvn.covariance_matrix offset_mean = batch_mean[..., 0, :] @@ -90,6 +107,14 @@ def __init__( likelihood: LinearBernoulliLikelihood, Xi: torch.Tensor, ) -> None: + """Initialize a SemiPPosterior object. + + Args: + mvn (MultivariateNormal): The MVN object to use + likelihood (LinearBernoulliLikelihood): The likelihood object + Xi (torch.Tensor): The intensity dimension + """ + super().__init__(distribution=mvn) self.likelihood = likelihood self.Xi = Xi @@ -97,12 +122,19 @@ def __init__( def rsample_from_base_samples( self, sample_shape: torch.Size, - base_samples: Tensor, - ) -> Tensor: - r"""Sample from the posterior (with gradients) using base samples. + base_samples: torch.Tensor, + ) -> torch.Tensor: + """Sample from the posterior (with gradients) using base samples. This is intended to be used with a sampler that produces the corresponding base samples, and enables acquisition optimization via Sample Average Approximation. + + Args: + sample_shape (torch.Size): The desired shape of the samples + base_samples (torch.Tensor): The base samples + + Returns: + torch.Tensor: The sampled values from the posterior distribution """ return ( super() @@ -117,6 +149,15 @@ def rsample( sample_shape: Optional[torch.Size] = None, base_samples: Optional[torch.Tensor] = None, ) -> torch.Tensor: + """Sample from the posterior distribution using the reparameterization trick + + Args: + sample_shape (torch.Size, optional): The desired shape of the samples. Defaults to None. + base_samples (torch.Tensor, optional): The base samples. Defaults to None. + + Returns: + torch.Tensor: The sampled values from the posterior distribution. 
+ """ if base_samples is None: samps_ = super().rsample(sample_shape=sample_shape) else: @@ -135,6 +176,15 @@ def sample_p( sample_shape: Optional[torch.Size] = None, base_samples: Optional[torch.Tensor] = None, ) -> torch.Tensor: + """Sample from the likelihood distribution of the modeled function. + + Args: + sample_shape (torch.Size, optional): The desired shape of the samples. Defaults to None. + base_samples (torch.Tensor, optional): The base samples. Defaults to None. + + Returns: + torch.Tensor: The sampled values from the likelihood distribution. + """ kcsamps = self.rsample(sample_shape=sample_shape, base_samples=base_samples) return self.likelihood.p(function_samples=kcsamps, Xi=self.Xi).squeeze(-1) @@ -143,6 +193,16 @@ def sample_f( sample_shape: Optional[torch.Size] = None, base_samples: Optional[torch.Tensor] = None, ) -> torch.Tensor: + """Sample from the function values of the modeled distribution. + + Args: + sample_shape (torch.Size, optional): The desired shape of the samples. Defaults to None. + base_samples (torch.Tensor, optional): The base samples. Defaults to None. + + Returns: + torch.Tensor: The sampled function values from the likelihood. + """ + kcsamps = self.rsample(sample_shape=sample_shape, base_samples=base_samples) return self.likelihood.f(function_samples=kcsamps, Xi=self.Xi).squeeze(-1) @@ -152,6 +212,17 @@ def sample_thresholds( sample_shape: Optional[torch.Size] = None, base_samples: Optional[torch.Tensor] = None, ) -> SemiPThresholdObjective: + """Sample the thresholds based on the given threshold level. + + Args: + threshold_level (float): The target threshold level for sampling. + sample_shape (torch.Size, optional): The desired shape of the samples. Defaults to None. + base_samples (torch.Tensor, optional): The base samples. Defaults to None. + + Returns: + SemiPThresholdObjective: The sampled thresholds based on the threshold level. + """ + fsamps = self.rsample(sample_shape=sample_shape, base_samples=base_samples) return SemiPThresholdObjective( likelihood=self.likelihood, target=threshold_level @@ -178,8 +249,8 @@ class SemiParametricGPModel(GPClassificationModel): def __init__( self, - lb: Union[np.ndarray, torch.Tensor], - ub: Union[np.ndarray, torch.Tensor], + lb: torch.Tensor, + ub: torch.Tensor, dim: Optional[int] = None, stim_dim: int = 0, mean_module: Optional[gpytorch.means.Mean] = None, @@ -194,18 +265,18 @@ def __init__( """ Initialize SemiParametricGP. Args: - Args: - lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters. - ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters. + lb (torch.Tensor): Lower bounds of the parameters. + ub (torch.Tensor): Upper bounds of the parameters. dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size - of lb and ub. + of lb and ub. Defaults to None. stim_dim (int): Index of the intensity (monotonic) dimension. Defaults to 0. mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior. covar_module (gpytorch.kernels.Kernel, optional): GP covariance kernel class. Defaults to scaled RBF with a gamma prior. likelihood (gpytorch.likelihood.Likelihood, optional): The likelihood function to use. If None defaults to linear-Bernouli likelihood with probit link. - inducing_size (int): Number of inducing points. Defaults to 99. + slope_mean (float): The mean of the slope. Defaults to 2. + inducing_size (int, optional): Number of inducing points. Defaults to 99. 
max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None, there is no limit to the fitting time. inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto". @@ -346,13 +417,15 @@ def sample( self, x: torch.Tensor, num_samples: int, - probability_space=False, + probability_space: bool = False, ) -> torch.Tensor: """Sample from underlying model. Args: - x ((n x d) torch.Tensor): Points at which to sample. - num_samples (int, optional): Number of samples to return. Defaults to None. + + x (torch.Tensor): `n x d` Points at which to sample. + num_samples (int): Number of samples to return. Defaults to None. + probability_space (bool): Whether to sample from the probability space (True) or the latent function. Defaults to False. kwargs are ignored Returns: @@ -374,7 +447,7 @@ def predict( Args: x (torch.Tensor): Points at which to predict from the model. - probability_space (bool, optional): Return outputs in units of + probability_space (bool): Return outputs in units of response probability instead of latent function value. Defaults to False. Returns: @@ -390,6 +463,15 @@ def predict( def posterior( self, X: torch.Tensor, posterior_transform: Optional[PosteriorTransform] = None ) -> SemiPPosterior: + """Get the posterior distribution at the given points. + + Args: + X (torch.Tensor): Points at which to evaluate the posterior. + posterior_transform (PosteriorTransform, optional): A transform to apply to the posterior. Defaults to None. + + Returns: + SemiPPosterior: The posterior distribution at the given points. + """ # Assume x is (b) x n x d if X.ndim > 3: raise ValueError @@ -452,8 +534,8 @@ def __init__( """ Initialize HadamardSemiPModel. Args: - lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters. - ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters. + lb (torch.Tensor): Lower bounds of the parameters. + ub (torch.Tensor): Upper bounds of the parameters. dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size of lb and ub. stim_dim (int): Index of the intensity (monotonic) dimension. Defaults to 0. @@ -462,7 +544,8 @@ def __init__( offset_mean_module (gpytorch.means.Mean, optional): Mean module to use (default: constant mean) for offset. offset_covar_module (gpytorch.kernels.Kernel, optional): Covariance kernel to use (default: scaled RBF) for offset. likelihood (gpytorch.likelihood.Likelihood, optional)): defaults to bernoulli with logistic input and a floor of .5 - inducing_size (int): Number of inducing points. Defaults to 99. + slope_mean (float): The mean of the slope. Defaults to 2. + inducing_size (int, optional): Number of inducing points. Defaults to 99. max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None, there is no limit to the fitting time. inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto". @@ -529,7 +612,7 @@ def __init__( self._fresh_likelihood_dict = deepcopy(self.likelihood.state_dict()) def forward(self, x: torch.Tensor) -> MultivariateNormal: - """Forward pass for semip GP. + """Forward pass for HadamardSemiPModel GP. generates a k(c + x[:,stim_dim]) = kc + kx[:,stim_dim] mvn object where k and c are slope and offset GPs and x[:,stim_dim] are the intensity stimulus (x) @@ -632,7 +715,7 @@ def predict( Args: x (torch.Tensor): Points at which to predict from the model. 
- probability_space (bool, optional): Return outputs in units of + probability_space (bool): Return outputs in units of response probability instead of latent function value. Defaults to False. Returns: diff --git a/aepsych/models/utils.py b/aepsych/models/utils.py index 25aa21f0a..ca9559ca6 100644 --- a/aepsych/models/utils.py +++ b/aepsych/models/utils.py @@ -27,7 +27,6 @@ from scipy.cluster.vq import kmeans2 from scipy.special import owens_t from scipy.stats import norm -from torch import Tensor from torch.distributions import Normal @@ -46,6 +45,14 @@ def compute_p_quantile( A 95% CI for p can be computed as p_l = compute_p_quantile(f_mean, f_std, 0.025) p_u = compute_p_quantile(f_mean, f_std, 0.975) + + Args: + f_mean (torch.Tensor): The mean of the latent function. + f_std (torch.Tensor): The standard deviation of the latent function. + alpha (Union[torch.Tensor, float]): The quantile to compute. + + Returns: + torch.Tensor: The quantile of p. """ norm = torch.distributions.Normal(0, 1) alpha = torch.tensor(alpha, dtype=f_mean.dtype) @@ -56,9 +63,22 @@ def select_inducing_points( inducing_size: int, covar_module: Kernel = None, X: Optional[torch.Tensor] = None, - bounds: Optional[Union[torch.Tensor, np.ndarray]] = None, + bounds: Optional[torch.Tensor] = None, method: str = "auto", ) -> torch.Tensor: + """Select inducing points for GP model + + Args: + inducing_size (int): Number of inducing points to select. + covar_module (Kernel): The kernel module to use for inducing point selection. + X (torch.Tensor, optional): The training data. + bounds (torch.Tensor, optional): The bounds of the input space. + method (str): The method to use for inducing point selection. One of + "pivoted_chol", "kmeans++", "auto", or "sobol". + + Returns: + torch.Tensor: The selected inducing points. + """ with torch.no_grad(): assert ( method @@ -111,6 +131,15 @@ def select_inducing_points( def get_probability_space( likelihood: Likelihood, posterior: GPyTorchPosterior ) -> Tuple[torch.Tensor, torch.Tensor]: + """Get the mean and variance of the probability space for a given posterior + + Args: + likelihood (Likelihood): The likelihood function. + posterior (GPyTorchPosterior): The posterior to transform. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: The mean and variance of the probability space. + """ fmean = posterior.mean.squeeze() fvar = posterior.variance.squeeze() if isinstance(likelihood, BernoulliLikelihood): @@ -149,13 +178,15 @@ def get_extremum( """Return the extremum (min or max) of the modeled function Args: extremum_type (str): Type of extremum (currently 'min' or 'max'. - bounds (tensor): Lower and upper bounds of the search space. - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the + bounds (torch.Tensor): Lower and upper bounds of the search space. + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the inverse is along a slice of the full surface. n_samples (int): number of coarse grid points to sample for optimization estimate. - max_time (float): Maximum amount of time in seconds to spend optimizing. + posterior_transform (PosteriorTransform, optional): Posterior transform to apply to the model. + max_time (float, optional): Maximum amount of time in seconds to spend optimizing. + weights (torch.Tensor, optional): Weights to apply to the target value. Defaults to None. Returns: - Tuple[float, np.ndarray]: Tuple containing the min and its location (argmin). 
+ Tuple[float, torch.Tensor]: Tuple containing the min and its location (argmin). """ locked_dims = locked_dims or {} @@ -202,17 +233,18 @@ def inv_query( Return nearest x such that f(x) = queried y, and also return the value of f at that point. Args: - y (float): Points at which to find the inverse. - bounds (tensor): Lower and upper bounds of the search space. - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the - inverse is along a slice of the full surface. + y (Union[float, torch.Tensor]): Points at which to find the inverse. + bounds (torch.Tensor): Lower and upper bounds of the search space. + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the + inverse is along a slice of the full surface. Defaults to None. probability_space (bool): Is y (and therefore the returned nearest_y) in probability space instead of latent function space? Defaults to False. - n_samples (int): number of coarse grid points to sample for optimization estimate. - max_time float: Maximum amount of time in seconds to spend optimizing. + n_samples (int): number of coarse grid points to sample for optimization estimate. Defaults to 1000. + max_time (float, optional): Maximum amount of time in seconds to spend optimizing. Defaults to None. + weights (torch.Tensor, optional): Weights to apply to the target value. Defaults to None. Returns: - Tuple[float, np.ndarray]: Tuple containing the value of f + Tuple[float, torch.Tensor]: Tuple containing the value of f nearest to queried y and the x position of this value. """ locked_dims = locked_dims or {} @@ -241,16 +273,39 @@ def inv_query( class TargetDistancePosteriorTransform(PosteriorTransform): def __init__( - self, target_value: Union[float, Tensor], weights: Optional[Tensor] = None + self, target_value: Union[float, torch.Tensor], weights: Optional[torch.Tensor] = None ) -> None: + """Initialize the TargetDistancePosteriorTransform + + Args: + target_value (Union[float, torch.Tensor]): The target value to transform the posterior to. + weights (torch.Tensor, optional): Weights to apply to the target value. Defaults to None. + """ super().__init__() self.target_value = target_value self.weights = weights - def evaluate(self, Y: Tensor) -> Tensor: + def evaluate(self, Y: torch.Tensor) -> torch.Tensor: + """Evaluate the squared distance from the target value. + + Args: + Y (torch.Tensor): The tensor to evaluate. + + Returns: + torch.Tensor: The squared distance from the target value. + """ return (Y - self.target_value) ** 2 - def _forward(self, mean: Tensor, var: Tensor) -> GPyTorchPosterior: + def _forward(self, mean: torch.Tensor, var: torch.Tensor) -> GPyTorchPosterior: + """Transform the posterior mean and variance based on the target value. + + Args: + mean (torch.Tensor): The posterior mean. + var (torch.Tensor): The posterior variance. + + Returns: + GPyTorchPosterior: The transformed posterior. + """ q, _ = mean.shape[-2:] batch_shape = mean.shape[:-2] @@ -265,6 +320,14 @@ def _forward(self, mean: Tensor, var: Tensor) -> GPyTorchPosterior: return GPyTorchPosterior(mvn) def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior: + """Transform the given posterior distribution to reflect the target distance. + + Args: + posterior (GPyTorchPosterior): The posterior to transform. + + Returns: + GPyTorchPosterior: The transformed posterior. 
+ """ mean = posterior.mean var = posterior.variance return self._forward(mean, var) @@ -273,6 +336,14 @@ def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior: # Requires botorch approximate model to accept posterior transforms class TargetProbabilityDistancePosteriorTransform(TargetDistancePosteriorTransform): def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior: + """Transform the given posterior distribution to reflect the target probability distance. + + Args: + posterior (GPyTorchPosterior): The posterior to transform. + + Returns: + GPyTorchPosterior: The transformed posterior distribution reflecting the target probability distance. + """ pmean, pvar = get_probability_space(BernoulliLikelihood(), posterior) pmean = pmean.unsqueeze(-1).unsqueeze(-1) pvar = pvar.unsqueeze(-1).unsqueeze(-1)