From 547063e58c5a24017969ee767655140821287b61 Mon Sep 17 00:00:00 2001 From: Yousif Alsaffar Date: Thu, 31 Oct 2024 15:12:16 +0300 Subject: [PATCH] adding docstrings to models files --- aepsych/models/base.py | 71 +++++++++++--- aepsych/models/gp_classification.py | 44 +++++++-- aepsych/models/gp_regression.py | 30 ++++-- aepsych/models/monotonic_projection_gp.py | 40 ++++++++ aepsych/models/monotonic_rejection_gp.py | 69 +++++++++---- aepsych/models/multitask_regression.py | 29 ++++++ aepsych/models/ordinal_gp.py | 23 +++++ aepsych/models/pairwise_probit.py | 78 ++++++++++++++- aepsych/models/semi_p.py | 112 ++++++++++++++++++---- aepsych/models/utils.py | 61 ++++++++++++ 10 files changed, 488 insertions(+), 69 deletions(-) diff --git a/aepsych/models/base.py b/aepsych/models/base.py index 0baee322b..f2f36ad05 100644 --- a/aepsych/models/base.py +++ b/aepsych/models/base.py @@ -125,14 +125,17 @@ def get_max( max_time: Optional[float] = None, ) -> Tuple[float, torch.Tensor]: """Return the maximum of the modeled function, subject to constraints + Args: - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the - inverse is along a slice of the full surface. - probability_space (bool): Is y (and therefore the returned nearest_y) in + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the + inverse is along a slice of the full surface. Defaults to None. + probability_space (bool, optional): Is y (and therefore the returned nearest_y) in probability space instead of latent function space? Defaults to False. - n_samples int: number of coarse grid points to sample for optimization estimate. + n_samples (int, optional): number of coarse grid points to sample for optimization estimate. + max_time (Optional[float], optional): Maximum time to spend optimizing. Defaults to None. + Returns: - Tuple[float, np.ndarray]: Tuple containing the max and its location (argmax). + Tuple[float, torch.Tensor]: Tuple containing the max and its location (argmax). """ locked_dims = locked_dims or {} _, _arg = get_extremum( @@ -154,11 +157,13 @@ def get_min( ) -> Tuple[float, torch.Tensor]: """Return the minimum of the modeled function, subject to constraints Args: - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the inverse is along a slice of the full surface. - probability_space (bool): Is y (and therefore the returned nearest_y) in + probability_space (bool, optional): Is y (and therefore the returned nearest_y) in probability space instead of latent function space? Defaults to False. - n_samples int: number of coarse grid points to sample for optimization estimate. + n_samples (int, optional): number of coarse grid points to sample for optimization estimate. + max_time (Optional[float], optional): Maximum time to spend optimizing. Defaults to None. + Returns: Tuple[float, torch.Tensor]: Tuple containing the min and its location (argmin). """ @@ -185,12 +190,17 @@ def inv_query( """Query the model inverse. Return nearest x such that f(x) = queried y, and also return the value of f at that point. + Args: y (float): Points at which to find the inverse. - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the inverse is along a slice of the full surface. 
- probability_space (bool): Is y (and therefore the returned nearest_y) in
+ probability_space (bool, optional): Is y (and therefore the returned nearest_y) in
 probability space instead of latent function space? Defaults to False.
+ n_samples (int, optional): number of coarse grid points to sample for optimization estimate. Defaults to 1000.
+ max_time (Optional[float], optional): Maximum time to spend optimizing. Defaults to None.
+ weights (Optional[torch.Tensor], optional): Weights for the optimization. Defaults to None.
+
 Returns:
 Tuple[float, torch.Tensor]: Tuple containing the value of f
 nearest to queried y and the x position of this value.
@@ -233,8 +243,8 @@ def get_jnd(
 Both definitions are equivalent for linear
 psychometric functions.
 Args:
- grid (Optional[np.ndarray], optional): Mesh grid over which to find the JND.
- Defaults to a square grid of size as determined by aepsych.utils.dim_grid
+ grid (Optional[Union[np.ndarray, torch.Tensor]], optional): Mesh grid over which to find the JND.
+ Defaults to a square grid of size as determined by aepsych.utils.dim_grid.
 cred_level (float, optional): Credible level for computing an interval.
 Defaults to None, computing no interval.
 intensity_dim (int, optional): Dimension over which to compute the JND.
@@ -310,13 +320,21 @@ def dim_grid(
 gridsize: int = 30,
 slice_dims: Optional[Mapping[int, float]] = None,
 ) -> torch.Tensor:
+ """Generate a grid based on lower, upper, and dim.
+
+ Args:
+ gridsize (int, optional): Number of points in each dimension. Defaults to 30.
+ slice_dims (Optional[Mapping[int, float]], optional): Dimensions to fix at a
+ certain value. Defaults to None.
+
+ Returns:
+ torch.Tensor: The resulting grid of points."""
 return dim_grid(self.lb, self.ub, gridsize, slice_dims)
 def set_train_data(self, inputs: Optional[torch.Tensor] = None, targets: Optional[torch.Tensor] = None, strict: bool = False):
 """
- :param torch.Tensor inputs: The new training inputs.
- :param torch.Tensor targets: The new training targets.
- :param bool strict: (default False, ignored). Here for compatibility with
+ Args:
+ inputs (Optional[torch.Tensor], optional): The new training inputs.
+ targets (Optional[torch.Tensor], optional): The new training targets.
+ strict (bool, optional): Default is False; ignored, present only for compatibility with
+ input transformers.
 TODO: actually use this arg or change input transforms
 to not require it.
 """
@@ -327,6 +345,13 @@ def set_train_data(self, inputs: Optional[torch.Tensor] = None, targets: Optiona
 self.train_targets = targets
 def normalize_inputs(self, x: torch.Tensor) -> torch.Tensor:
+ """Normalize the input based on the defined bounds.
+
+ Args:
+ x (torch.Tensor): Tensor of points to normalize.
+
+ Returns:
+ torch.Tensor: Normalized tensor of points."""
 scale = self.ub - self.lb
 return (x - self.lb) / scale
@@ -353,6 +378,13 @@ def _fit_mll(
 optimizer=fit_gpytorch_mll_scipy,
 **kwargs,
 ) -> None:
+ """Fits the model by maximizing the marginal log likelihood.
+
+ Args:
+ mll (MarginalLogLikelihood): Marginal log likelihood object.
+ optimizer_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments for the optimizer. Defaults to None.
+ optimizer: Optimizer to use. Defaults to fit_gpytorch_mll_scipy.
+ """
 self.train()
 train_x, train_y = mll.model.train_inputs[0], mll.model.train_targets
 optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs.copy()
@@ -375,6 +407,15 @@ def _fit_mll(
 return res
 def p_below_threshold(self, x: torch.Tensor, f_thresh: torch.Tensor) -> torch.Tensor:
+ """Compute the probability that the latent function is below a threshold.
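+ For a Gaussian posterior this is Phi((f_thresh - f) / sqrt(var)), evaluated
+ for every combination of threshold and query point.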
+
+ Args:
+ x (torch.Tensor): Points at which to evaluate the probability.
+ f_thresh (torch.Tensor): Threshold value.
+
+ Returns:
+ torch.Tensor: Probability that the latent function is below the threshold.
+ """
 f, var = self.predict(x)
 f_thresh = f_thresh.reshape(-1, 1)
 f = f.reshape(1, -1)
diff --git a/aepsych/models/gp_classification.py b/aepsych/models/gp_classification.py
index ed64c76ba..e84c5f8b0 100644
--- a/aepsych/models/gp_classification.py
+++ b/aepsych/models/gp_classification.py
@@ -69,10 +69,10 @@ def __init__(
 gamma prior.
 likelihood (gpytorch.likelihood.Likelihood, optional): The likelihood function to use. If None defaults to Bernoulli likelihood.
- inducing_size (int, optional): Number of inducing points. Defaults to 99.
+ inducing_size (Optional[int], optional): Number of inducing points. Defaults to 99.
 max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None, there is no limit to the fitting time.
- inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
 If "sobol", a number of Sobol points equal to inducing_size will be selected.
 If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
 If "kmeans++", selects points by performing kmeans++ clustering on the training data.
@@ -179,6 +179,7 @@ def from_config(cls, config: Config) -> GPClassificationModel:
 )
 def _reset_hyperparameters(self) -> None:
+ """Reset hyperparameters to their initial values."""
 # warmstart_hyperparams affects hyperparams but not the variational strat,
 # so we keep the old variational strat (which is only refreshed
 # if warmstart_induc=False).
@@ -190,6 +191,7 @@ def _reset_hyperparameters(self) -> None:
 self.likelihood.load_state_dict(self._fresh_likelihood_dict)
 def _reset_variational_strategy(self) -> None:
+ """Reset the variational strategy."""
 inducing_points = select_inducing_points(
 inducing_size=self.inducing_size,
 covar_module=self.covar_module,
@@ -221,9 +223,9 @@ def fit(
 Args:
 train_x (torch.Tensor): Inputs.
 train_y (torch.LongTensor): Responses.
- warmstart_hyperparams (bool): Whether to reuse the previous hyperparameters (True) or fit from scratch
+ warmstart_hyperparams (bool, optional): Whether to reuse the previous hyperparameters (True) or fit from scratch
 (False). Defaults to False.
- warmstart_induc (bool): Whether to reuse the previous inducing points or fit from scratch (False).
+ warmstart_induc (bool, optional): Whether to reuse the previous inducing points or fit from scratch (False).
 Defaults to False.
 """
 self.set_train_data(train_x, train_y)
@@ -300,10 +302,23 @@ def predict(
 def predict_probability(
 self, x: torch.Tensor
 ) -> Tuple[torch.Tensor, torch.Tensor]:
+ """Query the model for posterior mean and variance in probability space.
+
+ Args:
+ x (torch.Tensor): Points at which to predict from the model.
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points.
+ """
 return self.predict(x, probability_space=True)
 def update(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs):
- """Perform a warm-start update of the model from previous fit."""
+ """Perform a warm-start update of the model from previous fit.
+
+ Args:
+ train_x (torch.Tensor): Inputs.
+ train_y (torch.Tensor): Responses.
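+
+ Example (illustrative; ``model`` is a previously fit GPClassificationModel and
+ ``new_x``, ``new_y`` are placeholder tensors):
+ >>> model.update(new_x, new_y) # warm-starts both hyperparameters and inducing points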
+ """
 return self.fit(
 train_x, train_y, warmstart_hyperparams=True, warmstart_induc=True, **kwargs
 )
@@ -324,6 +339,23 @@ def __init__(
 max_fit_time: Optional[float] = None,
 inducing_point_method: str = "auto",
 ) -> None:
+ """Initialize the GP Beta Regression model
+
+ Args:
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
+ dim (Optional[int], optional): The number of dimensions in the parameter space. If None, it is inferred from the size
+ of lb and ub. Defaults to None.
+ mean_module (Optional[gpytorch.means.Mean], optional): GP mean class. Defaults to a constant with a normal prior.
+ covar_module (Optional[gpytorch.kernels.Kernel], optional): GP covariance kernel class. Defaults to scaled RBF with a
+ gamma prior.
+ likelihood (gpytorch.likelihood.Likelihood, optional): The likelihood function to use. If None defaults to
+ Beta likelihood.
+ inducing_size (Optional[int], optional): Number of inducing points. Defaults to 100.
+ max_fit_time (Optional[float], optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
+ there is no limit to the fitting time. Defaults to None.
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
+ """
 if likelihood is None:
 likelihood = BetaLikelihood()
 super().__init__(
@@ -336,4 +368,4 @@ def __init__(
 inducing_size=inducing_size,
 max_fit_time=max_fit_time,
 inducing_point_method=inducing_point_method,
- )
+ )
\ No newline at end of file
diff --git a/aepsych/models/gp_regression.py b/aepsych/models/gp_regression.py
index ece03581a..cb0c274b4 100644
--- a/aepsych/models/gp_regression.py
+++ b/aepsych/models/gp_regression.py
@@ -33,8 +33,8 @@ class GPRegressionModel(AEPsychMixin, ExactGP):
 def __init__(
 self,
- lb: Union[np.ndarray, torch.Tensor],
- ub: Union[np.ndarray, torch.Tensor],
+ lb: torch.Tensor,
+ ub: torch.Tensor,
 dim: Optional[int] = None,
 mean_module: Optional[gpytorch.means.Mean] = None,
 covar_module: Optional[gpytorch.kernels.Kernel] = None,
@@ -44,8 +44,8 @@ def __init__(
 """Initialize the GP regression model
 Args:
- lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters.
- ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters.
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
 dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size
 of lb and ub.
 mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior.
@@ -77,6 +77,14 @@ def __init__(
 @classmethod
 def construct_inputs(cls, config: Config) -> Dict:
+ """Construct inputs for the GP regression model from configuration.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+
+ Returns:
+ Dict: Dictionary of inputs for the GP regression model.
+ """
 classname = cls.__name__
 lb = config.gettensor(classname, "lb")
@@ -118,7 +126,7 @@ def from_config(cls, config: Config) -> GPRegressionModel:
 from a configuration. TODO: document how this works in some tutorial.
 Args:
- config (Config): A configuration containing keys/values matching this class
+ config (Config): A configuration containing keys/values matching this class.
 Returns:
 GPRegressionModel: Configured class instance.
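+
+ Example (illustrative sketch; assumes an INI-style config string as accepted by aepsych's Config):
+ >>> config = Config(config_str="[GPRegressionModel]\nlb = [0]\nub = [1]")
+ >>> model = GPRegressionModel.from_config(config)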
@@ -140,7 +148,7 @@ def fit(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs) -> None:
 return self._fit_mll(mll, **kwargs)
 def sample(
- self, x: Union[torch.Tensor, np.ndarray], num_samples: int
+ self, x: torch.Tensor, num_samples: int
 ) -> torch.Tensor:
 """Sample from underlying model.
@@ -155,11 +163,15 @@ def sample(
 return self.posterior(x).rsample(torch.Size([num_samples])).detach().squeeze()
 def update(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs):
- """Perform a warm-start update of the model from previous fit."""
+ """Perform a warm-start update of the model from previous fit.
+
+ Args:
+ train_x (torch.Tensor): Inputs.
+ train_y (torch.Tensor): Responses."""
 return self.fit(train_x, train_y, **kwargs)
 def predict(
- self, x: Union[torch.Tensor, np.ndarray], **kwargs
+ self, x: torch.Tensor, **kwargs
 ) -> Tuple[torch.Tensor, torch.Tensor]:
 """Query the model for posterior mean and variance.
@@ -169,7 +181,7 @@ def predict(
 response probability instead of latent function value. Defaults to False.
 Returns:
- Tuple[np.ndarray, np.ndarray]: Posterior mean and variance at queries points.
+ Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points.
 """
 with torch.no_grad():
 post = self.posterior(x)
diff --git a/aepsych/models/monotonic_projection_gp.py b/aepsych/models/monotonic_projection_gp.py
index 925cca184..d8173de82 100644
--- a/aepsych/models/monotonic_projection_gp.py
+++ b/aepsych/models/monotonic_projection_gp.py
@@ -105,6 +105,27 @@ def __init__(
 max_fit_time: Optional[float] = None,
 inducing_point_method: str = "auto",
 ) -> None:
+ """Initialize the MonotonicProjectionGP model.
+
+ Args:
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
+ monotonic_dims (List[int]): A list of the dimensions on which monotonicity should
+ be enforced.
+ monotonic_grid_size (int, optional): The size of the grid, s, in step 1 above. Defaults to 20.
+ min_f_val (Optional[float], optional): If provided, maintains this minimum in the projection in step 5. Defaults to None.
+ dim (Optional[int], optional): The number of dimensions in the parameter space. If None, it is inferred from the size
+ of lb and ub. Defaults to None.
+ mean_module (Optional[gpytorch.means.Mean], optional): GP mean class. Defaults to a constant with a normal prior.
+ covar_module (Optional[gpytorch.kernels.Kernel], optional): GP covariance kernel class. Defaults to scaled RBF with a
+ gamma prior.
+ likelihood (Optional[Likelihood], optional): The likelihood function to use. If None defaults to
+ Gaussian likelihood.
+ inducing_size (Optional[int], optional): The number of inducing points to use. Defaults to None.
+ max_fit_time (Optional[float], optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
+ there is no limit to the fitting time. Defaults to None.
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
+ """
 assert len(monotonic_dims) > 0
 self.monotonic_dims = [int(d) for d in monotonic_dims]
 self.mon_grid_size = monotonic_grid_size
@@ -127,6 +148,16 @@ def posterior(
 observation_noise: Union[bool, torch.Tensor] = False,
 **kwargs: Any,
 ) -> GPyTorchPosterior:
+ """Compute the posterior at X, projecting to enforce monotonicity.
+
+ Args:
+ X (torch.Tensor): The input points at which to compute the posterior.
+ observation_noise (Union[bool, torch.Tensor], optional): Whether or not to include the observation noise in the
+ posterior. Defaults to False.
+
+ Returns:
+ GPyTorchPosterior: The posterior at X.
+ """
 # Augment X with monotonicity grid points, for each monotonic dim
 n, d = X.shape # Require no batch dimensions
 m = len(self.monotonic_dims)
@@ -169,6 +200,15 @@ def posterior(
 def sample(
 self, x: torch.Tensor, num_samples: int
 ) -> torch.Tensor:
+ """Sample from the model.
+
+ Args:
+ x (torch.Tensor): The input points at which to sample.
+ num_samples (int): The number of samples to draw.
+
+ Returns:
+ torch.Tensor: The samples at x.
+ """
 samps = super().sample(x=x, num_samples=num_samples)
 if self.min_f_val is not None:
 samps = samps.clamp(min=self.min_f_val)
diff --git a/aepsych/models/monotonic_rejection_gp.py b/aepsych/models/monotonic_rejection_gp.py
index b35efcb05..970776be0 100644
--- a/aepsych/models/monotonic_rejection_gp.py
+++ b/aepsych/models/monotonic_rejection_gp.py
@@ -52,8 +52,8 @@ class MonotonicRejectionGP(AEPsychMixin, ApproximateGP):
 def __init__(
 self,
 monotonic_idxs: Sequence[int],
- lb: Union[np.ndarray, torch.Tensor],
- ub: Union[np.ndarray, torch.Tensor],
+ lb: torch.Tensor,
+ ub: torch.Tensor,
 dim: Optional[int] = None,
 mean_module: Optional[Mean] = None,
 covar_module: Optional[Kernel] = None,
@@ -67,22 +67,23 @@ def __init__(
 """Initialize MonotonicRejectionGP.
 Args:
+ monotonic_idxs (Sequence[int]): List of which columns of x should be given monotonicity
+ constraints.
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
+ dim (Optional[int], optional): The number of dimensions in the parameter space. If None, it is inferred from the size
+ of lb and ub.
+ covar_module (Optional[Kernel], optional): Covariance kernel to use. Default is scaled RBF.
+ mean_module (Optional[Mean], optional): Mean module to use. Default is constant mean.
 likelihood (str): Link function and likelihood. Can be 'probit-bernoulli' or 'identity-gaussian'.
- monotonic_idxs (List[int]): List of which columns of x should be given monotonicity
- constraints.
 fixed_prior_mean (Optional[float], optional): Fixed prior mean. If classification, should be the prior
 classification probability (not the latent function value). Defaults to None.
- covar_module (Optional[Kernel], optional): Covariance kernel to use (default: scaled RBF).
- mean_module (Optional[Mean], optional): Mean module to use (default: constant mean).
 num_induc (int, optional): Number of inducing points for variational GP. Defaults to 25.
 num_samples (int, optional): Number of samples for estimating posterior on predict or
 acquisition function evaluation. Defaults to 250.
- num_rejection_samples (int, optional): Number of samples used for rejection sampling. Defaults to 4096.
- acqf (MonotonicMCAcquisition, optional): Acquisition function to use for querying points. Defaults to MonotonicMCLSE.
- objective (Optional[MCAcquisitionObjective], optional): Transformation of GP to apply before computing acquisition function. Defaults to identity transform for gaussian likelihood, probit transform for probit-bernoulli.
- extra_acqf_args (Optional[Dict[str, object]], optional): Additional arguments to pass into the acquisition function. Defaults to None.
- """
+ num_rejection_samples (int, optional): Number of samples used for rejection sampling. Defaults to 4096.
+ inducing_point_method (str, optional): Method for selecting inducing points. Defaults to "auto".
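+
+ Example (illustrative; bounds and the monotonic column are placeholders):
+ >>> m = MonotonicRejectionGP(monotonic_idxs=[0], lb=torch.tensor([0.0]), ub=torch.tensor([1.0]))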
+ """ self.lb, self.ub, self.dim = _process_bounds(lb, ub, dim) if likelihood is None: likelihood = BernoulliLikelihood() @@ -172,6 +173,14 @@ def _set_model( model_state_dict: Optional[Dict[str, Tensor]] = None, likelihood_state_dict: Optional[Dict[str, Tensor]] = None, ) -> None: + """Sets the model with the given data and state dicts. + + Args: + train_x (Tensor): Training x points + train_y (Tensor): Training y points. Should be (n x 1). + model_state_dict (Optional[Dict[str, Tensor]], optional): State dict for the model + likelihood_state_dict (Optional[Dict[str, Tensor]], optional): State dict for the likelihood + """ train_x_aug = self._augment_with_deriv_index(train_x, 0) self.set_train_data(train_x_aug, train_y) # Set model parameters @@ -195,7 +204,7 @@ def update(self, train_x: Tensor, train_y: Tensor, warmstart: bool = True) -> No Args: train_x (Tensor): Train X. train_y (Tensor): Train Y. Should be (n x 1). - warmstart (bool): If True, warm-start model fitting with current parameters. + warmstart (bool, optional): If True, warm-start model fitting with current parameters. Defaults to True. """ if warmstart: model_state_dict = self.state_dict() @@ -220,7 +229,8 @@ def sample( Args: x (Tensor): tensor of n points at which to sample - num_samples (int, optional): how many points to sample (default: self.num_samples) + num_samples (Optional[int], optional): how many points to sample. Default is self.num_samples. + num_rejection_samples (Optional[int], optional): how many samples to use for rejection sampling. Default is self.num_rejection_samples. Returns: a Tensor of shape [n_samp, n] """ @@ -263,7 +273,8 @@ def predict( """Predict Args: - x: tensor of n points at which to predict. + x (torch.Tensor): tensor of n points at which to predict. + probability_space (bool, optional): whether to return in probability space. Defaults to False. Returns: tuple (f, var) where f is (n,) and var is (n,) """ @@ -280,17 +291,35 @@ def predict( return mean, variance def predict_probability( - self, x: Union[torch.Tensor, np.ndarray] + self, x: torch.Tensor ) -> Tuple[torch.Tensor, torch.Tensor]: + """Predict in probability space + + Args: + x (torch.Tensor): Points at which to predict. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points. 
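+
+ Example (illustrative; assumes a fitted model ``m`` and a tensor of query points ``x``):
+ >>> p_mean, p_var = m.predict_probability(x)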
+ """
 return self.predict(x, probability_space=True)
- def _augment_with_deriv_index(self, x: Tensor, indx) -> Tensor:
+ def _augment_with_deriv_index(self, x: Tensor, indx: int) -> Tensor:
+ """Augment input with derivative index.
+
+ Args:
+ x (Tensor): Input tensor.
+ indx (int): Derivative index.
+
+ Returns:
+ Tensor: Augmented tensor.
+ """
 return torch.cat(
 (x, indx * torch.ones(x.shape[0], 1)),
 dim=1,
 )
 def _get_deriv_constraint_points(self) -> Tensor:
+ """Get derivative constraint points."""
 deriv_cp = torch.tensor([])
 for i in self.monotonic_idxs:
 induc_i = self._augment_with_deriv_index(self.inducing_points, i + 1)
@@ -299,6 +328,14 @@ def _get_deriv_constraint_points(self) -> Tensor:
 @classmethod
 def from_config(cls, config: Config) -> MonotonicRejectionGP:
+ """Alternate constructor for MonotonicRejectionGP.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+
+ Returns:
+ MonotonicRejectionGP: Configured class instance.
+ """
 classname = cls.__name__
 num_induc = config.gettensor(classname, "num_induc", fallback=25)
 num_samples = config.gettensor(classname, "num_samples", fallback=250)
diff --git a/aepsych/models/multitask_regression.py b/aepsych/models/multitask_regression.py
index e1b683678..d2e662c55 100644
--- a/aepsych/models/multitask_regression.py
+++ b/aepsych/models/multitask_regression.py
@@ -79,6 +79,14 @@ def __init__(
 )
 def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultitaskMultivariateNormal:
+ """Evaluate GP.
+
+ Args:
+ x (torch.Tensor): Tensor of points at which GP should be evaluated.
+
+ Returns:
+ gpytorch.distributions.MultitaskMultivariateNormal: Distribution object
+ holding the mean and covariance at x."""
 transformed_x = self.normalize_inputs(x)
 mean_x = self.mean_module(transformed_x)
 covar_x = self.covar_module(transformed_x)
@@ -86,6 +94,11 @@
 @classmethod
 def construct_inputs(cls, config: Config):
+ """Construct inputs for the Multitask GPR model from configuration.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+ """
 classname = cls.__name__
 args = super().construct_inputs(config)
 args["num_outputs"] = config.getint(classname, "num_outputs", fallback=2)
@@ -151,6 +164,14 @@ def __init__(
 ) # type: ignore # mypy issue 4335
 def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultitaskMultivariateNormal:
+ """Evaluate GP.
+
+ Args:
+ x (torch.Tensor): Tensor of points at which GP should be evaluated.
+
+ Returns:
+ gpytorch.distributions.MultitaskMultivariateNormal: Distribution object
+ holding the mean and covariance at x."""
 base_mvn = super().forward(x) # do transforms
 return gpytorch.distributions.MultitaskMultivariateNormal.from_batch_mvn(
 base_mvn
 )
@@ -158,6 +179,14 @@ def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultitaskMultivaria
 @classmethod
 def get_config_args(cls, config: Config) -> Dict[str, Any]:
+ """Get configuration arguments for the model.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+
+ Returns:
+ Dict[str, Any]: Dictionary of configuration arguments.
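+
+ Note that this extends the parent class's config arguments with multitask-specific
+ entries such as ``num_outputs`` (see the body below).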
+ """
 classname = cls.__name__
 args = super().get_config_args(config)
 args["num_outputs"] = config.getint(classname, "num_outputs", fallback=2)
diff --git a/aepsych/models/ordinal_gp.py b/aepsych/models/ordinal_gp.py
index dc72ead97..1587c1078 100644
--- a/aepsych/models/ordinal_gp.py
+++ b/aepsych/models/ordinal_gp.py
@@ -26,6 +26,12 @@ class OrdinalGPModel(GPClassificationModel):
 outcome_type = "ordinal"
 def __init__(self, likelihood=None, *args, **kwargs):
+ """Initialize the OrdinalGPModel
+
+ Args:
+ likelihood (Optional[Likelihood], optional): The likelihood function to use. If None defaults to
+ Ordinal likelihood.
+ """
 covar_module = kwargs.pop("covar_module", None)
 dim = kwargs.get("dim")
 if covar_module is None:
@@ -52,10 +58,27 @@ def __init__(self, likelihood=None, *args, **kwargs):
 )
 def predict_probs(self, xgrid:torch.Tensor) -> torch.Tensor:
+ """Predict probabilities of each ordinal level at xgrid
+
+ Args:
+ xgrid (torch.Tensor): Tensor of input points to predict at
+
+ Returns:
+ torch.Tensor: Tensor of probabilities of each ordinal level at xgrid
+ """
 fmean, fvar = self.predict(xgrid)
 return self.calculate_probs(fmean, fvar)
 def calculate_probs(self, fmean: torch.Tensor, fvar: torch.Tensor) -> torch.Tensor:
+ """Calculate probabilities of each ordinal level given a mean and variance
+
+ Args:
+ fmean (torch.Tensor): Mean of the latent function
+ fvar (torch.Tensor): Variance of the latent function
+
+ Returns:
+ torch.Tensor: Tensor of probabilities of each ordinal level
+ """
 fsd = torch.sqrt(1 + fvar)
 probs = torch.zeros(*fmean.size(), self.likelihood.n_levels)
diff --git a/aepsych/models/pairwise_probit.py b/aepsych/models/pairwise_probit.py
index f0497d8e7..68cc37baa 100644
--- a/aepsych/models/pairwise_probit.py
+++ b/aepsych/models/pairwise_probit.py
@@ -29,10 +29,19 @@ class PairwiseProbitModel(PairwiseGP, AEPsychMixin):
 outcome_type = "binary"
 def _pairs_to_comparisons(self, x: torch.Tensor, y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
- """
- Takes x, y structured as pairs and judgments and
- returns pairs and comparisons as PairwiseGP requires
- """
+ """Convert pairs of points and their judgments to comparisons.
+
+ Args:
+ x (torch.Tensor): Tensor of shape (n, 2, d) where n is the number of pairs and d is the dimensionality of the
+ parameter space.
+ y (torch.Tensor): Tensor of shape (n,) where n is the number of pairs. Each element is 0 if the first point
+ in the pair is preferred, and 1 if the second point is preferred.
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: A tuple of tensors. The first tensor contains the unique points in the
+ pairs and has shape (number of unique points, d). The second tensor is of shape (n, 2) and contains, for each
+ pair, the indices into the first tensor of the points in that pair.
+ """
 # This needs to take a unique over the feature dim by flattening
 # over pairs but not instances/batches. This is actually tensor
 # matricization over the feature dimension but awkward in numpy
@@ -63,6 +72,17 @@ def __init__(
 covar_module: Optional[gpytorch.kernels.Kernel] = None,
 max_fit_time: Optional[float] = None,
 ) -> None:
+ """Initialize the PairwiseProbitModel
+
+ Args:
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
+ dim (Optional[int], optional): The number of dimensions in the parameter space. If None, it is inferred from the size
+ of lb and ub. Defaults to None.
+ covar_module (Optional[gpytorch.kernels.Kernel], optional): GP covariance kernel class.
Defaults to scaled RBF with a
+ gamma prior.
+ max_fit_time (Optional[float], optional): The maximum amount of time, in seconds, to spend fitting the model. Defaults to None.
+ """
 self.lb, self.ub, dim = _process_bounds(lb, ub, dim)
 self.max_fit_time = max_fit_time
@@ -99,6 +119,13 @@ def fit(
 optimizer_kwargs: Optional[Dict[str, Any]] = None,
 **kwargs,
 ) -> None:
+ """Fit the model to the training data.
+
+ Args:
+ train_x (torch.Tensor): Training x points.
+ train_y (torch.Tensor): Training y points.
+ optimizer_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments to pass to the optimizer. Defaults to None.
+ """
 self.train()
 mll = PairwiseLaplaceMarginalLogLikelihood(self.likelihood, self)
 datapoints, comparisons = self._pairs_to_comparisons(train_x, train_y)
@@ -123,12 +150,27 @@ def fit(
 def update(
 self, train_x: torch.Tensor, train_y: torch.Tensor, warmstart: bool = True
 ) -> None:
- """Perform a warm-start update of the model from previous fit."""
+ """Perform a warm-start update of the model from previous fit.
+
+ Args:
+ train_x (torch.Tensor): Train X.
+ train_y (torch.Tensor): Train Y.
+ warmstart (bool, optional): If True, warm-start model fitting with current parameters. Defaults to True.
+ """
 self.fit(train_x, train_y)
 def predict(
 self, x: torch.Tensor, probability_space: bool =False, num_samples: int =1000, rereference: str ="x_min"
 ) -> Tuple[torch.Tensor, torch.Tensor]:
+ """Query the model for posterior mean and variance.
+
+ Args:
+ x (torch.Tensor): Points at which to predict from the model.
+ probability_space (bool, optional): Return outputs in units of response probability instead of latent function value. Defaults to False.
+ num_samples (int, optional): Number of samples used to estimate the posterior. Defaults to 1000.
+ rereference (str, optional): How to rereference samples; see sample(). Defaults to "x_min".
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points.
+ """
 if rereference is not None:
 samps = self.sample(x, num_samples, rereference)
 fmean, fvar = samps.mean(0).squeeze(), samps.var(0).squeeze()
@@ -147,11 +189,30 @@ def predict(
 def predict_probability(
 self, x: torch.Tensor, probability_space: bool = False, num_samples: int = 1000, rereference: str = "x_min"
 ) -> Tuple[torch.Tensor, torch.Tensor]:
+ """Query the model for posterior mean and variance in probability space.
+
+ Args:
+ x (torch.Tensor): Points at which to predict from the model.
+ probability_space (bool, optional): Ignored; this method always returns values in probability space. Defaults to False.
+ num_samples (int, optional): Number of samples used to estimate the posterior. Defaults to 1000.
+ rereference (str, optional): How to rereference samples; see sample(). Defaults to "x_min".
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points.
+ """
 return self.predict(
 x, probability_space=True, num_samples=num_samples, rereference=rereference
 )
 def sample(self, x: torch.Tensor, num_samples: int, rereference: str = "x_min") -> torch.Tensor:
+ """Sample from the model.
+
+ Args:
+ x (torch.Tensor): Points at which to sample.
+ num_samples (int): Number of samples to return.
+ rereference (str, optional): How to sample. Options are "x_min", "x_max", "f_min", "f_max". Defaults to "x_min".
+
+ Returns:
+ torch.Tensor: Posterior samples [num_samples x dim]
+ """
 if len(x.shape) < 2:
 x = x.reshape(-1, 1)
 if rereference is None:
@@ -180,7 +241,14 @@ def sample(self, x: torch.Tensor, num_samples: int, rereference: str = "x_min")
 @classmethod
 def from_config(cls, config: Config) -> 'PairwiseProbitModel':
+ """Initialize the model from a config object.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+
+ Returns:
+ PairwiseProbitModel: Configured class instance.
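+
+ Example (illustrative sketch; config keys mirror the constructor arguments):
+ >>> config = Config(config_str="[PairwiseProbitModel]\nlb = [0]\nub = [1]")
+ >>> model = PairwiseProbitModel.from_config(config)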
+ """ classname = cls.__name__ mean_covar_factory = config.getobj( diff --git a/aepsych/models/semi_p.py b/aepsych/models/semi_p.py index fd3821188..52554892a 100644 --- a/aepsych/models/semi_p.py +++ b/aepsych/models/semi_p.py @@ -41,6 +41,16 @@ def _hadamard_mvn_approx(x_intensity: torch.Tensor, slope_mean: torch.Tensor, sl MVN approximation to the hadamard product of GPs (from the SemiP paper, extending the zero-mean results in https://mathoverflow.net/questions/293955/normal-approximation-to-the-pointwise-hadamard-schur-product-of-two-multivariat) + + Args: + x_intensity (torch.Tensor): The intensity dimension + slope_mean (torch.Tensor): The mean of the slope GP + slope_cov (torch.Tensor): The covariance of the slope GP + offset_mean (torch.Tensor): The mean of the offset GP + offset_cov (torch.Tensor): The covariance of the offset GP + + Returns: + Tuple[torch.Tensor, torch.Tensor]: The mean and covariance of the approximated MVN """ offset_mean = offset_mean + x_intensity @@ -60,6 +70,13 @@ def _hadamard_mvn_approx(x_intensity: torch.Tensor, slope_mean: torch.Tensor, sl def semi_p_posterior_transform(posterior: GPyTorchPosterior) -> GPyTorchPosterior: + """Transform a posterior from a SemiP model to a Hadamard model. + + Args: + posterior (GPyTorchPosterior): The posterior to transform + + Returns: + GPyTorchPosterior: The transformed posterior""" batch_mean = posterior.mvn.mean batch_cov = posterior.mvn.covariance_matrix offset_mean = batch_mean[..., 0, :] @@ -85,6 +102,14 @@ def __init__( likelihood: LinearBernoulliLikelihood, Xi: torch.Tensor, ) -> None: + """Initialize a SemiPPosterior object. + + Args: + mvn (MultivariateNormal): The MVN object to use + likelihood (LinearBernoulliLikelihood): The likelihood object + Xi (torch.Tensor): The intensity dimension + """ + super().__init__(distribution=mvn) self.likelihood = likelihood self.Xi = Xi @@ -98,6 +123,10 @@ def rsample_from_base_samples( This is intended to be used with a sampler that produces the corresponding base samples, and enables acquisition optimization via Sample Average Approximation. + + Args: + sample_shape (torch.Size): The desired shape of the samples + base_samples (Tensor): The base samples """ return ( super() @@ -112,6 +141,15 @@ def rsample( sample_shape: Optional[torch.Size] = None, base_samples: Optional[torch.Tensor] = None, ) -> torch.Tensor: + """Sample from the posterior distribution using the reparameterization trick + + Args: + sample_shape (Optional[torch.Size], optional): The desired shape of the samples. Defaults to None. + base_samples (Optional[torch.Tensor], optional): The base samples. Defaults to None. + + Returns: + torch.Tensor: The sampled values from the posterior distribution. + """ if base_samples is None: samps_ = super().rsample(sample_shape=sample_shape) else: @@ -130,6 +168,14 @@ def sample_p( sample_shape: Optional[torch.Size] = None, base_samples: Optional[torch.Tensor] = None, ) -> torch.Tensor: + """Sample from the likelihood distribution of the modeled function. + + Args: + sample_shape (Optional[torch.Size], optional): The desired shape of the samples. Defaults to None. + base_samples (Optional[torch.Tensor], optional): The base samples. Defaults to None. 
+
+ Returns:
+ torch.Tensor: The sampled values from the likelihood distribution."""
 kcsamps = self.rsample(sample_shape=sample_shape, base_samples=base_samples)
 return self.likelihood.p(function_samples=kcsamps, Xi=self.Xi).squeeze(-1)
 def sample_f(
 self,
 sample_shape: Optional[torch.Size] = None,
 base_samples: Optional[torch.Tensor] = None,
 ) -> torch.Tensor:
+ """Sample from the function values of the modeled distribution.
+
+ Args:
+ sample_shape (Optional[torch.Size], optional): The desired shape of the samples. Defaults to None.
+ base_samples (Optional[torch.Tensor], optional): The base samples. Defaults to None.
+
+ Returns:
+ torch.Tensor: The sampled function values from the likelihood."""
+
 kcsamps = self.rsample(sample_shape=sample_shape, base_samples=base_samples)
 return self.likelihood.f(function_samples=kcsamps, Xi=self.Xi).squeeze(-1)
 def sample_thresholds(
 self,
 threshold_level: float,
 sample_shape: Optional[torch.Size] = None,
 base_samples: Optional[torch.Tensor] = None,
 ) -> SemiPThresholdObjective:
+ """Sample the thresholds based on the given threshold level.
+
+ Args:
+ threshold_level (float): The target threshold level for sampling.
+ sample_shape (Optional[torch.Size], optional): The desired shape of the samples. Defaults to None.
+ base_samples (Optional[torch.Tensor], optional): The base samples. Defaults to None.
+
+ Returns:
+ SemiPThresholdObjective: The sampled thresholds based on the threshold level.
+ """
 fsamps = self.rsample(sample_shape=sample_shape, base_samples=base_samples)
 return SemiPThresholdObjective(
@@ -174,8 +239,8 @@ class SemiParametricGPModel(GPClassificationModel):
 def __init__(
 self,
- lb: Union[np.ndarray, torch.Tensor],
- ub: Union[np.ndarray, torch.Tensor],
+ lb: torch.Tensor,
+ ub: torch.Tensor,
 dim: Optional[int] = None,
 stim_dim: int = 0,
 mean_module: Optional[gpytorch.means.Mean] = None,
@@ -190,20 +255,20 @@ def __init__(
 """
 Initialize SemiParametricGP.
 Args:
- lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters.
- ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters.
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
 dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size
- of lb and ub.
- stim_dim (int): Index of the intensity (monotonic) dimension. Defaults to 0.
+ of lb and ub. Defaults to None.
+ stim_dim (int, optional): Index of the intensity (monotonic) dimension. Defaults to 0.
 mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior.
 covar_module (gpytorch.kernels.Kernel, optional): GP covariance kernel class. Defaults to scaled RBF with a
 gamma prior.
 likelihood (gpytorch.likelihood.Likelihood, optional): The likelihood function to use. If None defaults to
 linear-Bernoulli likelihood with probit link.
- inducing_size (int): Number of inducing points. Defaults to 99.
+ inducing_size (Optional[int], optional): Number of inducing points. Defaults to 99.
 max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
 there is no limit to the fitting time.
- inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
 If "sobol", a number of Sobol points equal to inducing_size will be selected.
If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
 If "kmeans++", selects points by performing kmeans++ clustering on the training data.
@@ -315,9 +380,9 @@ def fit(
 Args:
 train_x (torch.Tensor): Inputs.
 train_y (torch.LongTensor): Responses.
- warmstart_hyperparams (bool): Whether to reuse the previous hyperparameters (True) or fit from scratch
+ warmstart_hyperparams (bool, optional): Whether to reuse the previous hyperparameters (True) or fit from scratch
 (False). Defaults to False.
- warmstart_induc (bool): Whether to reuse the previous inducing points or fit from scratch (False).
+ warmstart_induc (bool, optional): Whether to reuse the previous inducing points or fit from scratch (False).
 Defaults to False.
 kwargs: Keyword arguments passed to `optimizer=fit_gpytorch_mll_scipy`.
 """
@@ -335,13 +400,15 @@ def sample(
 self,
 x: torch.Tensor,
 num_samples: int,
- probability_space=False,
+ probability_space: bool = False,
 ) -> torch.Tensor:
 """Sample from underlying model.
 Args:
-
- x ((n x d) torch.Tensor): Points at which to sample.
- num_samples (int, optional): Number of samples to return. Defaults to None.
+
+ x (torch.Tensor): `n x d` points at which to sample.
+ num_samples (int): Number of samples to return.
+ probability_space (bool, optional): Whether to sample from the probability space (True) or the latent function. Defaults to False.
 kwargs are ignored
 Returns:
@@ -377,6 +444,15 @@ def predict(
 return promote_0d(m), promote_0d(v)
 def posterior(self, X: torch.Tensor, posterior_transform: Optional[PosteriorTransform] = None) -> SemiPPosterior:
+ """Get the posterior distribution at the given points.
+
+ Args:
+ X (torch.Tensor): Points at which to evaluate the posterior.
+ posterior_transform (Optional[PosteriorTransform], optional): A transform to apply to the posterior. Defaults to None.
+
+ Returns:
+ SemiPPosterior: The posterior distribution at the given points.
+ """
 # Assume x is (b) x n x d
 if X.ndim > 3:
 raise ValueError
@@ -436,8 +512,8 @@ def __init__(
 """
 Initialize HadamardSemiPModel.
 Args:
- lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters.
- ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters.
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
 dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size
 of lb and ub.
 stim_dim (int): Index of the intensity (monotonic) dimension. Defaults to 0.
 slope_mean_module (gpytorch.means.Mean, optional): Mean module to use (default: constant mean) for slope.
 slope_covar_module (gpytorch.kernels.Kernel, optional): Covariance kernel to use (default: scaled RBF) for slope.
 offset_mean_module (gpytorch.means.Mean, optional): Mean module to use (default: constant mean) for offset.
 offset_covar_module (gpytorch.kernels.Kernel, optional): Covariance kernel to use (default: scaled RBF) for offset.
 likelihood (gpytorch.likelihood.Likelihood, optional): defaults to bernoulli with logistic input and a floor of .5
- inducing_size (int): Number of inducing points. Defaults to 99.
+ inducing_size (Optional[int], optional): Number of inducing points. Defaults to 99.
 max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
 there is no limit to the fitting time.
- inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
 If "sobol", a number of Sobol points equal to inducing_size will be selected.
If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
 If "kmeans++", selects points by performing kmeans++ clustering on the training data.
@@ -510,7 +586,7 @@ def __init__(
 self._fresh_likelihood_dict = deepcopy(self.likelihood.state_dict())
 def forward(self, x: torch.Tensor) -> MultivariateNormal:
- """Forward pass for semip GP.
+ """Forward pass for HadamardSemiPModel GP.
 generates a k(c + x[:,stim_dim]) = kc + kx[:,stim_dim] mvn object where k and c are slope and offset GPs and x[:,stim_dim] are the intensity stimulus (x)
diff --git a/aepsych/models/utils.py b/aepsych/models/utils.py
index 64998ce43..145f42526 100644
--- a/aepsych/models/utils.py
+++ b/aepsych/models/utils.py
@@ -59,6 +59,19 @@ def select_inducing_points(
 bounds: Optional[Union[torch.Tensor, np.ndarray]] = None,
 method: str = "auto",
 ) -> torch.Tensor:
+ """Select inducing points for GP model
+
+ Args:
+ inducing_size (int): Number of inducing points to select.
+ covar_module (Kernel): The kernel module to use for inducing point selection.
+ X (torch.Tensor): The training data.
+ bounds (Optional[Union[torch.Tensor, np.ndarray]], optional): The bounds of the input space. Defaults to None.
+ method (str, optional): The method to use for inducing point selection. One of
+ "pivoted_chol", "kmeans++", "auto", or "sobol". Defaults to "auto".
+
+ Returns:
+ torch.Tensor: The selected inducing points.
+ """
 with torch.no_grad():
 assert method in (
 "pivoted_chol",
@@ -104,6 +117,15 @@ def select_inducing_points(
def get_probability_space(likelihood: Likelihood, posterior: GPyTorchPosterior) -> Tuple[torch.Tensor, torch.Tensor]:
+ """Get the mean and variance of the probability space for a given posterior
+
+ Args:
+ likelihood (Likelihood): The likelihood function.
+ posterior (GPyTorchPosterior): The posterior to transform.
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: The mean and variance of the probability space.
+ """
 fmean = posterior.mean.squeeze()
 fvar = posterior.variance.squeeze()
 if isinstance(likelihood, BernoulliLikelihood):
@@ -233,14 +255,37 @@ class TargetDistancePosteriorTransform(PosteriorTransform):
 def __init__(
 self, target_value: Union[float, Tensor], weights: Optional[Tensor] = None
 ) -> None:
+ """Initialize the TargetDistancePosteriorTransform
+
+ Args:
+ target_value (Union[float, Tensor]): The target value to transform the posterior to.
+ weights (Optional[Tensor]): Weights to apply to the target value. Defaults to None.
+ """
 super().__init__()
 self.target_value = target_value
 self.weights = weights
 def evaluate(self, Y: Tensor) -> Tensor:
+ """Evaluate the squared distance from the target value.
+
+ Args:
+ Y (Tensor): The tensor to evaluate.
+
+ Returns:
+ Tensor: The squared distance from the target value.
+ """
 return (Y - self.target_value) ** 2
 def _forward(self, mean: Tensor, var: Tensor) -> GPyTorchPosterior:
+ """Transform the posterior mean and variance based on the target value.
+
+ Args:
+ mean (Tensor): The posterior mean.
+ var (Tensor): The posterior variance.
+
+ Returns:
+ GPyTorchPosterior: The transformed posterior.
+ """
 q, _ = mean.shape[-2:]
 batch_shape = mean.shape[:-2]
@@ -255,6 +300,14 @@ def _forward(self, mean: Tensor, var: Tensor) -> GPyTorchPosterior:
 return GPyTorchPosterior(mvn)
 def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior:
+ """Transform the given posterior distribution to reflect the target distance.
+
+ Args:
+ posterior (GPyTorchPosterior): The posterior to transform.
+
+ Returns:
+ GPyTorchPosterior: The transformed posterior.
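+
+ Example (illustrative; ``posterior`` is any GPyTorchPosterior):
+ >>> transform = TargetDistancePosteriorTransform(target_value=0.75)
+ >>> transformed = transform(posterior)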
+ """ mean = posterior.mean var = posterior.variance return self._forward(mean, var) @@ -263,6 +316,14 @@ def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior: # Requires botorch approximate model to accept posterior transforms class TargetProbabilityDistancePosteriorTransform(TargetDistancePosteriorTransform): def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior: + """Transform the given posterior distribution to reflect the target probability distance. + + Args: + posterior (GPyTorchPosterior): The posterior to transform. + + Returns: + GPyTorchPosterior: The transformed posterior distribution reflecting the target probability distance. + """ pmean, pvar = get_probability_space(BernoulliLikelihood(), posterior) pmean = pmean.unsqueeze(-1).unsqueeze(-1) pvar = pvar.unsqueeze(-1).unsqueeze(-1)