Improve Docstrings in aepsych/models (#423)
Summary:
Improves documentation in `aepsych/models` for better clarity and consistency.

- Adds missing docstrings to functions and methods across all models.
- Updates existing docstrings with refined type hints and a unified structure.

Pull Request resolved: #423

Reviewed By: crasanders

Differential Revision: D65858741

Pulled By: JasonKChow

fbshipit-source-id: e10d788a0d271838157a6dc421052369ff02ff9e
yalsaffar authored and facebook-github-bot committed Nov 20, 2024
1 parent f1d728e commit d9d4eff
Showing 11 changed files with 599 additions and 140 deletions.
121 changes: 92 additions & 29 deletions aepsych/models/base.py
@@ -10,7 +10,7 @@
import time
from collections.abc import Iterable
from copy import deepcopy
from typing import Any, Dict, List, Mapping, Optional, Protocol, Tuple, Union
from typing import Any, Callable, Dict, List, Mapping, Optional, Protocol, Tuple, Union

import gpytorch
import numpy as np
@@ -91,7 +91,7 @@ def _get_extremum(
extremum_type: str,
locked_dims: Optional[Mapping[int, List[float]]],
n_samples=1000,
) -> Tuple[float, np.ndarray]:
) -> Tuple[float, torch.Tensor]:
pass

def dim_grid(self, gridsize: int = 30) -> torch.Tensor:
@@ -131,14 +131,17 @@ def get_max(
max_time: Optional[float] = None,
) -> Tuple[float, torch.Tensor]:
"""Return the maximum of the modeled function, subject to constraints
Args:
locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the
inverse is along a slice of the full surface.
locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the
inverse is along a slice of the full surface. Defaults to None.
probability_space (bool): Is y (and therefore the returned nearest_y) in
probability space instead of latent function space? Defaults to False.
n_samples int: number of coarse grid points to sample for optimization estimate.
n_samples (int): number of coarse grid points to sample for optimization estimate.
max_time (float, optional): Maximum time to spend optimizing. Defaults to None.
Returns:
Tuple[float, np.ndarray]: Tuple containing the max and its location (argmax).
Tuple[float, torch.Tensor]: Tuple containing the max and its location (argmax).
"""
locked_dims = locked_dims or {}
_, _arg = get_extremum(
@@ -160,11 +163,13 @@ def get_min(
) -> Tuple[float, torch.Tensor]:
"""Return the minimum of the modeled function, subject to constraints
Args:
locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the
locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the
inverse is along a slice of the full surface.
probability_space (bool): Is y (and therefore the returned nearest_y) in
probability space instead of latent function space? Defaults to False.
n_samples int: number of coarse grid points to sample for optimization estimate.
n_samples (int): number of coarse grid points to sample for optimization estimate.
max_time (float, optional): Maximum time to spend optimizing. Defaults to None.
Returns:
Tuple[float, torch.Tensor]: Tuple containing the min and its location (argmin).
"""
@@ -191,12 +196,17 @@ def inv_query(
"""Query the model inverse.
Return nearest x such that f(x) = queried y, and also return the
value of f at that point.
Args:
y (float): Point at which to find the inverse.
locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the
locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the
inverse is along a slice of the full surface.
probability_space (bool): Is y (and therefore the returned nearest_y) in
probability space instead of latent function space? Defaults to False.
n_samples (int): number of coarse grid points to sample for optimization estimate. Defaults to 1000.
max_time (float, optional): Maximum time to spend optimizing. Defaults to None.
weights (torch.Tensor, optional): Weights for the optimization. Defaults to None.
Returns:
Tuple[float, torch.Tensor]: Tuple containing the value of f
nearest to queried y and the x position of this value.
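And a matching sketch for the inverse query (same hypothetical fitted model; the target value 0.75 is arbitrary):

# Find the x whose predicted response probability is nearest 0.75,
# restricted to the slice where dimension 1 is fixed at 0.0.
nearest_y, nearest_x = model.inv_query(
    y=0.75,
    locked_dims={1: [0.0]},
    probability_space=True,
)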
@@ -220,7 +230,7 @@ def inv_query(

def get_jnd(
self: ModelProtocol,
grid: Optional[Union[np.ndarray, torch.Tensor]] = None,
grid: Optional[torch.Tensor] = None,
cred_level: Optional[float] = None,
intensity_dim: int = -1,
confsamps: int = 500,
@@ -239,20 +249,17 @@ def get_jnd(
Both definitions are equivalent for linear psychometric functions.
Args:
grid (Optional[np.ndarray], optional): Mesh grid over which to find the JND.
Defaults to a square grid of size as determined by aepsych.utils.dim_grid
grid (torch.Tensor, optional): Mesh grid over which to find the JND.
Defaults to a square grid of size as determined by aepsych.utils.dim_grid.
cred_level (float, optional): Credible level for computing an interval.
Defaults to None, computing no interval.
intensity_dim (int, optional): Dimension over which to compute the JND.
intensity_dim (int): Dimension over which to compute the JND.
Defaults to -1.
confsamps (int, optional): Number of posterior samples to use for
confsamps (int): Number of posterior samples to use for
computing the credible interval. Defaults to 500.
method (str, optional): "taylor" or "step" method (see docstring).
method (str): "taylor" or "step" method (see docstring).
Defaults to "step".
Raises:
RuntimeError: for passing an unknown method.
Returns:
Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]: either the
mean JND, or a median, lower, upper tuple of the JND posterior.
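A hedged sketch of both return forms described above (hypothetical fitted model):

# Mean JND over the default grid with the default "step" method.
jnd = model.get_jnd()
# Median JND plus a 95% credible interval from 1000 posterior samples.
median, lower, upper = model.get_jnd(cred_level=0.95, confsamps=1000)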
@@ -316,6 +323,12 @@ def dim_grid(
gridsize: int = 30,
slice_dims: Optional[Mapping[int, float]] = None,
) -> torch.Tensor:
"""Generate a grid based on lower, upper, and dim.
Args:
gridsize (int): Number of points in each dimension. Defaults to 30.
slice_dims (Mapping[int, float], optional): Dimensions to fix at a certain value. Defaults to None.
Returns:
torch.Tensor: Tensor of grid points.
"""
return dim_grid(self.lb, self.ub, gridsize, slice_dims)
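A small sketch of the helper (hypothetical model; dimension 1 is pinned, so the grid varies over the remaining dimensions):

# 10 points per free dimension; dimension 1 held at 0.5 throughout.
grid = model.dim_grid(gridsize=10, slice_dims={1: 0.5})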

def set_train_data(
@@ -325,9 +338,13 @@ def set_train_data(
strict: bool = False,
):
"""
:param torch.Tensor inputs: The new training inputs.
:param torch.Tensor targets: The new training targets.
:param bool strict: (default False, ignored). Here for compatibility with
Set the training data for the model.
Args:
inputs (torch.Tensor, optional): The new training inputs.
targets (torch.Tensor, optional): The new training targets.
strict (bool): Default is False. Ignored, just for compatibility.
input transformers. TODO: actually use this arg or change input transforms
to not require it.
"""
@@ -356,9 +373,16 @@ def _fit_mll(
self,
mll: MarginalLogLikelihood,
optimizer_kwargs: Optional[Dict[str, Any]] = None,
optimizer=fit_gpytorch_mll_scipy,
optimizer: Callable = fit_gpytorch_mll_scipy,
**kwargs,
) -> None:
"""Fits the model by maximizing the marginal log likelihood.
Args:
mll (MarginalLogLikelihood): Marginal log likelihood object.
optimizer_kwargs (Dict[str, Any], optional): Keyword arguments for the optimizer.
optimizer (Callable): Optimizer to use. Defaults to fit_gpytorch_mll_scipy.
"""
self.train()
train_x, train_y = mll.model.train_inputs[0], mll.model.train_targets
optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs.copy()
@@ -385,8 +409,19 @@ def _fit_mll(
return res

def p_below_threshold(
self, x, f_thresh
) -> torch.Tensor: # Return a tensor instead of NumPy array
self,
x: torch.Tensor,
f_thresh: torch.Tensor
) -> torch.Tensor:
"""Compute the probability that the latent function is below a threshold.
Args:
x (torch.Tensor): Points at which to evaluate the probability.
f_thresh (torch.Tensor): Threshold value.
Returns:
torch.Tensor: Probability that the latent function is below the threshold.
"""
f, var = self.predict(x)
f_thresh = f_thresh.reshape(-1, 1)
f = f.reshape(1, -1)
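The two reshapes above broadcast a vector of thresholds against a vector of query points; a sketch of the intended call shape (tensors are hypothetical):

import torch

x = torch.linspace(0.0, 1.0, 50).unsqueeze(-1)  # 50 one-dimensional query points
f_thresh = torch.tensor([0.0, 0.5])             # two latent-space thresholds
# f_thresh becomes shape (2, 1) and f shape (1, 50), so the broadcast
# result carries one row per threshold and one column per query point.
probs = model.p_below_threshold(x, f_thresh)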
@@ -400,11 +435,14 @@ class AEPsychModelDeviceMixin(AEPsychMixin):
_train_inputs: Optional[Tuple[torch.Tensor]]
_train_targets: Optional[torch.Tensor]

def set_train_data(self, inputs=None, targets=None, strict=False):
"""
:param torch.Tensor inputs: The new training inputs.
:param torch.Tensor targets: The new training targets.
:param bool strict: (default False, ignored). Here for compatibility with
def set_train_data(self, inputs: Optional[torch.Tensor] = None, targets: Optional[torch.Tensor] = None, strict: bool = False) -> None:
"""Set the training data for the model.
Args:
inputs (torch.Tensor, optional): The new training inputs X.
targets (torch.Tensor, optional): The new training targets Y.
strict (bool): Whether to strictly enforce the device of the inputs and targets.
input transformers. TODO: actually use this arg or change input transforms
to not require it.
"""
@@ -417,12 +455,22 @@ def set_train_data(self, inputs=None, targets=None, strict=False):

@property
def device(self) -> torch.device:
"""Get the device of the model.
Returns:
torch.device: Device of the model.
"""
# We assume all models have some parameters and all models will only use one device
# notice that this has no setting, don't let users set device, use .to().
return next(self.parameters()).device
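As the comments say, the device is read-only and derived from the parameters; moving a model is done with .to(). A brief sketch:

model.to(torch.device("cuda:0"))  # moves parameters, and hence the reported device
print(model.device)               # -> device(type='cuda', index=0)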

@property
def train_inputs(self) -> Optional[Tuple[torch.Tensor]]:
"""Get the training inputs.
Returns:
Optional[Tuple[torch.Tensor]]: Training inputs.
"""
if self._train_inputs is None:
return None

@@ -434,6 +482,11 @@ def train_inputs(self) -> Optional[Tuple[torch.Tensor]]:

@train_inputs.setter
def train_inputs(self, train_inputs: Optional[Tuple[torch.Tensor]]) -> None:
"""Set the training inputs.
Args:
train_inputs (Tuple[torch.Tensor], optional): Training inputs.
"""
if train_inputs is None:
self._train_inputs = None
else:
@@ -446,6 +499,11 @@ def train_inputs(self, train_inputs: Optional[Tuple[torch.Tensor]]) -> None:

@property
def train_targets(self) -> Optional[torch.Tensor]:
"""Get the training targets.
Returns:
Optional[torch.Tensor]: Training targets.
"""
if self._train_targets is None:
return None

@@ -456,6 +514,11 @@ def train_targets(self) -> Optional[torch.Tensor]:

@train_targets.setter
def train_targets(self, train_targets: Optional[torch.Tensor]) -> None:
"""Set the training targets.
Args:
train_targets (torch.Tensor, optional): Training targets.
"""
if train_targets is None:
self._train_targets = None
else:
2 changes: 1 addition & 1 deletion aepsych/models/derivative_gp.py
@@ -50,7 +50,7 @@ def __init__(
is an observation of df/dx_i.
train_y (torch.Tensor): Training y points
inducing_points (torch.Tensor): Inducing points to use
scales (Union[torch.Tensor, float], optional): Typical scale of each dimension
scales (Union[torch.Tensor, float]): Typical scale of each dimension
of input space (this is used to set the lengthscale prior).
Defaults to 1.0.
mean_module (Mean, optional): A mean class that supports derivative
43 changes: 37 additions & 6 deletions aepsych/models/gp_classification.py
@@ -62,8 +62,8 @@ def __init__(
"""Initialize the GP Classification model
Args:
lb torch.Tensor: Lower bounds of the parameters.
ub torch.Tensor: Upper bounds of the parameters.
lb (torch.Tensor): Lower bounds of the parameters.
ub (torch.Tensor): Upper bounds of the parameters.
dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size
of lb and ub.
mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior.
@@ -140,7 +140,7 @@ def __init__(

@classmethod
def from_config(cls, config: Config) -> GPClassificationModel:
"""Alternate constructor for GPClassification model.
"""Alternate constructor for GPClassification model from a configuration.
This is used when we recursively build a full sampling strategy
from a configuration. TODO: document how this works in some tutorial.
@@ -196,6 +196,7 @@ def from_config(cls, config: Config) -> GPClassificationModel:
)

def _reset_hyperparameters(self) -> None:
"""Reset hyperparameters to their initial values."""
# warmstart_hyperparams affects hyperparams but not the variational strat,
# so we keep the old variational strat (which is only refreshed
# if warmstart_induc=False).
@@ -270,7 +271,7 @@ def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor:
Args:
x (torch.Tensor): Points at which to sample.
num_samples (int, optional): Number of samples to return. Defaults to None.
num_samples (int): Number of samples to return.
kwargs are ignored
Returns:
@@ -286,7 +287,7 @@ def predict(
Args:
x (torch.Tensor): Points at which to predict from the model.
probability_space (bool, optional): Return outputs in units of
probability_space (bool): Return outputs in units of
response probability instead of latent function value. Defaults to False.
Returns:
@@ -324,10 +325,23 @@ def predict(
return promote_0d(fmean), promote_0d(fvar)

def predict_probability(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""Query the model for posterior mean and variance in probability space.
Args:
x (torch.Tensor): Points at which to predict from the model.
Returns:
Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at queried points.
"""
return self.predict(x, probability_space=True)
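A short sketch contrasting the two prediction paths (hypothetical fitted model):

x = model.dim_grid(gridsize=20)
fmean, fvar = model.predict(x)              # latent function space
pmean, pvar = model.predict_probability(x)  # shorthand for predict(x, probability_space=True)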

def update(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs):
"""Perform a warm-start update of the model from previous fit."""
"""Perform a warm-start update of the model from previous fit.
Args:
train_x (torch.Tensor): Inputs.
train_y (torch.Tensor): Responses.
"""
return self.fit(
train_x, train_y, warmstart_hyperparams=True, warmstart_induc=True, **kwargs
)
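A sketch of the refit loop this enables (data tensors are hypothetical):

model.fit(train_x, train_y)      # initial fit from scratch
# ...collect more trials, then refit cheaply, reusing the previous
# hyperparameters and inducing points:
model.update(train_x_new, train_y_new)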
@@ -349,6 +363,23 @@ def __init__(
inducing_point_method: str = "auto",
optimizer_options: Optional[Dict[str, Any]] = None,
) -> None:
"""Initialize the GP Beta Regression model
Args:
lb (torch.Tensor): Lower bounds of the parameters.
ub (torch.Tensor): Upper bounds of the parameters.
dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size
of lb and ub. Defaults to None.
mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior. Defaults to None.
covar_module (gpytorch.kernels.Kernel, optional): GP covariance kernel class. Defaults to scaled RBF with a
gamma prior.
likelihood (gpytorch.likelihood.Likelihood, optional): The likelihood function to use. If None defaults to
Beta likelihood.
inducing_size (int, optional): Number of inducing points. Defaults to 100.
max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
there is no limit to the fitting time. Defaults to None.
inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
"""
if likelihood is None:
likelihood = BetaLikelihood()
super().__init__(
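Finally, a minimal construction sketch for the class documented above; the import path and all values are assumptions, and omitted arguments fall back to the documented defaults:

import torch
from aepsych.models import GPBetaRegressionModel  # assumed export location

model = GPBetaRegressionModel(
    lb=torch.tensor([0.0]),  # hypothetical 1-D bounds
    ub=torch.tensor([1.0]),
    dim=1,
)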