From 547063e58c5a24017969ee767655140821287b61 Mon Sep 17 00:00:00 2001 From: Yousif Alsaffar Date: Thu, 31 Oct 2024 15:12:16 +0300 Subject: [PATCH] adding docstrings to models files --- aepsych/models/base.py | 71 +++++++++++--- aepsych/models/gp_classification.py | 44 +++++++-- aepsych/models/gp_regression.py | 30 ++++-- aepsych/models/monotonic_projection_gp.py | 40 ++++++++ aepsych/models/monotonic_rejection_gp.py | 69 +++++++++---- aepsych/models/multitask_regression.py | 29 ++++++ aepsych/models/ordinal_gp.py | 23 +++++ aepsych/models/pairwise_probit.py | 78 ++++++++++++++- aepsych/models/semi_p.py | 112 ++++++++++++++++++---- aepsych/models/utils.py | 61 ++++++++++++ 10 files changed, 488 insertions(+), 69 deletions(-) diff --git a/aepsych/models/base.py b/aepsych/models/base.py index 0baee322b..f2f36ad05 100644 --- a/aepsych/models/base.py +++ b/aepsych/models/base.py @@ -125,14 +125,17 @@ def get_max( max_time: Optional[float] = None, ) -> Tuple[float, torch.Tensor]: """Return the maximum of the modeled function, subject to constraints + Args: - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the - inverse is along a slice of the full surface. - probability_space (bool): Is y (and therefore the returned nearest_y) in + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the + inverse is along a slice of the full surface. Defaults to None. + probability_space (bool, optional): Is y (and therefore the returned nearest_y) in probability space instead of latent function space? Defaults to False. - n_samples int: number of coarse grid points to sample for optimization estimate. + n_samples (int, optional): number of coarse grid points to sample for optimization estimate. + max_time (Optional[float], optional): Maximum time to spend optimizing. Defaults to None. + Returns: - Tuple[float, np.ndarray]: Tuple containing the max and its location (argmax). + Tuple[float, torch.Tensor]: Tuple containing the max and its location (argmax). """ locked_dims = locked_dims or {} _, _arg = get_extremum( @@ -154,11 +157,13 @@ def get_min( ) -> Tuple[float, torch.Tensor]: """Return the minimum of the modeled function, subject to constraints Args: - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the inverse is along a slice of the full surface. - probability_space (bool): Is y (and therefore the returned nearest_y) in + probability_space (bool, optional): Is y (and therefore the returned nearest_y) in probability space instead of latent function space? Defaults to False. - n_samples int: number of coarse grid points to sample for optimization estimate. + n_samples (int, optional): number of coarse grid points to sample for optimization estimate. + max_time (Optional[float], optional): Maximum time to spend optimizing. Defaults to None. + Returns: Tuple[float, torch.Tensor]: Tuple containing the min and its location (argmin). """ @@ -185,12 +190,17 @@ def inv_query( """Query the model inverse. Return nearest x such that f(x) = queried y, and also return the value of f at that point. + Args: y (float): Points at which to find the inverse. - locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the + locked_dims (Mapping[int, List[float]], optional): Dimensions to fix, so that the inverse is along a slice of the full surface. 
- probability_space (bool): Is y (and therefore the returned nearest_y) in
+ probability_space (bool, optional): Is y (and therefore the returned nearest_y) in
 probability space instead of latent function space? Defaults to False.
+ n_samples (int, optional): number of coarse grid points to sample for optimization estimate. Defaults to 1000.
+ max_time (Optional[float], optional): Maximum time to spend optimizing. Defaults to None.
+ weights (Optional[torch.Tensor], optional): Weights for the optimization. Defaults to None.
+
 Returns:
 Tuple[float, torch.Tensor]: Tuple containing the value of f
 nearest to queried y and the x position of this value.
@@ -233,8 +243,8 @@ def get_jnd(
 Both definitions are equivalent for linear
 psychometric functions.
 Args:
- grid (Optional[np.ndarray], optional): Mesh grid over which to find the JND.
- Defaults to a square grid of size as determined by aepsych.utils.dim_grid
+ grid (Optional[Union[np.ndarray, torch.Tensor]], optional): Mesh grid over which to find the JND.
+ Defaults to a square grid of size as determined by aepsych.utils.dim_grid.
 cred_level (float, optional): Credible level for computing an interval.
 Defaults to None, computing no interval.
 intensity_dim (int, optional): Dimension over which to compute the JND.
@@ -310,13 +320,21 @@ def dim_grid(
 gridsize: int = 30,
 slice_dims: Optional[Mapping[int, float]] = None,
 ) -> torch.Tensor:
+ """Generate a grid based on lower, upper, and dim.
+
+ Args:
+ gridsize (int, optional): Number of points in each dimension. Defaults to 30.
+ slice_dims (Optional[Mapping[int, float]], optional): Dimensions to fix at a
+ certain value. Defaults to None.
+
+ Returns:
+ torch.Tensor: The resulting grid of points."""
 return dim_grid(self.lb, self.ub, gridsize, slice_dims)
 def set_train_data(self, inputs: Optional[torch.Tensor] = None, targets: Optional[torch.Tensor] = None, strict: bool = False):
 """
- :param torch.Tensor inputs: The new training inputs.
- :param torch.Tensor targets: The new training targets.
- :param bool strict: (default False, ignored). Here for compatibility with
+ Args:
+ inputs (Optional[torch.Tensor], optional): The new training inputs.
+ targets (Optional[torch.Tensor], optional): The new training targets.
+ strict (bool, optional): Default is False; ignored, present only for compatibility with
+ input transformers.
 TODO: actually use this arg or change input transforms
 to not require it.
 """
@@ -327,6 +345,13 @@ def set_train_data(self, inputs: Optional[torch.Tensor] = None, targets: Optiona
 self.train_targets = targets
 def normalize_inputs(self, x: torch.Tensor) -> torch.Tensor:
+ """Normalize the input based on the defined bounds.
+
+ Args:
+ x (torch.Tensor): Tensor of points to normalize.
+
+ Returns:
+ torch.Tensor: Normalized tensor of points."""
 scale = self.ub - self.lb
 return (x - self.lb) / scale
@@ -353,6 +378,13 @@ def _fit_mll(
 optimizer=fit_gpytorch_mll_scipy,
 **kwargs,
 ) -> None:
+ """Fits the model by maximizing the marginal log likelihood.
+
+ Args:
+ mll (MarginalLogLikelihood): Marginal log likelihood object.
+ optimizer_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments for the optimizer. Defaults to None.
+ optimizer: Optimizer to use. Defaults to fit_gpytorch_mll_scipy.
+ """
 self.train()
 train_x, train_y = mll.model.train_inputs[0], mll.model.train_targets
 optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs.copy()
@@ -375,6 +407,15 @@ def _fit_mll(
 return res
 def p_below_threshold(self, x: torch.Tensor, f_thresh: torch.Tensor) -> torch.Tensor:
+ """Compute the probability that the latent function is below a threshold.
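+ For a Gaussian posterior this is Phi((f_thresh - f) / sqrt(var)), evaluated
+ for every combination of threshold and query point.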
+
+ Args:
+ x (torch.Tensor): Points at which to evaluate the probability.
+ f_thresh (torch.Tensor): Threshold value.
+
+ Returns:
+ torch.Tensor: Probability that the latent function is below the threshold.
+ """
 f, var = self.predict(x)
 f_thresh = f_thresh.reshape(-1, 1)
 f = f.reshape(1, -1)
diff --git a/aepsych/models/gp_classification.py b/aepsych/models/gp_classification.py
index ed64c76ba..e84c5f8b0 100644
--- a/aepsych/models/gp_classification.py
+++ b/aepsych/models/gp_classification.py
@@ -69,10 +69,10 @@ def __init__(
 gamma prior.
 likelihood (gpytorch.likelihood.Likelihood, optional): The likelihood function to use. If None defaults to Bernoulli likelihood.
- inducing_size (int, optional): Number of inducing points. Defaults to 99.
+ inducing_size (Optional[int], optional): Number of inducing points. Defaults to 99.
 max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None, there is no limit to the fitting time.
- inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
 If "sobol", a number of Sobol points equal to inducing_size will be selected.
 If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
 If "kmeans++", selects points by performing kmeans++ clustering on the training data.
@@ -179,6 +179,7 @@ def from_config(cls, config: Config) -> GPClassificationModel:
 )
 def _reset_hyperparameters(self) -> None:
+ """Reset hyperparameters to their initial values."""
 # warmstart_hyperparams affects hyperparams but not the variational strat,
 # so we keep the old variational strat (which is only refreshed
 # if warmstart_induc=False).
@@ -190,6 +191,7 @@ def _reset_hyperparameters(self) -> None:
 self.likelihood.load_state_dict(self._fresh_likelihood_dict)
 def _reset_variational_strategy(self) -> None:
+ """Reset the variational strategy."""
 inducing_points = select_inducing_points(
 inducing_size=self.inducing_size,
 covar_module=self.covar_module,
@@ -221,9 +223,9 @@ def fit(
 Args:
 train_x (torch.Tensor): Inputs.
 train_y (torch.LongTensor): Responses.
- warmstart_hyperparams (bool): Whether to reuse the previous hyperparameters (True) or fit from scratch
+ warmstart_hyperparams (bool, optional): Whether to reuse the previous hyperparameters (True) or fit from scratch
 (False). Defaults to False.
- warmstart_induc (bool): Whether to reuse the previous inducing points or fit from scratch (False).
+ warmstart_induc (bool, optional): Whether to reuse the previous inducing points or fit from scratch (False).
 Defaults to False.
 """
 self.set_train_data(train_x, train_y)
@@ -300,10 +302,23 @@ def predict(
 def predict_probability(
 self, x: torch.Tensor
 ) -> Tuple[torch.Tensor, torch.Tensor]:
+ """Query the model for posterior mean and variance in probability space.
+
+ Args:
+ x (torch.Tensor): Points at which to predict from the model.
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points.
+ """
 return self.predict(x, probability_space=True)
 def update(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs):
- """Perform a warm-start update of the model from previous fit."""
+ """Perform a warm-start update of the model from previous fit.
+
+ Args:
+ train_x (torch.Tensor): Inputs.
+ train_y (torch.Tensor): Responses.
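+
+ Example (illustrative; ``model`` is a previously fit GPClassificationModel and
+ ``new_x``, ``new_y`` are placeholder tensors):
+ >>> model.update(new_x, new_y) # warm-starts both hyperparameters and inducing points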
+ """
 return self.fit(
 train_x, train_y, warmstart_hyperparams=True, warmstart_induc=True, **kwargs
 )
@@ -324,6 +339,23 @@ def __init__(
 max_fit_time: Optional[float] = None,
 inducing_point_method: str = "auto",
 ) -> None:
+ """Initialize the GP Beta Regression model
+
+ Args:
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
+ dim (Optional[int], optional): The number of dimensions in the parameter space. If None, it is inferred from the size
+ of lb and ub. Defaults to None.
+ mean_module (Optional[gpytorch.means.Mean], optional): GP mean class. Defaults to a constant with a normal prior.
+ covar_module (Optional[gpytorch.kernels.Kernel], optional): GP covariance kernel class. Defaults to scaled RBF with a
+ gamma prior.
+ likelihood (gpytorch.likelihood.Likelihood, optional): The likelihood function to use. If None defaults to
+ Beta likelihood.
+ inducing_size (Optional[int], optional): Number of inducing points. Defaults to 100.
+ max_fit_time (Optional[float], optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
+ there is no limit to the fitting time. Defaults to None.
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
+ """
 if likelihood is None:
 likelihood = BetaLikelihood()
 super().__init__(
@@ -336,4 +368,4 @@ def __init__(
 inducing_size=inducing_size,
 max_fit_time=max_fit_time,
 inducing_point_method=inducing_point_method,
- )
+ )
\ No newline at end of file
diff --git a/aepsych/models/gp_regression.py b/aepsych/models/gp_regression.py
index ece03581a..cb0c274b4 100644
--- a/aepsych/models/gp_regression.py
+++ b/aepsych/models/gp_regression.py
@@ -33,8 +33,8 @@ class GPRegressionModel(AEPsychMixin, ExactGP):
 def __init__(
 self,
- lb: Union[np.ndarray, torch.Tensor],
- ub: Union[np.ndarray, torch.Tensor],
+ lb: torch.Tensor,
+ ub: torch.Tensor,
 dim: Optional[int] = None,
 mean_module: Optional[gpytorch.means.Mean] = None,
 covar_module: Optional[gpytorch.kernels.Kernel] = None,
@@ -44,8 +44,8 @@ def __init__(
 """Initialize the GP regression model
 Args:
- lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters.
- ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters.
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
 dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size
 of lb and ub.
 mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior.
@@ -77,6 +77,14 @@ def __init__(
 @classmethod
 def construct_inputs(cls, config: Config) -> Dict:
+ """Construct inputs for the GP regression model from configuration.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+
+ Returns:
+ Dict: Dictionary of inputs for the GP regression model.
+ """
 classname = cls.__name__
 lb = config.gettensor(classname, "lb")
@@ -118,7 +126,7 @@ def from_config(cls, config: Config) -> GPRegressionModel:
 from a configuration. TODO: document how this works in some tutorial.
 Args:
- config (Config): A configuration containing keys/values matching this class
+ config (Config): A configuration containing keys/values matching this class.
 Returns:
 GPRegressionModel: Configured class instance.
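+
+ Example (illustrative sketch; assumes an INI-style config string as accepted by aepsych's Config):
+ >>> config = Config(config_str="[GPRegressionModel]\nlb = [0]\nub = [1]")
+ >>> model = GPRegressionModel.from_config(config)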
@@ -140,7 +148,7 @@ def fit(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs) -> None:
 return self._fit_mll(mll, **kwargs)
 def sample(
- self, x: Union[torch.Tensor, np.ndarray], num_samples: int
+ self, x: torch.Tensor, num_samples: int
 ) -> torch.Tensor:
 """Sample from underlying model.
@@ -155,11 +163,15 @@ def sample(
 return self.posterior(x).rsample(torch.Size([num_samples])).detach().squeeze()
 def update(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs):
- """Perform a warm-start update of the model from previous fit."""
+ """Perform a warm-start update of the model from previous fit.
+
+ Args:
+ train_x (torch.Tensor): Inputs.
+ train_y (torch.Tensor): Responses."""
 return self.fit(train_x, train_y, **kwargs)
 def predict(
- self, x: Union[torch.Tensor, np.ndarray], **kwargs
+ self, x: torch.Tensor, **kwargs
 ) -> Tuple[torch.Tensor, torch.Tensor]:
 """Query the model for posterior mean and variance.
@@ -169,7 +181,7 @@ def predict(
 response probability instead of latent function value. Defaults to False.
 Returns:
- Tuple[np.ndarray, np.ndarray]: Posterior mean and variance at queries points.
+ Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points.
 """
 with torch.no_grad():
 post = self.posterior(x)
diff --git a/aepsych/models/monotonic_projection_gp.py b/aepsych/models/monotonic_projection_gp.py
index 925cca184..d8173de82 100644
--- a/aepsych/models/monotonic_projection_gp.py
+++ b/aepsych/models/monotonic_projection_gp.py
@@ -105,6 +105,27 @@ def __init__(
 max_fit_time: Optional[float] = None,
 inducing_point_method: str = "auto",
 ) -> None:
+ """Initialize the MonotonicProjectionGP model.
+
+ Args:
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
+ monotonic_dims (List[int]): A list of the dimensions on which monotonicity should
+ be enforced.
+ monotonic_grid_size (int, optional): The size of the grid, s, in step 1 above. Defaults to 20.
+ min_f_val (Optional[float], optional): If provided, maintains this minimum in the projection in step 5. Defaults to None.
+ dim (Optional[int], optional): The number of dimensions in the parameter space. If None, it is inferred from the size
+ of lb and ub. Defaults to None.
+ mean_module (Optional[gpytorch.means.Mean], optional): GP mean class. Defaults to a constant with a normal prior.
+ covar_module (Optional[gpytorch.kernels.Kernel], optional): GP covariance kernel class. Defaults to scaled RBF with a
+ gamma prior.
+ likelihood (Optional[Likelihood], optional): The likelihood function to use. If None defaults to
+ Gaussian likelihood.
+ inducing_size (Optional[int], optional): The number of inducing points to use. Defaults to None.
+ max_fit_time (Optional[float], optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
+ there is no limit to the fitting time. Defaults to None.
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
+ """
 assert len(monotonic_dims) > 0
 self.monotonic_dims = [int(d) for d in monotonic_dims]
 self.mon_grid_size = monotonic_grid_size
@@ -127,6 +148,16 @@ def posterior(
 observation_noise: Union[bool, torch.Tensor] = False,
 **kwargs: Any,
 ) -> GPyTorchPosterior:
+ """Compute the posterior at X, projecting to enforce monotonicity.
+
+ Args:
+ X (torch.Tensor): The input points at which to compute the posterior.
+ observation_noise (Union[bool, torch.Tensor], optional): Whether or not to include the observation noise in the
+ posterior. Defaults to False.
+
+ Returns:
+ GPyTorchPosterior: The posterior at X.
+ """
 # Augment X with monotonicity grid points, for each monotonic dim
 n, d = X.shape # Require no batch dimensions
 m = len(self.monotonic_dims)
@@ -169,6 +200,15 @@ def posterior(
 def sample(
 self, x: torch.Tensor, num_samples: int
 ) -> torch.Tensor:
+ """Sample from the model.
+
+ Args:
+ x (torch.Tensor): The input points at which to sample.
+ num_samples (int): The number of samples to draw.
+
+ Returns:
+ torch.Tensor: The samples at x.
+ """
 samps = super().sample(x=x, num_samples=num_samples)
 if self.min_f_val is not None:
 samps = samps.clamp(min=self.min_f_val)
diff --git a/aepsych/models/monotonic_rejection_gp.py b/aepsych/models/monotonic_rejection_gp.py
index b35efcb05..970776be0 100644
--- a/aepsych/models/monotonic_rejection_gp.py
+++ b/aepsych/models/monotonic_rejection_gp.py
@@ -52,8 +52,8 @@ class MonotonicRejectionGP(AEPsychMixin, ApproximateGP):
 def __init__(
 self,
 monotonic_idxs: Sequence[int],
- lb: Union[np.ndarray, torch.Tensor],
- ub: Union[np.ndarray, torch.Tensor],
+ lb: torch.Tensor,
+ ub: torch.Tensor,
 dim: Optional[int] = None,
 mean_module: Optional[Mean] = None,
 covar_module: Optional[Kernel] = None,
@@ -67,22 +67,23 @@ def __init__(
 """Initialize MonotonicRejectionGP.
 Args:
+ monotonic_idxs (Sequence[int]): List of which columns of x should be given monotonicity
+ constraints.
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
+ dim (Optional[int], optional): The number of dimensions in the parameter space. If None, it is inferred from the size
+ of lb and ub.
+ covar_module (Optional[Kernel], optional): Covariance kernel to use. Default is scaled RBF.
+ mean_module (Optional[Mean], optional): Mean module to use. Default is constant mean.
 likelihood (str): Link function and likelihood. Can be 'probit-bernoulli' or 'identity-gaussian'.
- monotonic_idxs (List[int]): List of which columns of x should be given monotonicity
- constraints.
 fixed_prior_mean (Optional[float], optional): Fixed prior mean. If classification, should be the prior
 classification probability (not the latent function value). Defaults to None.
- covar_module (Optional[Kernel], optional): Covariance kernel to use (default: scaled RBF).
- mean_module (Optional[Mean], optional): Mean module to use (default: constant mean).
 num_induc (int, optional): Number of inducing points for variational GP. Defaults to 25.
 num_samples (int, optional): Number of samples for estimating posterior on predict or
 acquisition function evaluation. Defaults to 250.
- num_rejection_samples (int, optional): Number of samples used for rejection sampling. Defaults to 4096.
- acqf (MonotonicMCAcquisition, optional): Acquisition function to use for querying points. Defaults to MonotonicMCLSE.
- objective (Optional[MCAcquisitionObjective], optional): Transformation of GP to apply before computing acquisition function. Defaults to identity transform for gaussian likelihood, probit transform for probit-bernoulli.
- extra_acqf_args (Optional[Dict[str, object]], optional): Additional arguments to pass into the acquisition function. Defaults to None.
- """
+ num_rejection_samples (int, optional): Number of samples used for rejection sampling. Defaults to 4096.
+ inducing_point_method (str, optional): Method for selecting inducing points. Defaults to "auto".
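+
+ Example (illustrative; bounds and the monotonic column are placeholders):
+ >>> m = MonotonicRejectionGP(monotonic_idxs=[0], lb=torch.tensor([0.0]), ub=torch.tensor([1.0]))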
+ """ self.lb, self.ub, self.dim = _process_bounds(lb, ub, dim) if likelihood is None: likelihood = BernoulliLikelihood() @@ -172,6 +173,14 @@ def _set_model( model_state_dict: Optional[Dict[str, Tensor]] = None, likelihood_state_dict: Optional[Dict[str, Tensor]] = None, ) -> None: + """Sets the model with the given data and state dicts. + + Args: + train_x (Tensor): Training x points + train_y (Tensor): Training y points. Should be (n x 1). + model_state_dict (Optional[Dict[str, Tensor]], optional): State dict for the model + likelihood_state_dict (Optional[Dict[str, Tensor]], optional): State dict for the likelihood + """ train_x_aug = self._augment_with_deriv_index(train_x, 0) self.set_train_data(train_x_aug, train_y) # Set model parameters @@ -195,7 +204,7 @@ def update(self, train_x: Tensor, train_y: Tensor, warmstart: bool = True) -> No Args: train_x (Tensor): Train X. train_y (Tensor): Train Y. Should be (n x 1). - warmstart (bool): If True, warm-start model fitting with current parameters. + warmstart (bool, optional): If True, warm-start model fitting with current parameters. Defaults to True. """ if warmstart: model_state_dict = self.state_dict() @@ -220,7 +229,8 @@ def sample( Args: x (Tensor): tensor of n points at which to sample - num_samples (int, optional): how many points to sample (default: self.num_samples) + num_samples (Optional[int], optional): how many points to sample. Default is self.num_samples. + num_rejection_samples (Optional[int], optional): how many samples to use for rejection sampling. Default is self.num_rejection_samples. Returns: a Tensor of shape [n_samp, n] """ @@ -263,7 +273,8 @@ def predict( """Predict Args: - x: tensor of n points at which to predict. + x (torch.Tensor): tensor of n points at which to predict. + probability_space (bool, optional): whether to return in probability space. Defaults to False. Returns: tuple (f, var) where f is (n,) and var is (n,) """ @@ -280,17 +291,35 @@ def predict( return mean, variance def predict_probability( - self, x: Union[torch.Tensor, np.ndarray] + self, x: torch.Tensor ) -> Tuple[torch.Tensor, torch.Tensor]: + """Predict in probability space + + Args: + x (torch.Tensor): Points at which to predict. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points. 
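+
+ Example (illustrative; assumes a fitted model ``m`` and a tensor of query points ``x``):
+ >>> p_mean, p_var = m.predict_probability(x)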
+ """
 return self.predict(x, probability_space=True)
- def _augment_with_deriv_index(self, x: Tensor, indx) -> Tensor:
+ def _augment_with_deriv_index(self, x: Tensor, indx: int) -> Tensor:
+ """Augment input with derivative index.
+
+ Args:
+ x (Tensor): Input tensor.
+ indx (int): Derivative index.
+
+ Returns:
+ Tensor: Augmented tensor.
+ """
 return torch.cat(
 (x, indx * torch.ones(x.shape[0], 1)),
 dim=1,
 )
 def _get_deriv_constraint_points(self) -> Tensor:
+ """Get derivative constraint points."""
 deriv_cp = torch.tensor([])
 for i in self.monotonic_idxs:
 induc_i = self._augment_with_deriv_index(self.inducing_points, i + 1)
@@ -299,6 +328,14 @@ def _get_deriv_constraint_points(self) -> Tensor:
 @classmethod
 def from_config(cls, config: Config) -> MonotonicRejectionGP:
+ """Alternate constructor for MonotonicRejectionGP.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+
+ Returns:
+ MonotonicRejectionGP: Configured class instance.
+ """
 classname = cls.__name__
 num_induc = config.gettensor(classname, "num_induc", fallback=25)
 num_samples = config.gettensor(classname, "num_samples", fallback=250)
diff --git a/aepsych/models/multitask_regression.py b/aepsych/models/multitask_regression.py
index e1b683678..d2e662c55 100644
--- a/aepsych/models/multitask_regression.py
+++ b/aepsych/models/multitask_regression.py
@@ -79,6 +79,14 @@ def __init__(
 )
 def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultitaskMultivariateNormal:
+ """Evaluate GP.
+
+ Args:
+ x (torch.Tensor): Tensor of points at which GP should be evaluated.
+
+ Returns:
+ gpytorch.distributions.MultitaskMultivariateNormal: Distribution object
+ holding the mean and covariance at x."""
 transformed_x = self.normalize_inputs(x)
 mean_x = self.mean_module(transformed_x)
 covar_x = self.covar_module(transformed_x)
@@ -86,6 +94,11 @@
 @classmethod
 def construct_inputs(cls, config: Config):
+ """Construct inputs for the Multitask GPR model from configuration.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+ """
 classname = cls.__name__
 args = super().construct_inputs(config)
 args["num_outputs"] = config.getint(classname, "num_outputs", fallback=2)
@@ -151,6 +164,14 @@ def __init__(
 ) # type: ignore # mypy issue 4335
 def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultitaskMultivariateNormal:
+ """Evaluate GP.
+
+ Args:
+ x (torch.Tensor): Tensor of points at which GP should be evaluated.
+
+ Returns:
+ gpytorch.distributions.MultitaskMultivariateNormal: Distribution object
+ holding the mean and covariance at x."""
 base_mvn = super().forward(x) # do transforms
 return gpytorch.distributions.MultitaskMultivariateNormal.from_batch_mvn(
 base_mvn
 )
@@ -158,6 +179,14 @@ def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultitaskMultivaria
 @classmethod
 def get_config_args(cls, config: Config) -> Dict[str, Any]:
+ """Get configuration arguments for the model.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+
+ Returns:
+ Dict[str, Any]: Dictionary of configuration arguments.
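+
+ Note that this extends the parent class's config arguments with multitask-specific
+ entries such as ``num_outputs`` (see the body below).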
+ """
 classname = cls.__name__
 args = super().get_config_args(config)
 args["num_outputs"] = config.getint(classname, "num_outputs", fallback=2)
diff --git a/aepsych/models/ordinal_gp.py b/aepsych/models/ordinal_gp.py
index dc72ead97..1587c1078 100644
--- a/aepsych/models/ordinal_gp.py
+++ b/aepsych/models/ordinal_gp.py
@@ -26,6 +26,12 @@ class OrdinalGPModel(GPClassificationModel):
 outcome_type = "ordinal"
 def __init__(self, likelihood=None, *args, **kwargs):
+ """Initialize the OrdinalGPModel
+
+ Args:
+ likelihood (Optional[Likelihood], optional): The likelihood function to use. If None defaults to
+ Ordinal likelihood.
+ """
 covar_module = kwargs.pop("covar_module", None)
 dim = kwargs.get("dim")
 if covar_module is None:
@@ -52,10 +58,27 @@ def __init__(self, likelihood=None, *args, **kwargs):
 )
 def predict_probs(self, xgrid:torch.Tensor) -> torch.Tensor:
+ """Predict probabilities of each ordinal level at xgrid
+
+ Args:
+ xgrid (torch.Tensor): Tensor of input points to predict at
+
+ Returns:
+ torch.Tensor: Tensor of probabilities of each ordinal level at xgrid
+ """
 fmean, fvar = self.predict(xgrid)
 return self.calculate_probs(fmean, fvar)
 def calculate_probs(self, fmean: torch.Tensor, fvar: torch.Tensor) -> torch.Tensor:
+ """Calculate probabilities of each ordinal level given a mean and variance
+
+ Args:
+ fmean (torch.Tensor): Mean of the latent function
+ fvar (torch.Tensor): Variance of the latent function
+
+ Returns:
+ torch.Tensor: Tensor of probabilities of each ordinal level
+ """
 fsd = torch.sqrt(1 + fvar)
 probs = torch.zeros(*fmean.size(), self.likelihood.n_levels)
diff --git a/aepsych/models/pairwise_probit.py b/aepsych/models/pairwise_probit.py
index f0497d8e7..68cc37baa 100644
--- a/aepsych/models/pairwise_probit.py
+++ b/aepsych/models/pairwise_probit.py
@@ -29,10 +29,19 @@ class PairwiseProbitModel(PairwiseGP, AEPsychMixin):
 outcome_type = "binary"
 def _pairs_to_comparisons(self, x: torch.Tensor, y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
- """
- Takes x, y structured as pairs and judgments and
- returns pairs and comparisons as PairwiseGP requires
- """
+ """Convert pairs of points and their judgments to comparisons.
+
+ Args:
+ x (torch.Tensor): Tensor of shape (n, 2, d) where n is the number of pairs and d is the dimensionality of the
+ parameter space.
+ y (torch.Tensor): Tensor of shape (n,) where n is the number of pairs. Each element is 0 if the first point
+ in the pair is preferred, and 1 if the second point is preferred.
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: A tuple of tensors. The first tensor contains the unique points in the
+ pairs and has shape (number of unique points, d). The second tensor is of shape (n, 2) and contains, for each
+ pair, the indices into the first tensor of the points in that pair.
+ """
 # This needs to take a unique over the feature dim by flattening
 # over pairs but not instances/batches. This is actually tensor
 # matricization over the feature dimension but awkward in numpy
@@ -63,6 +72,17 @@ def __init__(
 covar_module: Optional[gpytorch.kernels.Kernel] = None,
 max_fit_time: Optional[float] = None,
 ) -> None:
+ """Initialize the PairwiseProbitModel
+
+ Args:
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
+ dim (Optional[int], optional): The number of dimensions in the parameter space. If None, it is inferred from the size
+ of lb and ub. Defaults to None.
+ covar_module (Optional[gpytorch.kernels.Kernel], optional): GP covariance kernel class.
Defaults to scaled RBF with a
+ gamma prior.
+ max_fit_time (Optional[float], optional): The maximum amount of time, in seconds, to spend fitting the model. Defaults to None.
+ """
 self.lb, self.ub, dim = _process_bounds(lb, ub, dim)
 self.max_fit_time = max_fit_time
@@ -99,6 +119,13 @@ def fit(
 optimizer_kwargs: Optional[Dict[str, Any]] = None,
 **kwargs,
 ) -> None:
+ """Fit the model to the training data.
+
+ Args:
+ train_x (torch.Tensor): Training x points.
+ train_y (torch.Tensor): Training y points.
+ optimizer_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments to pass to the optimizer. Defaults to None.
+ """
 self.train()
 mll = PairwiseLaplaceMarginalLogLikelihood(self.likelihood, self)
 datapoints, comparisons = self._pairs_to_comparisons(train_x, train_y)
@@ -123,12 +150,27 @@ def fit(
 def update(
 self, train_x: torch.Tensor, train_y: torch.Tensor, warmstart: bool = True
 ) -> None:
- """Perform a warm-start update of the model from previous fit."""
+ """Perform a warm-start update of the model from previous fit.
+
+ Args:
+ train_x (torch.Tensor): Train X.
+ train_y (torch.Tensor): Train Y.
+ warmstart (bool, optional): If True, warm-start model fitting with current parameters. Defaults to True.
+ """
 self.fit(train_x, train_y)
 def predict(
 self, x: torch.Tensor, probability_space: bool =False, num_samples: int =1000, rereference: str ="x_min"
 ) -> Tuple[torch.Tensor, torch.Tensor]:
+ """Query the model for posterior mean and variance.
+
+ Args:
+ x (torch.Tensor): Points at which to predict from the model.
+ probability_space (bool, optional): Return outputs in units of response probability instead of latent function value. Defaults to False.
+ num_samples (int, optional): Number of samples used to estimate the posterior. Defaults to 1000.
+ rereference (str, optional): How to rereference samples; see sample(). Defaults to "x_min".
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points.
+ """
 if rereference is not None:
 samps = self.sample(x, num_samples, rereference)
 fmean, fvar = samps.mean(0).squeeze(), samps.var(0).squeeze()
@@ -147,11 +189,30 @@ def predict(
 def predict_probability(
 self, x: torch.Tensor, probability_space: bool = False, num_samples: int = 1000, rereference: str = "x_min"
 ) -> Tuple[torch.Tensor, torch.Tensor]:
+ """Query the model for posterior mean and variance in probability space.
+
+ Args:
+ x (torch.Tensor): Points at which to predict from the model.
+ probability_space (bool, optional): Ignored; this method always returns values in probability space. Defaults to False.
+ num_samples (int, optional): Number of samples used to estimate the posterior. Defaults to 1000.
+ rereference (str, optional): How to rereference samples; see sample(). Defaults to "x_min".
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: Posterior mean and variance at query points.
+ """
 return self.predict(
 x, probability_space=True, num_samples=num_samples, rereference=rereference
 )
 def sample(self, x: torch.Tensor, num_samples: int, rereference: str = "x_min") -> torch.Tensor:
+ """Sample from the model.
+
+ Args:
+ x (torch.Tensor): Points at which to sample.
+ num_samples (int): Number of samples to return.
+ rereference (str, optional): How to sample. Options are "x_min", "x_max", "f_min", "f_max". Defaults to "x_min".
+
+ Returns:
+ torch.Tensor: Posterior samples [num_samples x dim]
+ """
 if len(x.shape) < 2:
 x = x.reshape(-1, 1)
 if rereference is None:
@@ -180,7 +241,14 @@ def sample(self, x: torch.Tensor, num_samples: int, rereference: str = "x_min")
 @classmethod
 def from_config(cls, config: Config) -> 'PairwiseProbitModel':
+ """Initialize the model from a config object.
+
+ Args:
+ config (Config): A configuration containing keys/values matching this class.
+
+ Returns:
+ PairwiseProbitModel: Configured class instance.
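+
+ Example (illustrative sketch; config keys mirror the constructor arguments):
+ >>> config = Config(config_str="[PairwiseProbitModel]\nlb = [0]\nub = [1]")
+ >>> model = PairwiseProbitModel.from_config(config)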
+ """ classname = cls.__name__ mean_covar_factory = config.getobj( diff --git a/aepsych/models/semi_p.py b/aepsych/models/semi_p.py index fd3821188..52554892a 100644 --- a/aepsych/models/semi_p.py +++ b/aepsych/models/semi_p.py @@ -41,6 +41,16 @@ def _hadamard_mvn_approx(x_intensity: torch.Tensor, slope_mean: torch.Tensor, sl MVN approximation to the hadamard product of GPs (from the SemiP paper, extending the zero-mean results in https://mathoverflow.net/questions/293955/normal-approximation-to-the-pointwise-hadamard-schur-product-of-two-multivariat) + + Args: + x_intensity (torch.Tensor): The intensity dimension + slope_mean (torch.Tensor): The mean of the slope GP + slope_cov (torch.Tensor): The covariance of the slope GP + offset_mean (torch.Tensor): The mean of the offset GP + offset_cov (torch.Tensor): The covariance of the offset GP + + Returns: + Tuple[torch.Tensor, torch.Tensor]: The mean and covariance of the approximated MVN """ offset_mean = offset_mean + x_intensity @@ -60,6 +70,13 @@ def _hadamard_mvn_approx(x_intensity: torch.Tensor, slope_mean: torch.Tensor, sl def semi_p_posterior_transform(posterior: GPyTorchPosterior) -> GPyTorchPosterior: + """Transform a posterior from a SemiP model to a Hadamard model. + + Args: + posterior (GPyTorchPosterior): The posterior to transform + + Returns: + GPyTorchPosterior: The transformed posterior""" batch_mean = posterior.mvn.mean batch_cov = posterior.mvn.covariance_matrix offset_mean = batch_mean[..., 0, :] @@ -85,6 +102,14 @@ def __init__( likelihood: LinearBernoulliLikelihood, Xi: torch.Tensor, ) -> None: + """Initialize a SemiPPosterior object. + + Args: + mvn (MultivariateNormal): The MVN object to use + likelihood (LinearBernoulliLikelihood): The likelihood object + Xi (torch.Tensor): The intensity dimension + """ + super().__init__(distribution=mvn) self.likelihood = likelihood self.Xi = Xi @@ -98,6 +123,10 @@ def rsample_from_base_samples( This is intended to be used with a sampler that produces the corresponding base samples, and enables acquisition optimization via Sample Average Approximation. + + Args: + sample_shape (torch.Size): The desired shape of the samples + base_samples (Tensor): The base samples """ return ( super() @@ -112,6 +141,15 @@ def rsample( sample_shape: Optional[torch.Size] = None, base_samples: Optional[torch.Tensor] = None, ) -> torch.Tensor: + """Sample from the posterior distribution using the reparameterization trick + + Args: + sample_shape (Optional[torch.Size], optional): The desired shape of the samples. Defaults to None. + base_samples (Optional[torch.Tensor], optional): The base samples. Defaults to None. + + Returns: + torch.Tensor: The sampled values from the posterior distribution. + """ if base_samples is None: samps_ = super().rsample(sample_shape=sample_shape) else: @@ -130,6 +168,14 @@ def sample_p( sample_shape: Optional[torch.Size] = None, base_samples: Optional[torch.Tensor] = None, ) -> torch.Tensor: + """Sample from the likelihood distribution of the modeled function. + + Args: + sample_shape (Optional[torch.Size], optional): The desired shape of the samples. Defaults to None. + base_samples (Optional[torch.Tensor], optional): The base samples. Defaults to None. 
+
+ Returns:
+ torch.Tensor: The sampled values from the likelihood distribution."""
 kcsamps = self.rsample(sample_shape=sample_shape, base_samples=base_samples)
 return self.likelihood.p(function_samples=kcsamps, Xi=self.Xi).squeeze(-1)
 def sample_f(
 self,
 sample_shape: Optional[torch.Size] = None,
 base_samples: Optional[torch.Tensor] = None,
 ) -> torch.Tensor:
+ """Sample from the function values of the modeled distribution.
+
+ Args:
+ sample_shape (Optional[torch.Size], optional): The desired shape of the samples. Defaults to None.
+ base_samples (Optional[torch.Tensor], optional): The base samples. Defaults to None.
+
+ Returns:
+ torch.Tensor: The sampled function values from the likelihood."""
+
 kcsamps = self.rsample(sample_shape=sample_shape, base_samples=base_samples)
 return self.likelihood.f(function_samples=kcsamps, Xi=self.Xi).squeeze(-1)
 def sample_thresholds(
 self,
 threshold_level: float,
 sample_shape: Optional[torch.Size] = None,
 base_samples: Optional[torch.Tensor] = None,
 ) -> SemiPThresholdObjective:
+ """Sample the thresholds based on the given threshold level.
+
+ Args:
+ threshold_level (float): The target threshold level for sampling.
+ sample_shape (Optional[torch.Size], optional): The desired shape of the samples. Defaults to None.
+ base_samples (Optional[torch.Tensor], optional): The base samples. Defaults to None.
+
+ Returns:
+ SemiPThresholdObjective: The sampled thresholds based on the threshold level.
+ """
 fsamps = self.rsample(sample_shape=sample_shape, base_samples=base_samples)
 return SemiPThresholdObjective(
@@ -174,8 +239,8 @@ class SemiParametricGPModel(GPClassificationModel):
 def __init__(
 self,
- lb: Union[np.ndarray, torch.Tensor],
- ub: Union[np.ndarray, torch.Tensor],
+ lb: torch.Tensor,
+ ub: torch.Tensor,
 dim: Optional[int] = None,
 stim_dim: int = 0,
 mean_module: Optional[gpytorch.means.Mean] = None,
@@ -190,20 +255,20 @@ def __init__(
 """
 Initialize SemiParametricGP.
 Args:
- lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters.
- ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters.
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
 dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size
- of lb and ub.
- stim_dim (int): Index of the intensity (monotonic) dimension. Defaults to 0.
+ of lb and ub. Defaults to None.
+ stim_dim (int, optional): Index of the intensity (monotonic) dimension. Defaults to 0.
 mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior.
 covar_module (gpytorch.kernels.Kernel, optional): GP covariance kernel class. Defaults to scaled RBF with a
 gamma prior.
 likelihood (gpytorch.likelihood.Likelihood, optional): The likelihood function to use. If None defaults to
 linear-Bernoulli likelihood with probit link.
- inducing_size (int): Number of inducing points. Defaults to 99.
+ inducing_size (Optional[int], optional): Number of inducing points. Defaults to 99.
 max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
 there is no limit to the fitting time.
- inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
 If "sobol", a number of Sobol points equal to inducing_size will be selected.
If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
 If "kmeans++", selects points by performing kmeans++ clustering on the training data.
@@ -315,9 +380,9 @@ def fit(
 Args:
 train_x (torch.Tensor): Inputs.
 train_y (torch.LongTensor): Responses.
- warmstart_hyperparams (bool): Whether to reuse the previous hyperparameters (True) or fit from scratch
+ warmstart_hyperparams (bool, optional): Whether to reuse the previous hyperparameters (True) or fit from scratch
 (False). Defaults to False.
- warmstart_induc (bool): Whether to reuse the previous inducing points or fit from scratch (False).
+ warmstart_induc (bool, optional): Whether to reuse the previous inducing points or fit from scratch (False).
 Defaults to False.
 kwargs: Keyword arguments passed to `optimizer=fit_gpytorch_mll_scipy`.
 """
@@ -335,13 +400,15 @@ def sample(
 self,
 x: torch.Tensor,
 num_samples: int,
- probability_space=False,
+ probability_space: bool = False,
 ) -> torch.Tensor:
 """Sample from underlying model.
 Args:
-
- x ((n x d) torch.Tensor): Points at which to sample.
- num_samples (int, optional): Number of samples to return. Defaults to None.
+
+ x (torch.Tensor): `n x d` points at which to sample.
+ num_samples (int): Number of samples to return.
+ probability_space (bool, optional): Whether to sample from the probability space (True) or the latent function. Defaults to False.
 kwargs are ignored
 Returns:
@@ -377,6 +444,15 @@ def predict(
 return promote_0d(m), promote_0d(v)
 def posterior(self, X: torch.Tensor, posterior_transform: Optional[PosteriorTransform] = None) -> SemiPPosterior:
+ """Get the posterior distribution at the given points.
+
+ Args:
+ X (torch.Tensor): Points at which to evaluate the posterior.
+ posterior_transform (Optional[PosteriorTransform], optional): A transform to apply to the posterior. Defaults to None.
+
+ Returns:
+ SemiPPosterior: The posterior distribution at the given points.
+ """
 # Assume x is (b) x n x d
 if X.ndim > 3:
 raise ValueError
@@ -436,8 +512,8 @@ def __init__(
 """
 Initialize HadamardSemiPModel.
 Args:
- lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters.
- ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters.
+ lb (torch.Tensor): Lower bounds of the parameters.
+ ub (torch.Tensor): Upper bounds of the parameters.
 dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size
 of lb and ub.
 stim_dim (int): Index of the intensity (monotonic) dimension. Defaults to 0.
 slope_mean_module (gpytorch.means.Mean, optional): Mean module to use (default: constant mean) for slope.
 slope_covar_module (gpytorch.kernels.Kernel, optional): Covariance kernel to use (default: scaled RBF) for slope.
 offset_mean_module (gpytorch.means.Mean, optional): Mean module to use (default: constant mean) for offset.
 offset_covar_module (gpytorch.kernels.Kernel, optional): Covariance kernel to use (default: scaled RBF) for offset.
 likelihood (gpytorch.likelihood.Likelihood, optional): defaults to bernoulli with logistic input and a floor of .5
- inducing_size (int): Number of inducing points. Defaults to 99.
+ inducing_size (Optional[int], optional): Number of inducing points. Defaults to 99.
 max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
 there is no limit to the fitting time.
- inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
+ inducing_point_method (string, optional): The method to use to select the inducing points. Defaults to "auto".
 If "sobol", a number of Sobol points equal to inducing_size will be selected.
If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
 If "kmeans++", selects points by performing kmeans++ clustering on the training data.
@@ -510,7 +586,7 @@ def __init__(
 self._fresh_likelihood_dict = deepcopy(self.likelihood.state_dict())
 def forward(self, x: torch.Tensor) -> MultivariateNormal:
- """Forward pass for semip GP.
+ """Forward pass for HadamardSemiPModel GP.
 generates a k(c + x[:,stim_dim]) = kc + kx[:,stim_dim] mvn object where k and c are slope and offset GPs and x[:,stim_dim] are the intensity stimulus (x)
diff --git a/aepsych/models/utils.py b/aepsych/models/utils.py
index 64998ce43..145f42526 100644
--- a/aepsych/models/utils.py
+++ b/aepsych/models/utils.py
@@ -59,6 +59,19 @@ def select_inducing_points(
 bounds: Optional[Union[torch.Tensor, np.ndarray]] = None,
 method: str = "auto",
 ) -> torch.Tensor:
+ """Select inducing points for GP model
+
+ Args:
+ inducing_size (int): Number of inducing points to select.
+ covar_module (Kernel): The kernel module to use for inducing point selection.
+ X (torch.Tensor): The training data.
+ bounds (Optional[Union[torch.Tensor, np.ndarray]], optional): The bounds of the input space. Defaults to None.
+ method (str, optional): The method to use for inducing point selection. One of
+ "pivoted_chol", "kmeans++", "auto", or "sobol". Defaults to "auto".
+
+ Returns:
+ torch.Tensor: The selected inducing points.
+ """
 with torch.no_grad():
 assert method in (
 "pivoted_chol",
@@ -104,6 +117,15 @@ def select_inducing_points(
def get_probability_space(likelihood: Likelihood, posterior: GPyTorchPosterior) -> Tuple[torch.Tensor, torch.Tensor]:
+ """Get the mean and variance of the probability space for a given posterior
+
+ Args:
+ likelihood (Likelihood): The likelihood function.
+ posterior (GPyTorchPosterior): The posterior to transform.
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: The mean and variance of the probability space.
+ """
 fmean = posterior.mean.squeeze()
 fvar = posterior.variance.squeeze()
 if isinstance(likelihood, BernoulliLikelihood):
@@ -233,14 +255,37 @@ class TargetDistancePosteriorTransform(PosteriorTransform):
 def __init__(
 self, target_value: Union[float, Tensor], weights: Optional[Tensor] = None
 ) -> None:
+ """Initialize the TargetDistancePosteriorTransform
+
+ Args:
+ target_value (Union[float, Tensor]): The target value to transform the posterior to.
+ weights (Optional[Tensor]): Weights to apply to the target value. Defaults to None.
+ """
 super().__init__()
 self.target_value = target_value
 self.weights = weights
 def evaluate(self, Y: Tensor) -> Tensor:
+ """Evaluate the squared distance from the target value.
+
+ Args:
+ Y (Tensor): The tensor to evaluate.
+
+ Returns:
+ Tensor: The squared distance from the target value.
+ """
 return (Y - self.target_value) ** 2
 def _forward(self, mean: Tensor, var: Tensor) -> GPyTorchPosterior:
+ """Transform the posterior mean and variance based on the target value.
+
+ Args:
+ mean (Tensor): The posterior mean.
+ var (Tensor): The posterior variance.
+
+ Returns:
+ GPyTorchPosterior: The transformed posterior.
+ """
 q, _ = mean.shape[-2:]
 batch_shape = mean.shape[:-2]
@@ -255,6 +300,14 @@ def _forward(self, mean: Tensor, var: Tensor) -> GPyTorchPosterior:
 return GPyTorchPosterior(mvn)
 def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior:
+ """Transform the given posterior distribution to reflect the target distance.
+
+ Args:
+ posterior (GPyTorchPosterior): The posterior to transform.
+
+ Returns:
+ GPyTorchPosterior: The transformed posterior.
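+
+ Example (illustrative; ``posterior`` is any GPyTorchPosterior):
+ >>> transform = TargetDistancePosteriorTransform(target_value=0.75)
+ >>> transformed = transform(posterior)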
+ """ mean = posterior.mean var = posterior.variance return self._forward(mean, var) @@ -263,6 +316,14 @@ def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior: # Requires botorch approximate model to accept posterior transforms class TargetProbabilityDistancePosteriorTransform(TargetDistancePosteriorTransform): def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior: + """Transform the given posterior distribution to reflect the target probability distance. + + Args: + posterior (GPyTorchPosterior): The posterior to transform. + + Returns: + GPyTorchPosterior: The transformed posterior distribution reflecting the target probability distance. + """ pmean, pvar = get_probability_space(BernoulliLikelihood(), posterior) pmean = pmean.unsqueeze(-1).unsqueeze(-1) pvar = pvar.unsqueeze(-1).unsqueeze(-1)