diff --git a/aepsych/models/base.py b/aepsych/models/base.py
index 4d554be81..7a00b14b5 100644
--- a/aepsych/models/base.py
+++ b/aepsych/models/base.py
@@ -364,6 +364,9 @@ def _fit_mll(
         optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs.copy()
         max_fit_time = kwargs.pop("max_fit_time", self.max_fit_time)
         if max_fit_time is not None:
+            if "options" not in optimizer_kwargs:
+                optimizer_kwargs["options"] = {}
+
             # figure out how long evaluating a single samp
             starttime = time.time()
             _ = mll(self(train_x), train_y)
@@ -371,7 +374,8 @@
                 time.time() - starttime + 1e-6
             )  # add an epsilon to avoid divide by zero
             n_eval = int(max_fit_time / single_eval_time)
-            optimizer_kwargs["options"] = {"maxfun": n_eval}
+
+            optimizer_kwargs["options"]["maxfun"] = n_eval
             logger.info(f"fit maxfun is {n_eval}")
 
         starttime = time.time()
diff --git a/aepsych/models/gp_classification.py b/aepsych/models/gp_classification.py
index 0991d882c..bdd8fdf0d 100644
--- a/aepsych/models/gp_classification.py
+++ b/aepsych/models/gp_classification.py
@@ -8,7 +8,7 @@
 
 import warnings
 from copy import deepcopy
-from typing import Optional, Tuple, Union
+from typing import Any, Dict, Optional, Tuple
 
 import gpytorch
 import numpy as np
@@ -17,7 +17,7 @@
 from aepsych.factory.default import default_mean_covar_factory
 from aepsych.models.base import AEPsychModelDeviceMixin
 from aepsych.models.utils import select_inducing_points
-from aepsych.utils import _process_bounds, promote_0d
+from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d
 from aepsych.utils_logging import getLogger
 from gpytorch.likelihoods import BernoulliLikelihood, BetaLikelihood, Likelihood
 from gpytorch.models import ApproximateGP
@@ -57,6 +57,7 @@ def __init__(
         inducing_size: Optional[int] = None,
         max_fit_time: Optional[float] = None,
         inducing_point_method: str = "auto",
+        optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Initialize the GP Classification model
 
@@ -78,12 +79,17 @@
                 If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
                 If "kmeans++", selects points by performing kmeans++ clustering on the training data.
                 If "auto", tries to determine the best method automatically.
+            optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during
+                fitting. Assumes we are using L-BFGS-B.
         """
         lb, ub, self.dim = _process_bounds(lb, ub, dim)
-
         self.max_fit_time = max_fit_time
         self.inducing_size = inducing_size or 99
 
+        self.optimizer_options = (
+            {"options": optimizer_options} if optimizer_options else {"options": {}}
+        )
+
         if self.inducing_size >= 100:
             logger.warning(
                 (
@@ -174,6 +180,8 @@ def from_config(cls, config: Config) -> GPClassificationModel:
         else:
             likelihood = None  # fall back to __init__ default
 
+        optimizer_options = get_optimizer_options(config, classname)
+
         return cls(
             lb=lb,
             ub=ub,
@@ -184,6 +192,7 @@
             max_fit_time=max_fit_time,
             inducing_point_method=inducing_point_method,
             likelihood=likelihood,
+            optimizer_options=optimizer_options,
         )
 
     def _reset_hyperparameters(self) -> None:
@@ -251,7 +260,10 @@ def fit(
 
         n = train_y.shape[0]
         mll = gpytorch.mlls.VariationalELBO(self.likelihood, self, n)
-        self._fit_mll(mll, **kwargs)
+        if "optimizer_kwargs" in kwargs:
+            self._fit_mll(mll, **kwargs)
+        else:
+            self._fit_mll(mll, optimizer_kwargs=self.optimizer_options, **kwargs)
 
     def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor:
         """Sample from underlying model.
@@ -335,6 +347,7 @@ def __init__(
         inducing_size: Optional[int] = None,
         max_fit_time: Optional[float] = None,
         inducing_point_method: str = "auto",
+        optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         if likelihood is None:
             likelihood = BetaLikelihood()
@@ -348,4 +361,5 @@
             inducing_size=inducing_size,
             max_fit_time=max_fit_time,
             inducing_point_method=inducing_point_method,
+            optimizer_options=optimizer_options,
         )
diff --git a/aepsych/models/gp_regression.py b/aepsych/models/gp_regression.py
index a499c9f94..6f954229f 100644
--- a/aepsych/models/gp_regression.py
+++ b/aepsych/models/gp_regression.py
@@ -7,7 +7,7 @@
 from __future__ import annotations
 
 from copy import deepcopy
-from typing import Dict, Optional, Tuple, Union
+from typing import Any, Dict, Optional, Tuple, Union
 
 import gpytorch
 import numpy as np
@@ -15,7 +15,7 @@
 from aepsych.config import Config
 from aepsych.factory.default import default_mean_covar_factory
 from aepsych.models.base import AEPsychModelDeviceMixin
-from aepsych.utils import _process_bounds, promote_0d
+from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d
 from aepsych.utils_logging import getLogger
 from gpytorch.likelihoods import GaussianLikelihood, Likelihood
 from gpytorch.models import ExactGP
@@ -40,6 +40,7 @@ def __init__(
         covar_module: Optional[gpytorch.kernels.Kernel] = None,
         likelihood: Optional[Likelihood] = None,
         max_fit_time: Optional[float] = None,
+        optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Initialize the GP regression model
 
@@ -55,6 +56,8 @@
                 Gaussian likelihood.
             max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
                 there is no limit to the fitting time.
+            optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during
+                fitting. Assumes we are using L-BFGS-B.
         """
         if likelihood is None:
             likelihood = GaussianLikelihood()
@@ -64,6 +67,10 @@
         lb, ub, self.dim = _process_bounds(lb, ub, dim)
         self.max_fit_time = max_fit_time
+        self.optimizer_options = (
+            {"options": optimizer_options} if optimizer_options else {"options": {}}
+        )
+
         if mean_module is None or covar_module is None:
             default_mean, default_covar = default_mean_covar_factory(
                 dim=self.dim, stimuli_per_trial=self.stimuli_per_trial
             )
@@ -105,6 +112,8 @@ def construct_inputs(cls, config: Config) -> Dict:
 
         max_fit_time = config.getfloat(classname, "max_fit_time", fallback=None)
 
+        optimizer_options = get_optimizer_options(config, classname)
+
         return {
             "lb": lb,
             "ub": ub,
@@ -113,6 +122,7 @@
             "covar_module": covar,
             "likelihood": likelihood,
             "max_fit_time": max_fit_time,
+            "optimizer_options": optimizer_options,
         }
 
     @classmethod
@@ -142,7 +152,7 @@ def fit(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs) -> None:
         """
         self.set_train_data(train_x, train_y)
         mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self)
-        return self._fit_mll(mll, **kwargs)
+        return self._fit_mll(mll, self.optimizer_options, **kwargs)
 
     def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor:
         """Sample from underlying model.
diff --git a/aepsych/models/monotonic_projection_gp.py b/aepsych/models/monotonic_projection_gp.py
index 61bec1648..f4c1b21f7 100644
--- a/aepsych/models/monotonic_projection_gp.py
+++ b/aepsych/models/monotonic_projection_gp.py
@@ -7,7 +7,7 @@
 
 from __future__ import annotations
 
-from typing import Any, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 import gpytorch
 import numpy as np
@@ -15,6 +15,7 @@
 from aepsych.config import Config
 from aepsych.factory.default import default_mean_covar_factory
 from aepsych.models.gp_classification import GPClassificationModel
+from aepsych.utils import get_optimizer_options
 from botorch.posteriors.gpytorch import GPyTorchPosterior
 from gpytorch.likelihoods import Likelihood
 from statsmodels.stats.moment_helpers import corr2cov, cov2corr
@@ -104,6 +105,7 @@ def __init__(
         inducing_size: Optional[int] = None,
         max_fit_time: Optional[float] = None,
         inducing_point_method: str = "auto",
+        optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         assert len(monotonic_dims) > 0
         self.monotonic_dims = [int(d) for d in monotonic_dims]
@@ -119,6 +121,7 @@
             inducing_size=inducing_size,
             max_fit_time=max_fit_time,
             inducing_point_method=inducing_point_method,
+            optimizer_options=optimizer_options,
         )
 
     def posterior(
@@ -222,6 +225,8 @@ def from_config(cls, config: Config) -> MonotonicProjectionGP:
         )
         min_f_val = config.getfloat(classname, "min_f_val", fallback=None)
 
+        optimizer_options = get_optimizer_options(config, classname)
+
         return cls(
             lb=lb,
             ub=ub,
@@ -235,4 +240,5 @@
             monotonic_dims=monotonic_dims,
             monotonic_grid_size=monotonic_grid_size,
             min_f_val=min_f_val,
+            optimizer_options=optimizer_options,
         )
diff --git a/aepsych/models/monotonic_rejection_gp.py b/aepsych/models/monotonic_rejection_gp.py
index 9bf761abe..23ad9bc4a 100644
--- a/aepsych/models/monotonic_rejection_gp.py
+++ b/aepsych/models/monotonic_rejection_gp.py
@@ -8,7 +8,7 @@
 from __future__ import annotations
 
 import warnings
-from typing import Dict, List, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
 import gpytorch
 import numpy as np
@@ -20,7 +20,7 @@
 from aepsych.means.constant_partial_grad import ConstantMeanPartialObsGrad
 from aepsych.models.base import AEPsychMixin
 from aepsych.models.utils import select_inducing_points
-from aepsych.utils import _process_bounds, promote_0d
+from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d
 from botorch.fit import fit_gpytorch_mll
 from gpytorch.kernels import Kernel
 from gpytorch.likelihoods import BernoulliLikelihood, Likelihood
@@ -63,6 +63,7 @@ def __init__(
         num_samples: int = 250,
         num_rejection_samples: int = 5000,
         inducing_point_method: str = "auto",
+        optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Initialize MonotonicRejectionGP.
 
@@ -82,6 +83,8 @@
             acqf (MonotonicMCAcquisition, optional): Acquisition function to use for querying points. Defaults to MonotonicMCLSE.
             objective (Optional[MCAcquisitionObjective], optional): Transformation of GP to apply before computing acquisition function. Defaults to identity transform for gaussian likelihood, probit transform for probit-bernoulli.
             extra_acqf_args (Optional[Dict[str, object]], optional): Additional arguments to pass into the acquisition function. Defaults to None.
+            optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during
+                fitting. Assumes we are using L-BFGS-B.
         """
         self.lb, self.ub, self.dim = _process_bounds(lb, ub, dim)
         if likelihood is None:
@@ -145,6 +148,9 @@
         self.num_rejection_samples = num_rejection_samples
         self.fixed_prior_mean = fixed_prior_mean
         self.inducing_points = inducing_points
+        self.optimizer_options = (
+            {"options": optimizer_options} if optimizer_options else {"options": {}}
+        )
 
     def fit(self, train_x: Tensor, train_y: Tensor, **kwargs) -> None:
         """Fit the model
@@ -183,7 +189,7 @@ def _set_model(
         mll = VariationalELBO(
             likelihood=self.likelihood, model=self, num_data=train_y.numel()
         )
-        mll = fit_gpytorch_mll(mll)
+        mll = fit_gpytorch_mll(mll, optimizer_kwargs=self.optimizer_options)
 
     def update(self, train_x: Tensor, train_y: Tensor, warmstart: bool = True) -> None:
         """
@@ -319,6 +325,8 @@ def from_config(cls, config: Config) -> MonotonicRejectionGP:
             classname, "monotonic_idxs", fallback=[-1]
         )
 
+        optimizer_options = get_optimizer_options(config, classname)
+
         return cls(
             monotonic_idxs=monotonic_idxs,
             lb=lb,
@@ -329,6 +337,7 @@
             num_rejection_samples=num_rejection_samples,
             mean_module=mean,
             covar_module=covar,
+            optimizer_options=optimizer_options,
         )
 
     def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultivariateNormal:
diff --git a/aepsych/models/pairwise_probit.py b/aepsych/models/pairwise_probit.py
index a57e15f87..ee5ce0d3c 100644
--- a/aepsych/models/pairwise_probit.py
+++ b/aepsych/models/pairwise_probit.py
@@ -5,15 +5,14 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 import time
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Dict, Optional, Tuple
 
 import gpytorch
-import numpy as np
 import torch
 from aepsych.config import Config
 from aepsych.factory import default_mean_covar_factory
 from aepsych.models.base import AEPsychMixin
-from aepsych.utils import _process_bounds, promote_0d
+from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d
 from aepsych.utils_logging import getLogger
 from botorch.fit import fit_gpytorch_mll
 from botorch.models import PairwiseGP, PairwiseLaplaceMarginalLogLikelihood
@@ -64,6 +63,7 @@ def __init__(
         dim: Optional[int] = None,
         covar_module: Optional[gpytorch.kernels.Kernel] = None,
         max_fit_time: Optional[float] = None,
+        optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         self.lb, self.ub, dim = _process_bounds(lb, ub, dim)
 
@@ -93,6 +93,9 @@
         )
 
         self.dim = dim  # The Pairwise constructor sets self.dim = None.
+        self.optimizer_options = (
+            {"options": optimizer_options} if optimizer_options else {"options": {}}
+        )
 
     def fit(
         self,
@@ -101,6 +104,12 @@
         optimizer_kwargs: Optional[Dict[str, Any]] = None,
         **kwargs,
     ) -> None:
+        if optimizer_kwargs is not None:
+            if not "optimizer_kwargs" in optimizer_kwargs:
+                optimizer_kwargs = optimizer_kwargs.copy()
+                optimizer_kwargs.update(self.optimizer_options)
+        else:
+            optimizer_kwargs = {"options": self.optimizer_options}
         self.train()
         mll = PairwiseLaplaceMarginalLogLikelihood(self.likelihood, self)
         datapoints, comparisons = self._pairs_to_comparisons(train_x, train_y)
@@ -109,17 +118,21 @@
         optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs.copy()
         max_fit_time = kwargs.pop("max_fit_time", self.max_fit_time)
         if max_fit_time is not None:
+            if "options" not in optimizer_kwargs:
+                optimizer_kwargs["options"] = {}
+
             # figure out how long evaluating a single samp
             starttime = time.time()
             _ = mll(self(datapoints), comparisons)
             single_eval_time = time.time() - starttime
             n_eval = int(max_fit_time / single_eval_time)
-            optimizer_kwargs["maxfun"] = n_eval
+
+            optimizer_kwargs["options"]["maxfun"] = n_eval
             logger.info(f"fit maxfun is {n_eval}")
 
         logger.info("Starting fit...")
         starttime = time.time()
-        fit_gpytorch_mll(mll, **kwargs, **optimizer_kwargs)
+        fit_gpytorch_mll(mll, optimizer_kwargs=optimizer_kwargs, **kwargs)
         logger.info(f"Fit done, time={time.time()-starttime}")
 
     def update(
@@ -209,4 +222,13 @@
 
         max_fit_time = config.getfloat(classname, "max_fit_time", fallback=None)
 
-        return cls(lb=lb, ub=ub, dim=dim, covar_module=covar, max_fit_time=max_fit_time)
+        optimizer_options = get_optimizer_options(config, classname)
+
+        return cls(
+            lb=lb,
+            ub=ub,
+            dim=dim,
+            covar_module=covar,
+            max_fit_time=max_fit_time,
+            optimizer_options=optimizer_options,
+        )
diff --git a/aepsych/models/semi_p.py b/aepsych/models/semi_p.py
index da8c98dfb..2d5f20f00 100644
--- a/aepsych/models/semi_p.py
+++ b/aepsych/models/semi_p.py
@@ -8,7 +8,7 @@
 from __future__ import annotations
 
 from copy import deepcopy
-from typing import Any, Optional, Tuple, Union
+from typing import Any, Dict, Optional, Tuple, Union
 
 import gpytorch
 import numpy as np
@@ -18,7 +18,7 @@
 from aepsych.config import Config
 from aepsych.likelihoods import BernoulliObjectiveLikelihood, LinearBernoulliLikelihood
 from aepsych.models import GPClassificationModel
-from aepsych.utils import _process_bounds, promote_0d
+from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d
 from aepsych.utils_logging import getLogger
 from botorch.acquisition.objective import PosteriorTransform
 from botorch.optim.fit import fit_gpytorch_mll_scipy
@@ -189,6 +189,7 @@ def __init__(
         inducing_size: Optional[int] = None,
         max_fit_time: Optional[float] = None,
         inducing_point_method: str = "auto",
+        optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         """
         Initialize SemiParametricGP.
@@ -212,6 +213,8 @@
                 If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
                 If "kmeans++", selects points by performing kmeans++ clustering on the training data.
                 If "auto", tries to determine the best method automatically.
+            optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during
+                fitting. Assumes we are using L-BFGS-B.
         """
         lb, ub, dim = _process_bounds(lb, ub, dim)
 
@@ -252,6 +255,7 @@
             inducing_size=inducing_size,
             max_fit_time=max_fit_time,
             inducing_point_method=inducing_point_method,
+            optimizer_options=optimizer_options,
         )
 
     @classmethod
@@ -294,6 +298,8 @@ def from_config(cls, config: Config) -> SemiParametricGPModel:
 
         slope_mean = config.getfloat(classname, "slope_mean", fallback=2)
 
+        optimizer_options = get_optimizer_options(config, classname)
+
         return cls(
             lb=lb,
             ub=ub,
@@ -304,6 +310,7 @@
             inducing_size=inducing_size,
             max_fit_time=max_fit_time,
             inducing_point_method=inducing_point_method,
+            optimizer_options=optimizer_options,
        )
 
     def fit(
@@ -440,6 +447,7 @@ def __init__(
         inducing_size: Optional[int] = None,
         max_fit_time: Optional[float] = None,
         inducing_point_method: str = "auto",
+        optimizer_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         """
         Initialize HadamardSemiPModel.
@@ -462,6 +470,8 @@
                 If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
                 If "kmeans++", selects points by performing kmeans++ clustering on the training data.
                 If "auto", tries to determine the best method automatically.
+            optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during
+                fitting. Assumes we are using L-BFGS-B.
         """
         super().__init__(
             lb=lb,
@@ -470,6 +480,7 @@
             inducing_size=inducing_size,
             max_fit_time=max_fit_time,
             inducing_point_method=inducing_point_method,
+            optimizer_options=optimizer_options,
         )
 
         self.stim_dim = stim_dim
@@ -595,6 +606,8 @@ def from_config(cls, config: Config) -> HadamardSemiPModel:
 
         stim_dim = config.getint(classname, "stim_dim", fallback=0)
 
+        optimizer_options = get_optimizer_options(config, classname)
+
         return cls(
             lb=lb,
             ub=ub,
@@ -609,6 +622,7 @@
             inducing_size=inducing_size,
             max_fit_time=max_fit_time,
             inducing_point_method=inducing_point_method,
+            optimizer_options=optimizer_options,
         )
 
     def predict(
diff --git a/aepsych/utils.py b/aepsych/utils.py
index 3ed2640ab..e83a8ec27 100644
--- a/aepsych/utils.py
+++ b/aepsych/utils.py
@@ -346,3 +346,31 @@ def get_dim(config: Config) -> int:
             )  # Choice parameters with n_choices < 3 add n_choices - 1 dims
 
     return dim
+
+
+def get_optimizer_options(config: Config, name: str) -> Dict[str, Any]:
+    """Return the optimizer options for the model to pass to the SciPy L-BFGS-B
+    optimizer. Only the somewhat useful ones for AEPsych are searched for: maxcor,
+    ftol, gtol, maxfun, maxiter, maxls. See docs for details:
+    https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html#optimize-minimize-lbfgsb
+
+    Args:
+        config (Config): Config to search for options.
+        name (str): Model name to look for options for.
+
+    Return:
+        Dict[str, Any]: Dictionary of options to pass to SciPy's minimize, assuming the
+            method is L-BFGS-B.
+    """
+    options: Dict[str, Optional[Union[float, int]]] = {}
+
+    options["maxcor"] = config.getint(name, "maxcor", fallback=None)
+    options["ftol"] = config.getfloat(name, "ftol", fallback=None)
+    options["gtol"] = config.getfloat(name, "gtol", fallback=None)
+    options["maxfun"] = config.getint(name, "maxfun", fallback=None)
+    options["maxiter"] = config.getint(name, "maxiter", fallback=None)
+    options["maxls"] = config.getint(name, "maxls", fallback=None)
+
+    # Filter all the nones out, which could just come back as an empty dict
+    options = {key: value for key, value in options.items() if value is not None}
+    return options
diff --git a/tests/test_config.py b/tests/test_config.py
index 87ef7d209..5c1947c14 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1316,6 +1316,52 @@ def test_build_transform(self):
             self.assertTrue(tf[0] == name)
             self.assertTrue(isinstance(tf[1], transform))
 
+    def test_optimizer_options_smoketest(self):
+        config_str = """
+        [common]
+        parnames = [signal1]
+        outcome_types = [binary]
+        stimuli_per_trial = 1
+        strategy_names = [opt_strat]
+
+        [signal1]
+        par_type = continuous
+        lower_bound = 0
+        upper_bound = 1
+
+        [opt_strat]
+        model = GPClassificationModel
+        generator = SobolGenerator
+        min_asks = 1
+
+        [GPClassificationModel]
+        maxcor = 1
+        maxfun = 0
+        maxls = 3
+        gtol = 4
+        ftol = 5
+        maxiter = 6
+        max_fit_time = 100
+        """
+        config = Config()
+        config.update(config_str=config_str)
+
+        strat = Strategy.from_config(config, "opt_strat")
+
+        strat.add_data(torch.tensor([0.80]), torch.tensor([1]))
+        strat.fit()
+
+        options = strat.model.optimizer_options["options"]
+        self.assertTrue(options["maxcor"] == 1)
+        self.assertTrue(options["ftol"] == 5.0)
+        self.assertTrue(options["gtol"] == 4.0)
+        self.assertTrue(options["maxiter"] == 6)
+        self.assertTrue(options["maxls"] == 3)
+
+        self.assertTrue(
+            options["maxfun"] != 0, "maxfun should be overridden by max_fit_time"
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
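
Usage sketch (not part of the patch): besides the config path exercised in the smoke test above, the new optimizer_options argument can also be passed directly when constructing a model. The bounds and training data below are arbitrary illustration values, not taken from the diff.

    # Minimal sketch: forward L-BFGS-B options (the same keys read by
    # get_optimizer_options) to a model's fitting routine.
    import torch
    from aepsych.models import GPClassificationModel

    model = GPClassificationModel(
        lb=torch.tensor([0.0]),
        ub=torch.tensor([1.0]),
        dim=1,
        optimizer_options={"maxfun": 2000, "ftol": 1e-6},
    )
    # The options are stored as {"options": {...}} and used as the default
    # optimizer_kwargs in fit() unless optimizer_kwargs is passed explicitly.
    model.fit(torch.tensor([[0.1], [0.5], [0.9]]), torch.tensor([0.0, 1.0, 1.0]))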