From 572374fc2f3bf9b92c3e7254b622a76775865b0d Mon Sep 17 00:00:00 2001 From: Jason Chow Date: Tue, 19 Nov 2024 17:46:24 -0800 Subject: [PATCH] Support optimizer options to be modified in models, starting from config (#448) Summary: Pull Request resolved: https://github.com/facebookresearch/aepsych/pull/448 Optimizer options can be used to control the SciPy minimize function during model fit. We now allow these options to be set during model initialization (including from config). Every model includes these options. Reviewed By: crasanders Differential Revision: D65641684 fbshipit-source-id: 4328889313d05c129aaf9d7e32b881c8a24b0807 --- aepsych/models/base.py | 6 ++- aepsych/models/gp_classification.py | 22 +++++++++-- aepsych/models/gp_regression.py | 16 ++++++-- aepsych/models/monotonic_projection_gp.py | 8 +++- aepsych/models/monotonic_rejection_gp.py | 15 ++++++-- aepsych/models/pairwise_probit.py | 34 ++++++++++++++--- aepsych/models/semi_p.py | 18 ++++++++- aepsych/utils.py | 28 ++++++++++++++ tests/test_config.py | 46 +++++++++++++++++++++++ 9 files changed, 173 insertions(+), 20 deletions(-) diff --git a/aepsych/models/base.py b/aepsych/models/base.py index 4d554be81..7a00b14b5 100644 --- a/aepsych/models/base.py +++ b/aepsych/models/base.py @@ -364,6 +364,9 @@ def _fit_mll( optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs.copy() max_fit_time = kwargs.pop("max_fit_time", self.max_fit_time) if max_fit_time is not None: + if "options" not in optimizer_kwargs: + optimizer_kwargs["options"] = {} + # figure out how long evaluating a single samp starttime = time.time() _ = mll(self(train_x), train_y) @@ -371,7 +374,8 @@ def _fit_mll( time.time() - starttime + 1e-6 ) # add an epsilon to avoid divide by zero n_eval = int(max_fit_time / single_eval_time) - optimizer_kwargs["options"] = {"maxfun": n_eval} + + optimizer_kwargs["options"]["maxfun"] = n_eval logger.info(f"fit maxfun is {n_eval}") starttime = time.time() diff --git a/aepsych/models/gp_classification.py b/aepsych/models/gp_classification.py index 0991d882c..bdd8fdf0d 100644 --- a/aepsych/models/gp_classification.py +++ b/aepsych/models/gp_classification.py @@ -8,7 +8,7 @@ import warnings from copy import deepcopy -from typing import Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple import gpytorch import numpy as np @@ -17,7 +17,7 @@ from aepsych.factory.default import default_mean_covar_factory from aepsych.models.base import AEPsychModelDeviceMixin from aepsych.models.utils import select_inducing_points -from aepsych.utils import _process_bounds, promote_0d +from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d from aepsych.utils_logging import getLogger from gpytorch.likelihoods import BernoulliLikelihood, BetaLikelihood, Likelihood from gpytorch.models import ApproximateGP @@ -57,6 +57,7 @@ def __init__( inducing_size: Optional[int] = None, max_fit_time: Optional[float] = None, inducing_point_method: str = "auto", + optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: """Initialize the GP Classification model @@ -78,12 +79,17 @@ def __init__( If "pivoted_chol", selects points based on the pivoted Cholesky heuristic. If "kmeans++", selects points by performing kmeans++ clustering on the training data. If "auto", tries to determine the best method automatically. + optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during + fitting. Assumes we are using L-BFGS-B. 
""" lb, ub, self.dim = _process_bounds(lb, ub, dim) - self.max_fit_time = max_fit_time self.inducing_size = inducing_size or 99 + self.optimizer_options = ( + {"options": optimizer_options} if optimizer_options else {"options": {}} + ) + if self.inducing_size >= 100: logger.warning( ( @@ -174,6 +180,8 @@ def from_config(cls, config: Config) -> GPClassificationModel: else: likelihood = None # fall back to __init__ default + optimizer_options = get_optimizer_options(config, classname) + return cls( lb=lb, ub=ub, @@ -184,6 +192,7 @@ def from_config(cls, config: Config) -> GPClassificationModel: max_fit_time=max_fit_time, inducing_point_method=inducing_point_method, likelihood=likelihood, + optimizer_options=optimizer_options, ) def _reset_hyperparameters(self) -> None: @@ -251,7 +260,10 @@ def fit( n = train_y.shape[0] mll = gpytorch.mlls.VariationalELBO(self.likelihood, self, n) - self._fit_mll(mll, **kwargs) + if "optimizer_kwargs" in kwargs: + self._fit_mll(mll, **kwargs) + else: + self._fit_mll(mll, optimizer_kwargs=self.optimizer_options, **kwargs) def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor: """Sample from underlying model. @@ -335,6 +347,7 @@ def __init__( inducing_size: Optional[int] = None, max_fit_time: Optional[float] = None, inducing_point_method: str = "auto", + optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: if likelihood is None: likelihood = BetaLikelihood() @@ -348,4 +361,5 @@ def __init__( inducing_size=inducing_size, max_fit_time=max_fit_time, inducing_point_method=inducing_point_method, + optimizer_options=optimizer_options, ) diff --git a/aepsych/models/gp_regression.py b/aepsych/models/gp_regression.py index a499c9f94..6f954229f 100644 --- a/aepsych/models/gp_regression.py +++ b/aepsych/models/gp_regression.py @@ -7,7 +7,7 @@ from __future__ import annotations from copy import deepcopy -from typing import Dict, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Union import gpytorch import numpy as np @@ -15,7 +15,7 @@ from aepsych.config import Config from aepsych.factory.default import default_mean_covar_factory from aepsych.models.base import AEPsychModelDeviceMixin -from aepsych.utils import _process_bounds, promote_0d +from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d from aepsych.utils_logging import getLogger from gpytorch.likelihoods import GaussianLikelihood, Likelihood from gpytorch.models import ExactGP @@ -40,6 +40,7 @@ def __init__( covar_module: Optional[gpytorch.kernels.Kernel] = None, likelihood: Optional[Likelihood] = None, max_fit_time: Optional[float] = None, + optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: """Initialize the GP regression model @@ -55,6 +56,8 @@ def __init__( Gaussian likelihood. max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None, there is no limit to the fitting time. + optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during + fitting. Assumes we are using L-BFGS-B. 
""" if likelihood is None: likelihood = GaussianLikelihood() @@ -64,6 +67,10 @@ def __init__( lb, ub, self.dim = _process_bounds(lb, ub, dim) self.max_fit_time = max_fit_time + self.optimizer_options = ( + {"options": optimizer_options} if optimizer_options else {"options": {}} + ) + if mean_module is None or covar_module is None: default_mean, default_covar = default_mean_covar_factory( dim=self.dim, stimuli_per_trial=self.stimuli_per_trial @@ -105,6 +112,8 @@ def construct_inputs(cls, config: Config) -> Dict: max_fit_time = config.getfloat(classname, "max_fit_time", fallback=None) + optimizer_options = get_optimizer_options(config, classname) + return { "lb": lb, "ub": ub, @@ -113,6 +122,7 @@ def construct_inputs(cls, config: Config) -> Dict: "covar_module": covar, "likelihood": likelihood, "max_fit_time": max_fit_time, + "optimizer_options": optimizer_options, } @classmethod @@ -142,7 +152,7 @@ def fit(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs) -> None: """ self.set_train_data(train_x, train_y) mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self) - return self._fit_mll(mll, **kwargs) + return self._fit_mll(mll, self.optimizer_options, **kwargs) def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor: """Sample from underlying model. diff --git a/aepsych/models/monotonic_projection_gp.py b/aepsych/models/monotonic_projection_gp.py index 61bec1648..f4c1b21f7 100644 --- a/aepsych/models/monotonic_projection_gp.py +++ b/aepsych/models/monotonic_projection_gp.py @@ -7,7 +7,7 @@ from __future__ import annotations -from typing import Any, List, Optional, Union +from typing import Any, Dict, List, Optional, Union import gpytorch import numpy as np @@ -15,6 +15,7 @@ from aepsych.config import Config from aepsych.factory.default import default_mean_covar_factory from aepsych.models.gp_classification import GPClassificationModel +from aepsych.utils import get_optimizer_options from botorch.posteriors.gpytorch import GPyTorchPosterior from gpytorch.likelihoods import Likelihood from statsmodels.stats.moment_helpers import corr2cov, cov2corr @@ -104,6 +105,7 @@ def __init__( inducing_size: Optional[int] = None, max_fit_time: Optional[float] = None, inducing_point_method: str = "auto", + optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: assert len(monotonic_dims) > 0 self.monotonic_dims = [int(d) for d in monotonic_dims] @@ -119,6 +121,7 @@ def __init__( inducing_size=inducing_size, max_fit_time=max_fit_time, inducing_point_method=inducing_point_method, + optimizer_options=optimizer_options, ) def posterior( @@ -222,6 +225,8 @@ def from_config(cls, config: Config) -> MonotonicProjectionGP: ) min_f_val = config.getfloat(classname, "min_f_val", fallback=None) + optimizer_options = get_optimizer_options(config, classname) + return cls( lb=lb, ub=ub, @@ -235,4 +240,5 @@ def from_config(cls, config: Config) -> MonotonicProjectionGP: monotonic_dims=monotonic_dims, monotonic_grid_size=monotonic_grid_size, min_f_val=min_f_val, + optimizer_options=optimizer_options, ) diff --git a/aepsych/models/monotonic_rejection_gp.py b/aepsych/models/monotonic_rejection_gp.py index 9bf761abe..23ad9bc4a 100644 --- a/aepsych/models/monotonic_rejection_gp.py +++ b/aepsych/models/monotonic_rejection_gp.py @@ -8,7 +8,7 @@ from __future__ import annotations import warnings -from typing import Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union import gpytorch import numpy as np @@ -20,7 +20,7 @@ from 
aepsych.means.constant_partial_grad import ConstantMeanPartialObsGrad from aepsych.models.base import AEPsychMixin from aepsych.models.utils import select_inducing_points -from aepsych.utils import _process_bounds, promote_0d +from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d from botorch.fit import fit_gpytorch_mll from gpytorch.kernels import Kernel from gpytorch.likelihoods import BernoulliLikelihood, Likelihood @@ -63,6 +63,7 @@ def __init__( num_samples: int = 250, num_rejection_samples: int = 5000, inducing_point_method: str = "auto", + optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: """Initialize MonotonicRejectionGP. @@ -82,6 +83,8 @@ def __init__( acqf (MonotonicMCAcquisition, optional): Acquisition function to use for querying points. Defaults to MonotonicMCLSE. objective (Optional[MCAcquisitionObjective], optional): Transformation of GP to apply before computing acquisition function. Defaults to identity transform for gaussian likelihood, probit transform for probit-bernoulli. extra_acqf_args (Optional[Dict[str, object]], optional): Additional arguments to pass into the acquisition function. Defaults to None. + optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during + fitting. Assumes we are using L-BFGS-B. """ self.lb, self.ub, self.dim = _process_bounds(lb, ub, dim) if likelihood is None: @@ -145,6 +148,9 @@ def __init__( self.num_rejection_samples = num_rejection_samples self.fixed_prior_mean = fixed_prior_mean self.inducing_points = inducing_points + self.optimizer_options = ( + {"options": optimizer_options} if optimizer_options else {"options": {}} + ) def fit(self, train_x: Tensor, train_y: Tensor, **kwargs) -> None: """Fit the model @@ -183,7 +189,7 @@ def _set_model( mll = VariationalELBO( likelihood=self.likelihood, model=self, num_data=train_y.numel() ) - mll = fit_gpytorch_mll(mll) + mll = fit_gpytorch_mll(mll, optimizer_kwargs=self.optimizer_options) def update(self, train_x: Tensor, train_y: Tensor, warmstart: bool = True) -> None: """ @@ -319,6 +325,8 @@ def from_config(cls, config: Config) -> MonotonicRejectionGP: classname, "monotonic_idxs", fallback=[-1] ) + optimizer_options = get_optimizer_options(config, classname) + return cls( monotonic_idxs=monotonic_idxs, lb=lb, @@ -329,6 +337,7 @@ def from_config(cls, config: Config) -> MonotonicRejectionGP: num_rejection_samples=num_rejection_samples, mean_module=mean, covar_module=covar, + optimizer_options=optimizer_options, ) def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultivariateNormal: diff --git a/aepsych/models/pairwise_probit.py b/aepsych/models/pairwise_probit.py index a57e15f87..ee5ce0d3c 100644 --- a/aepsych/models/pairwise_probit.py +++ b/aepsych/models/pairwise_probit.py @@ -5,15 +5,14 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. 
import time -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple import gpytorch -import numpy as np import torch from aepsych.config import Config from aepsych.factory import default_mean_covar_factory from aepsych.models.base import AEPsychMixin -from aepsych.utils import _process_bounds, promote_0d +from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d from aepsych.utils_logging import getLogger from botorch.fit import fit_gpytorch_mll from botorch.models import PairwiseGP, PairwiseLaplaceMarginalLogLikelihood @@ -64,6 +63,7 @@ def __init__( dim: Optional[int] = None, covar_module: Optional[gpytorch.kernels.Kernel] = None, max_fit_time: Optional[float] = None, + optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: self.lb, self.ub, dim = _process_bounds(lb, ub, dim) @@ -93,6 +93,9 @@ def __init__( ) self.dim = dim # The Pairwise constructor sets self.dim = None. + self.optimizer_options = ( + {"options": optimizer_options} if optimizer_options else {"options": {}} + ) def fit( self, @@ -101,6 +104,12 @@ def fit( optimizer_kwargs: Optional[Dict[str, Any]] = None, **kwargs, ) -> None: + if optimizer_kwargs is not None: + if not "optimizer_kwargs" in optimizer_kwargs: + optimizer_kwargs = optimizer_kwargs.copy() + optimizer_kwargs.update(self.optimizer_options) + else: + optimizer_kwargs = {"options": self.optimizer_options} self.train() mll = PairwiseLaplaceMarginalLogLikelihood(self.likelihood, self) datapoints, comparisons = self._pairs_to_comparisons(train_x, train_y) @@ -109,17 +118,21 @@ def fit( optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs.copy() max_fit_time = kwargs.pop("max_fit_time", self.max_fit_time) if max_fit_time is not None: + if "options" not in optimizer_kwargs: + optimizer_kwargs["options"] = {} + # figure out how long evaluating a single samp starttime = time.time() _ = mll(self(datapoints), comparisons) single_eval_time = time.time() - starttime n_eval = int(max_fit_time / single_eval_time) - optimizer_kwargs["maxfun"] = n_eval + + optimizer_kwargs["options"]["maxfun"] = n_eval logger.info(f"fit maxfun is {n_eval}") logger.info("Starting fit...") starttime = time.time() - fit_gpytorch_mll(mll, **kwargs, **optimizer_kwargs) + fit_gpytorch_mll(mll, optimizer_kwargs=optimizer_kwargs, **kwargs) logger.info(f"Fit done, time={time.time()-starttime}") def update( @@ -209,4 +222,13 @@ def from_config(cls, config: Config) -> "PairwiseProbitModel": max_fit_time = config.getfloat(classname, "max_fit_time", fallback=None) - return cls(lb=lb, ub=ub, dim=dim, covar_module=covar, max_fit_time=max_fit_time) + optimizer_options = get_optimizer_options(config, classname) + + return cls( + lb=lb, + ub=ub, + dim=dim, + covar_module=covar, + max_fit_time=max_fit_time, + optimizer_options=optimizer_options, + ) diff --git a/aepsych/models/semi_p.py b/aepsych/models/semi_p.py index da8c98dfb..2d5f20f00 100644 --- a/aepsych/models/semi_p.py +++ b/aepsych/models/semi_p.py @@ -8,7 +8,7 @@ from __future__ import annotations from copy import deepcopy -from typing import Any, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Union import gpytorch import numpy as np @@ -18,7 +18,7 @@ from aepsych.config import Config from aepsych.likelihoods import BernoulliObjectiveLikelihood, LinearBernoulliLikelihood from aepsych.models import GPClassificationModel -from aepsych.utils import _process_bounds, promote_0d +from aepsych.utils import _process_bounds, get_optimizer_options, 
promote_0d from aepsych.utils_logging import getLogger from botorch.acquisition.objective import PosteriorTransform from botorch.optim.fit import fit_gpytorch_mll_scipy @@ -189,6 +189,7 @@ def __init__( inducing_size: Optional[int] = None, max_fit_time: Optional[float] = None, inducing_point_method: str = "auto", + optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: """ Initialize SemiParametricGP. @@ -212,6 +213,8 @@ def __init__( If "pivoted_chol", selects points based on the pivoted Cholesky heuristic. If "kmeans++", selects points by performing kmeans++ clustering on the training data. If "auto", tries to determine the best method automatically. + optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during + fitting. Assumes we are using L-BFGS-B. """ lb, ub, dim = _process_bounds(lb, ub, dim) @@ -252,6 +255,7 @@ def __init__( inducing_size=inducing_size, max_fit_time=max_fit_time, inducing_point_method=inducing_point_method, + optimizer_options=optimizer_options, ) @classmethod @@ -294,6 +298,8 @@ def from_config(cls, config: Config) -> SemiParametricGPModel: slope_mean = config.getfloat(classname, "slope_mean", fallback=2) + optimizer_options = get_optimizer_options(config, classname) + return cls( lb=lb, ub=ub, @@ -304,6 +310,7 @@ def from_config(cls, config: Config) -> SemiParametricGPModel: inducing_size=inducing_size, max_fit_time=max_fit_time, inducing_point_method=inducing_point_method, + optimizer_options=optimizer_options, ) def fit( @@ -440,6 +447,7 @@ def __init__( inducing_size: Optional[int] = None, max_fit_time: Optional[float] = None, inducing_point_method: str = "auto", + optimizer_options: Optional[Dict[str, Any]] = None, ) -> None: """ Initialize HadamardSemiPModel. @@ -462,6 +470,8 @@ def __init__( If "pivoted_chol", selects points based on the pivoted Cholesky heuristic. If "kmeans++", selects points by performing kmeans++ clustering on the training data. If "auto", tries to determine the best method automatically. + optimizer_options (Dict[str, Any], optional): Optimizer options to pass to the SciPy optimizer during + fitting. Assumes we are using L-BFGS-B. """ super().__init__( lb=lb, @@ -470,6 +480,7 @@ def __init__( inducing_size=inducing_size, max_fit_time=max_fit_time, inducing_point_method=inducing_point_method, + optimizer_options=optimizer_options, ) self.stim_dim = stim_dim @@ -595,6 +606,8 @@ def from_config(cls, config: Config) -> HadamardSemiPModel: stim_dim = config.getint(classname, "stim_dim", fallback=0) + optimizer_options = get_optimizer_options(config, classname) + return cls( lb=lb, ub=ub, @@ -609,6 +622,7 @@ def from_config(cls, config: Config) -> HadamardSemiPModel: inducing_size=inducing_size, max_fit_time=max_fit_time, inducing_point_method=inducing_point_method, + optimizer_options=optimizer_options, ) def predict( diff --git a/aepsych/utils.py b/aepsych/utils.py index 3ed2640ab..e83a8ec27 100644 --- a/aepsych/utils.py +++ b/aepsych/utils.py @@ -346,3 +346,31 @@ def get_dim(config: Config) -> int: ) # Choice parameters with n_choices < 3 add n_choices - 1 dims return dim + + +def get_optimizer_options(config: Config, name: str) -> Dict[str, Any]: + """Return the optimizer options for the model to pass to the SciPy L-BFGS-B + optimizer. Only the somewhat useful ones for AEPsych are searched for: maxcor, + ftol, gtol, maxfun, maxiter, maxls. 
See docs for details: + https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html#optimize-minimize-lbfgsb + + Args: + config (Config): Config to search for options. + name (str): Model name to look for options for. + + Return: + Dict[str, Any]: Dictionary of options to pass to SciPy's minimize, assuming the + method is L-BFGS-B. + """ + options: Dict[str, Optional[Union[float, int]]] = {} + + options["maxcor"] = config.getint(name, "maxcor", fallback=None) + options["ftol"] = config.getfloat(name, "ftol", fallback=None) + options["gtol"] = config.getfloat(name, "gtol", fallback=None) + options["maxfun"] = config.getint(name, "maxfun", fallback=None) + options["maxiter"] = config.getint(name, "maxiter", fallback=None) + options["maxls"] = config.getint(name, "maxls", fallback=None) + + # Filter all the nones out, which could just come back as an empty dict + options = {key: value for key, value in options.items() if value is not None} + return options diff --git a/tests/test_config.py b/tests/test_config.py index 87ef7d209..5c1947c14 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1316,6 +1316,52 @@ def test_build_transform(self): self.assertTrue(tf[0] == name) self.assertTrue(isinstance(tf[1], transform)) + def test_optimizer_options_smoketest(self): + config_str = """ + [common] + parnames = [signal1] + outcome_types = [binary] + stimuli_per_trial = 1 + strategy_names = [opt_strat] + + [signal1] + par_type = continuous + lower_bound = 0 + upper_bound = 1 + + [opt_strat] + model = GPClassificationModel + generator = SobolGenerator + min_asks = 1 + + [GPClassificationModel] + maxcor = 1 + maxfun = 0 + maxls = 3 + gtol = 4 + ftol = 5 + maxiter = 6 + max_fit_time = 100 + """ + config = Config() + config.update(config_str=config_str) + + strat = Strategy.from_config(config, "opt_strat") + + strat.add_data(torch.tensor([0.80]), torch.tensor([1])) + strat.fit() + + options = strat.model.optimizer_options["options"] + self.assertTrue(options["maxcor"] == 1) + self.assertTrue(options["ftol"] == 5.0) + self.assertTrue(options["gtol"] == 4.0) + self.assertTrue(options["maxiter"] == 6) + self.assertTrue(options["maxls"] == 3) + + self.assertTrue( + options["maxfun"] != 0, "maxfun should be overridden by max_fit_time" + ) + if __name__ == "__main__": unittest.main()
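
Usage note (editor's illustration, not part of the patch): the test above exercises the config route; the new optimizer_options argument can also be passed directly when constructing a model. The sketch below assumes the constructor signature added in aepsych/models/gp_regression.py; the bounds, training data, and option values are invented for illustration.

# Illustrative sketch only; data and option values are invented.
import torch
from aepsych.models.gp_regression import GPRegressionModel

# Keys mirror the SciPy L-BFGS-B options that get_optimizer_options() reads
# from a config (maxcor, ftol, gtol, maxfun, maxiter, maxls).
model = GPRegressionModel(
    lb=torch.tensor([0.0]),
    ub=torch.tensor([1.0]),
    optimizer_options={"maxfun": 2000, "ftol": 1e-6},
)

train_x = torch.rand(20, 1)
train_y = torch.sin(6 * train_x).squeeze(-1)

# fit() wraps the dict as {"options": {...}} and passes it to _fit_mll's
# optimizer_kwargs, so the values reach SciPy's L-BFGS-B options; if
# max_fit_time is also set, maxfun is overridden as in _fit_mll above.
model.fit(train_x, train_y)

The same keys set under a model's config section (as in test_optimizer_options_smoketest) end up in the identical place, since from_config/construct_inputs call get_optimizer_options(config, classname) and forward the result to optimizer_options.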