Skip to content

Commit

Permalink
Feature Scaling in DoE (#358)
Browse files Browse the repository at this point in the history
* add functionality

* add tests

* update random strategy

* add scaling to doe, DoEStrategy and SpaceFillingStrategy

* remove TransformEnum

* use Bounds for defining the trafo range

* Update bofire/data_models/strategies/doe.py

Co-authored-by: Johannes P. Dürholt <[email protected]>

* Update bofire/data_models/strategies/space_filling.py

Co-authored-by: Johannes P. Dürholt <[email protected]>

* Update bofire/data_models/strategies/space_filling.py

Co-authored-by: Johannes P. Dürholt <[email protected]>

* pre-commit stuff

---------

Co-authored-by: Osburg <[email protected]>
Co-authored-by: Aaron Osburg <[email protected]>
  • Loading branch information
3 people authored Aug 21, 2024
1 parent ba1098b commit b730c29
Show file tree
Hide file tree
Showing 12 changed files with 257 additions and 55 deletions.
8 changes: 7 additions & 1 deletion bofire/data_models/strategies/doe.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Literal, Type, Union
from typing import Literal, Optional, Type, Union

from bofire.data_models.constraints.api import Constraint
from bofire.data_models.features.api import (
Expand All @@ -7,6 +7,8 @@
)
from bofire.data_models.objectives.api import Objective
from bofire.data_models.strategies.strategy import Strategy
from bofire.data_models.types import Bounds
from bofire.strategies.enum import OptimalityCriterionEnum


class DoEStrategy(Strategy):
Expand All @@ -31,6 +33,10 @@ class DoEStrategy(Strategy):

verbose: bool = False

objective: OptimalityCriterionEnum = OptimalityCriterionEnum.D_OPTIMALITY

transform_range: Optional[Bounds] = None

@classmethod
def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
return True
Expand Down
5 changes: 4 additions & 1 deletion bofire/data_models/strategies/space_filling.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Annotated, Literal, Type
from typing import Annotated, Literal, Optional, Type

from pydantic import Field

Expand All @@ -17,6 +17,7 @@
Feature,
)
from bofire.data_models.strategies.strategy import Strategy
from bofire.data_models.types import Bounds


class SpaceFillingStrategy(Strategy):
Expand All @@ -33,6 +34,8 @@ class SpaceFillingStrategy(Strategy):
sampling_fraction: Annotated[float, Field(gt=0, lt=1)] = 0.3
ipopt_options: dict = {"maxiter": 200, "disp": 0}

transform_range: Optional[Bounds] = None

@classmethod
def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
return my_type in [
Expand Down
74 changes: 41 additions & 33 deletions bofire/strategies/doe/design.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from bofire.data_models.enum import SamplingMethodEnum
from bofire.data_models.features.api import ContinuousInput, Input
from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel
from bofire.data_models.types import Bounds
from bofire.strategies.doe.objective import get_objective_class
from bofire.strategies.doe.utils import (
constraints_as_scipy_constraints,
Expand All @@ -42,6 +43,7 @@ def find_local_max_ipopt_BaB(
categorical_groups: Optional[List[List[ContinuousInput]]] = None,
discrete_variables: Optional[Dict[str, Tuple[ContinuousInput, List[float]]]] = None,
verbose: bool = False,
transform_range: Optional[Bounds] = None,
) -> pd.DataFrame:
"""Function computing a d-optimal design for a given domain and model.
It allows for the problem to have categorical values which is solved by Branch-and-Bound
Expand All @@ -66,6 +68,8 @@ def find_local_max_ipopt_BaB(
discrete_variables (Optional[Dict[str, Tuple[ContinuousInput, List[float]]]]): dict of relaxed discrete inputs
with key:(relaxed variable, valid values). Defaults to None
verbose (bool): if true, print information during the optimization process
transform_range (Optional[Bounds]): range to which the input variables are transformed.
If None is provided, the features will not be scaled. Defaults to None.
Returns:
A pd.DataFrame object containing the best found input for the experiments. In general, this is only a
local optimum.
Expand All @@ -75,17 +79,20 @@ def find_local_max_ipopt_BaB(
if categorical_groups is None:
categorical_groups = []

n_experiments = get_n_experiments(
domain=domain, model_type=model_type, n_experiments=n_experiments
)

# get objective function
model_formula = get_formula_from_string(
model_type=model_type, rhs_only=True, domain=domain
)

n_experiments = get_n_experiments(model_formula, n_experiments)

# get objective function
objective_class = get_objective_class(objective)
objective_class = objective_class(
domain=domain, model=model_formula, n_experiments=n_experiments, delta=delta
domain=domain,
model=model_formula,
n_experiments=n_experiments,
delta=delta,
transform_range=transform_range,
)

# setting up initial node in the branch-and-bound tree
Expand Down Expand Up @@ -131,7 +138,7 @@ def find_local_max_ipopt_BaB(

initial_design = find_local_max_ipopt(
domain,
model_type,
model_formula,
n_experiments,
delta,
ipopt_options,
Expand Down Expand Up @@ -160,7 +167,7 @@ def find_local_max_ipopt_BaB(
result_node = bnb(
initial_queue,
domain=domain,
model_type=model_type,
model_type=model_formula,
n_experiments=n_experiments,
delta=delta,
ipopt_options=ipopt_options,
Expand All @@ -186,6 +193,7 @@ def find_local_max_ipopt_exhaustive(
categorical_groups: Optional[List[List[ContinuousInput]]] = None,
discrete_variables: Optional[Dict[str, Tuple[ContinuousInput, List[float]]]] = None,
verbose: bool = False,
transform_range: Optional[Bounds] = None,
) -> pd.DataFrame:
"""Function computing a d-optimal design for a given domain and model.
It allows for the problem to have categorical values which is solved by exhaustive search
Expand All @@ -210,6 +218,7 @@ def find_local_max_ipopt_exhaustive(
discrete_variables (Optional[Dict[str, Tuple[ContinuousInput, List[float]]]]): dict of relaxed discrete inputs
with key:(relaxed variable, valid values). Defaults to None
verbose (bool): if true, print information during the optimization process
transform_range (Optional[Bounds]): range to which the input variables are transformed.
Returns:
A pd.DataFrame object containing the best found input for the experiments. In general, this is only a
local optimum.
Expand All @@ -229,7 +238,11 @@ def find_local_max_ipopt_exhaustive(
)
objective_class = get_objective_class(objective)
objective_class = objective_class(
domain=domain, model=model_formula, n_experiments=n_experiments, delta=delta
domain=domain,
model=model_formula,
n_experiments=n_experiments,
delta=delta,
transform_range=transform_range,
)

# get binary variables
Expand All @@ -241,9 +254,7 @@ def find_local_max_ipopt_exhaustive(
for group in categorical_groups:
allowed_fixations.append(np.eye(len(group)))

n_experiments = get_n_experiments(
domain=domain, model_type=model_type, n_experiments=n_experiments
)
n_experiments = get_n_experiments(model_formula, n_experiments)
n_non_fixed_experiments = n_experiments
if fixed_experiments is not None:
n_non_fixed_experiments -= len(fixed_experiments)
Expand Down Expand Up @@ -322,7 +333,7 @@ def find_local_max_ipopt_exhaustive(
try:
current_design = find_local_max_ipopt(
domain,
model_type,
model_formula,
n_experiments,
delta,
ipopt_options,
Expand Down Expand Up @@ -363,6 +374,7 @@ def find_local_max_ipopt(
fixed_experiments: Optional[pd.DataFrame] = None,
partially_fixed_experiments: Optional[pd.DataFrame] = None,
objective: OptimalityCriterionEnum = OptimalityCriterionEnum.D_OPTIMALITY,
transform_range: Optional[Bounds] = None,
) -> pd.DataFrame:
"""Function computing an optimal design for a given domain and model.
Args:
Expand All @@ -381,6 +393,7 @@ def find_local_max_ipopt(
Variables can be fixed to one value or can be set to a range by setting a tuple with lower and upper bound
Non-fixed variables have to be set to None or nan.
objective (OptimalityCriterionEnum): OptimalityCriterionEnum object indicating which objective function to use.
transform_range (Optional[Bounds]): range to which the input variables are transformed.
Returns:
A pd.DataFrame object containing the best found input for the experiments. In general, this is only a
local optimum.
Expand All @@ -400,11 +413,13 @@ def find_local_max_ipopt(
)
raise e

# determine number of experiments (only relevant if n_experiments is not provided by the user)
n_experiments = get_n_experiments(
domain=domain, model_type=model_type, n_experiments=n_experiments
model_formula = get_formula_from_string(
model_type=model_type, rhs_only=True, domain=domain
)

# determine number of experiments (only relevant if n_experiments is not provided by the user)
n_experiments = get_n_experiments(model_formula, n_experiments)

if partially_fixed_experiments is not None:
# check if partially fixed experiments are valid
check_partially_fixed_experiments(
Expand Down Expand Up @@ -467,13 +482,13 @@ def find_local_max_ipopt(
)

# get objective function and its jacobian
model_formula = get_formula_from_string(
model_type=model_type, rhs_only=True, domain=domain
)

objective_class = get_objective_class(objective)
d_optimality = objective_class(
domain=domain, model=model_formula, n_experiments=n_experiments, delta=delta
objective_function = objective_class(
domain=domain,
model=model_formula,
n_experiments=n_experiments,
delta=delta,
transform_range=transform_range,
)

# write constraints as scipy constraints
Expand Down Expand Up @@ -511,13 +526,13 @@ def find_local_max_ipopt(
#

result = minimize_ipopt(
d_optimality.evaluate,
objective_function.evaluate,
x0=x0,
bounds=bounds,
# "SLSQP" has no deeper meaning here and just ensures correct constraint standardization
constraints=standardize_constraints(constraints, x0, "SLSQP"),
options=_ipopt_options,
jac=d_optimality.evaluate_jacobian,
jac=objective_function.evaluate_jacobian,
)

design = pd.DataFrame(
Expand Down Expand Up @@ -678,9 +693,7 @@ def check_partially_and_fully_fixed_experiments(
)


def get_n_experiments(
domain: Domain, model_type: Union[str, Formula], n_experiments: Optional[int] = None
):
def get_n_experiments(model_type: Formula, n_experiments: Optional[int] = None):
"""Determines a number of experiments which is appropriate for the model if no
number is provided. Otherwise warns if the provided number of experiments is smaller than recommended.
Expand All @@ -693,12 +706,7 @@ def get_n_experiments(
n_experiments if an integer value for n_experiments is given. Number of model terms + 3 otherwise.
"""
n_experiments_min = (
len(
get_formula_from_string(model_type=model_type, rhs_only=True, domain=domain)
)
+ 3
)
n_experiments_min = len(model_type) + 3

if n_experiments is None:
n_experiments = n_experiments_min
Expand Down
Loading

0 comments on commit b730c29

Please sign in to comment.