Skip to content

Commit

Permalink
Feature Scaling in DoE (#358)
Browse files Browse the repository at this point in the history
* add functionality

* add tests

* update random strategy

* add scaling to doe, DoEStrategy and SpaceFillingStrategy

* remove TransformEnum

* use Bounds for defining the trafo range

* Update bofire/data_models/strategies/doe.py

Co-authored-by: Johannes P. Dürholt <[email protected]>

* Update bofire/data_models/strategies/space_filling.py

Co-authored-by: Johannes P. Dürholt <[email protected]>

* Update bofire/data_models/strategies/space_filling.py

Co-authored-by: Johannes P. Dürholt <[email protected]>

* pre-commit stuff

---------

Co-authored-by: Osburg <[email protected]>
Co-authored-by: Aaron Osburg <[email protected]>
  • Loading branch information
3 people authored Aug 21, 2024
1 parent ba1098b commit b730c29
Show file tree
Hide file tree
Showing 12 changed files with 257 additions and 55 deletions.
8 changes: 7 additions & 1 deletion bofire/data_models/strategies/doe.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Literal, Type, Union
from typing import Literal, Optional, Type, Union

from bofire.data_models.constraints.api import Constraint
from bofire.data_models.features.api import (
Expand All @@ -7,6 +7,8 @@
)
from bofire.data_models.objectives.api import Objective
from bofire.data_models.strategies.strategy import Strategy
from bofire.data_models.types import Bounds
from bofire.strategies.enum import OptimalityCriterionEnum


class DoEStrategy(Strategy):
Expand All @@ -31,6 +33,10 @@ class DoEStrategy(Strategy):

verbose: bool = False

objective: OptimalityCriterionEnum = OptimalityCriterionEnum.D_OPTIMALITY

transform_range: Optional[Bounds] = None

@classmethod
def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
return True
Expand Down
5 changes: 4 additions & 1 deletion bofire/data_models/strategies/space_filling.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Annotated, Literal, Type
from typing import Annotated, Literal, Optional, Type

from pydantic import Field

Expand All @@ -17,6 +17,7 @@
Feature,
)
from bofire.data_models.strategies.strategy import Strategy
from bofire.data_models.types import Bounds


class SpaceFillingStrategy(Strategy):
Expand All @@ -33,6 +34,8 @@ class SpaceFillingStrategy(Strategy):
sampling_fraction: Annotated[float, Field(gt=0, lt=1)] = 0.3
ipopt_options: dict = {"maxiter": 200, "disp": 0}

transform_range: Optional[Bounds] = None

@classmethod
def is_constraint_implemented(cls, my_type: Type[Constraint]) -> bool:
return my_type in [
Expand Down
74 changes: 41 additions & 33 deletions bofire/strategies/doe/design.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from bofire.data_models.enum import SamplingMethodEnum
from bofire.data_models.features.api import ContinuousInput, Input
from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel
from bofire.data_models.types import Bounds
from bofire.strategies.doe.objective import get_objective_class
from bofire.strategies.doe.utils import (
constraints_as_scipy_constraints,
Expand All @@ -42,6 +43,7 @@ def find_local_max_ipopt_BaB(
categorical_groups: Optional[List[List[ContinuousInput]]] = None,
discrete_variables: Optional[Dict[str, Tuple[ContinuousInput, List[float]]]] = None,
verbose: bool = False,
transform_range: Optional[Bounds] = None,
) -> pd.DataFrame:
"""Function computing a d-optimal design for a given domain and model.
It allows for the problem to have categorical values which is solved by Branch-and-Bound
Expand All @@ -66,6 +68,8 @@ def find_local_max_ipopt_BaB(
discrete_variables (Optional[Dict[str, Tuple[ContinuousInput, List[float]]]]): dict of relaxed discrete inputs
with key:(relaxed variable, valid values). Defaults to None
verbose (bool): if true, print information during the optimization process
transform_range (Optional[Bounds]): range to which the input variables are transformed.
If None is provided, the features will not be scaled. Defaults to None.
Returns:
A pd.DataFrame object containing the best found input for the experiments. In general, this is only a
local optimum.
Expand All @@ -75,17 +79,20 @@ def find_local_max_ipopt_BaB(
if categorical_groups is None:
categorical_groups = []

n_experiments = get_n_experiments(
domain=domain, model_type=model_type, n_experiments=n_experiments
)

# get objective function
model_formula = get_formula_from_string(
model_type=model_type, rhs_only=True, domain=domain
)

n_experiments = get_n_experiments(model_formula, n_experiments)

# get objective function
objective_class = get_objective_class(objective)
objective_class = objective_class(
domain=domain, model=model_formula, n_experiments=n_experiments, delta=delta
domain=domain,
model=model_formula,
n_experiments=n_experiments,
delta=delta,
transform_range=transform_range,
)

# setting up initial node in the branch-and-bound tree
Expand Down Expand Up @@ -131,7 +138,7 @@ def find_local_max_ipopt_BaB(

initial_design = find_local_max_ipopt(
domain,
model_type,
model_formula,
n_experiments,
delta,
ipopt_options,
Expand Down Expand Up @@ -160,7 +167,7 @@ def find_local_max_ipopt_BaB(
result_node = bnb(
initial_queue,
domain=domain,
model_type=model_type,
model_type=model_formula,
n_experiments=n_experiments,
delta=delta,
ipopt_options=ipopt_options,
Expand All @@ -186,6 +193,7 @@ def find_local_max_ipopt_exhaustive(
categorical_groups: Optional[List[List[ContinuousInput]]] = None,
discrete_variables: Optional[Dict[str, Tuple[ContinuousInput, List[float]]]] = None,
verbose: bool = False,
transform_range: Optional[Bounds] = None,
) -> pd.DataFrame:
"""Function computing a d-optimal design for a given domain and model.
It allows for the problem to have categorical values which is solved by exhaustive search
Expand All @@ -210,6 +218,7 @@ def find_local_max_ipopt_exhaustive(
discrete_variables (Optional[Dict[str, Tuple[ContinuousInput, List[float]]]]): dict of relaxed discrete inputs
with key:(relaxed variable, valid values). Defaults to None
verbose (bool): if true, print information during the optimization process
transform_range (Optional[Bounds]): range to which the input variables are transformed.
Returns:
A pd.DataFrame object containing the best found input for the experiments. In general, this is only a
local optimum.
Expand All @@ -229,7 +238,11 @@ def find_local_max_ipopt_exhaustive(
)
objective_class = get_objective_class(objective)
objective_class = objective_class(
domain=domain, model=model_formula, n_experiments=n_experiments, delta=delta
domain=domain,
model=model_formula,
n_experiments=n_experiments,
delta=delta,
transform_range=transform_range,
)

# get binary variables
Expand All @@ -241,9 +254,7 @@ def find_local_max_ipopt_exhaustive(
for group in categorical_groups:
allowed_fixations.append(np.eye(len(group)))

n_experiments = get_n_experiments(
domain=domain, model_type=model_type, n_experiments=n_experiments
)
n_experiments = get_n_experiments(model_formula, n_experiments)
n_non_fixed_experiments = n_experiments
if fixed_experiments is not None:
n_non_fixed_experiments -= len(fixed_experiments)
Expand Down Expand Up @@ -322,7 +333,7 @@ def find_local_max_ipopt_exhaustive(
try:
current_design = find_local_max_ipopt(
domain,
model_type,
model_formula,
n_experiments,
delta,
ipopt_options,
Expand Down Expand Up @@ -363,6 +374,7 @@ def find_local_max_ipopt(
fixed_experiments: Optional[pd.DataFrame] = None,
partially_fixed_experiments: Optional[pd.DataFrame] = None,
objective: OptimalityCriterionEnum = OptimalityCriterionEnum.D_OPTIMALITY,
transform_range: Optional[Bounds] = None,
) -> pd.DataFrame:
"""Function computing an optimal design for a given domain and model.
Args:
Expand All @@ -381,6 +393,7 @@ def find_local_max_ipopt(
Variables can be fixed to one value or can be set to a range by setting a tuple with lower and upper bound
Non-fixed variables have to be set to None or nan.
objective (OptimalityCriterionEnum): OptimalityCriterionEnum object indicating which objective function to use.
transform_range (Optional[Bounds]): range to which the input variables are transformed.
Returns:
A pd.DataFrame object containing the best found input for the experiments. In general, this is only a
local optimum.
Expand All @@ -400,11 +413,13 @@ def find_local_max_ipopt(
)
raise e

# determine number of experiments (only relevant if n_experiments is not provided by the user)
n_experiments = get_n_experiments(
domain=domain, model_type=model_type, n_experiments=n_experiments
model_formula = get_formula_from_string(
model_type=model_type, rhs_only=True, domain=domain
)

# determine number of experiments (only relevant if n_experiments is not provided by the user)
n_experiments = get_n_experiments(model_formula, n_experiments)

if partially_fixed_experiments is not None:
# check if partially fixed experiments are valid
check_partially_fixed_experiments(
Expand Down Expand Up @@ -467,13 +482,13 @@ def find_local_max_ipopt(
)

# get objective function and its jacobian
model_formula = get_formula_from_string(
model_type=model_type, rhs_only=True, domain=domain
)

objective_class = get_objective_class(objective)
d_optimality = objective_class(
domain=domain, model=model_formula, n_experiments=n_experiments, delta=delta
objective_function = objective_class(
domain=domain,
model=model_formula,
n_experiments=n_experiments,
delta=delta,
transform_range=transform_range,
)

# write constraints as scipy constraints
Expand Down Expand Up @@ -511,13 +526,13 @@ def find_local_max_ipopt(
#

result = minimize_ipopt(
d_optimality.evaluate,
objective_function.evaluate,
x0=x0,
bounds=bounds,
# "SLSQP" has no deeper meaning here and just ensures correct constraint standardization
constraints=standardize_constraints(constraints, x0, "SLSQP"),
options=_ipopt_options,
jac=d_optimality.evaluate_jacobian,
jac=objective_function.evaluate_jacobian,
)

design = pd.DataFrame(
Expand Down Expand Up @@ -678,9 +693,7 @@ def check_partially_and_fully_fixed_experiments(
)


def get_n_experiments(
domain: Domain, model_type: Union[str, Formula], n_experiments: Optional[int] = None
):
def get_n_experiments(model_type: Formula, n_experiments: Optional[int] = None):
"""Determines a number of experiments which is appropriate for the model if no
number is provided. Otherwise warns if the provided number of experiments is smaller than recommended.
Expand All @@ -693,12 +706,7 @@ def get_n_experiments(
n_experiments if an integer value for n_experiments is given. Number of model terms + 3 otherwise.
"""
n_experiments_min = (
len(
get_formula_from_string(model_type=model_type, rhs_only=True, domain=domain)
)
+ 3
)
n_experiments_min = len(model_type) + 3

if n_experiments is None:
n_experiments = n_experiments_min
Expand Down
Loading

0 comments on commit b730c29

Please sign in to comment.