Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Two new y-transformation approaches #611

Open
wants to merge 2 commits into
base: development
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 56 additions & 3 deletions smac/runhistory/runhistory2epm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,22 @@
import typing

import numpy as np
import scipy as sp

from smac.tae.execute_ta_run import StatusType
from smac.runhistory.runhistory import RunHistory, RunKey, RunValue
from smac.configspace import convert_configurations_to_array
from smac.epm.base_imputor import BaseImputor
from smac.utils import constants
from smac.scenario.scenario import Scenario
from smac.utils.constants import VERY_SMALL_NUMBER

__author__ = "Katharina Eggensperger"
__copyright__ = "Copyright 2015, ML4AAD"
__author__ = "Katharina Eggensperger, Marius Lindauer"
__copyright__ = "Copyright 2015-2020, AutoML.org"
__license__ = "3-clause BSD"
__maintainer__ = "Katharina Eggensperger"
__email__ = "[email protected]"
__version__ = "0.0.1"
__version__ = "0.0.3"


class AbstractRunHistory2EPM(object):
Expand Down Expand Up @@ -545,6 +547,57 @@ def transform_response_values(self, values: np.ndarray) -> np.ndarray:
return values


class RunHistory2EPM4BiLogCost(RunHistory2EPM4Cost):
    """Cost transformer that applies a bilog transformation to the responses.

    Only ``transform_response_values`` is overridden here; all other behavior
    is inherited from ``RunHistory2EPM4Cost``.
    """

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Transform function response values.

        Transform the response values by using a bilog transformation, i.e.
        ``sign(y) * log(1 + |y|)`` applied element-wise.

        Source: "Scalable Constrained Bayesian Optimization" by Eriksson and Poloczek
        https://arxiv.org/pdf/2002.08526.pdf

        Parameters
        ----------
        values : np.ndarray
            Response values to be transformed.

        Returns
        -------
        np.ndarray
            Transformed values, same shape as ``values``.
        """
        # log1p(x) equals log(1 + x) but stays accurate when |x| is tiny,
        # where forming ``1 + x`` first would round the value away.
        return np.sign(values) * np.log1p(np.abs(values))


class RunHistory2EPM4GaussianCopula(RunHistory2EPM4Cost):
    """Cost transformer that maps the responses through a Gaussian copula.

    Only ``transform_response_values`` is overridden here; all other behavior
    is inherited from ``RunHistory2EPM4Cost``.
    """

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Transform function response values.

        Transform the response values by using a Gaussian Copula:
          1. compute rank-based quantiles (ECDF) of the given values
          2. clip the quantiles away from 0 and 1 (winsorization)
          3. compute the inverse Gaussian CDF for these quantiles

        Source: "Scalable Constrained Bayesian Optimization" by Eriksson and Poloczek
        https://arxiv.org/pdf/2002.08526.pdf
        The quantile/cutoff computation follows Salinas et al., as proposed in
        the review of this change.

        Parameters
        ----------
        values : np.ndarray
            Response values to be transformed.

        Returns
        -------
        np.ndarray
            Column vector of shape ``(n, 1)`` holding the transformed values.
        """
        flat = np.asarray(values).flatten()
        n_values = flat.size
        if n_values < 2:
            # Rank normalization divides by (n - 1) and the cutoff divides by
            # sqrt(log(n)); both break down for n < 2. A lone observation sits
            # at the median, whose Gaussian quantile is 0.
            return np.zeros((n_values, 1))

        # ECDF via ranks, scaled to [0, 1]. The earlier percentileofscore-based
        # version was asymmetric: the largest value landed at quantile
        # 1 - VERY_SMALL_NUMBER and was pushed out to roughly 6.36 sigma.
        quants = (sp.stats.rankdata(flat) - 1) / (n_values - 1)
        # Winsorize so that the extremes do not map to +/- infinity.
        cutoff = 1 / (4 * np.power(n_values, 0.25) * np.sqrt(np.pi * np.log(n_values)))
        quants = np.clip(quants, a_min=cutoff, a_max=1 - cutoff)
        # Inverse Gaussian CDF (vectorized over all quantiles)
        return sp.stats.norm.ppf(quants).reshape((-1, 1))


class RunHistory2EPM4EIPS(AbstractRunHistory2EPM):
"""TODO"""

Expand Down