Skip to content

Commit

Permalink
Revert as_single
Browse files (browse the repository at this point in the history)
  • Loading branch information
geoalgo committed Oct 19, 2023
1 parent 17a71f7 commit 6a1b6b4
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 23 deletions.
14 changes: 10 additions & 4 deletions tabrepo/repository/evaluation_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,20 +181,26 @@ def eval_metrics(self, tid: int, config_names: List[str], fold: int, check_all_f
return [dict(zip(output_cols, row)) for row in df.loc[mask, output_cols].values]

def val_predictions(self, tid: int, config_name: str, fold: int) -> np.array:
"""
Returns the predictions on the validation set for a given configuration on a given dataset and fold
:return: the model predictions with shape (n_rows, n_classes), or (n_rows,) in case of regression
"""
return self._tabular_predictions.predict_val(
dataset=self.tid_to_dataset(tid),
fold=fold,
models=[config_name],
as_single=True,
)
).squeeze()

def test_predictions(self, tid: int, config_name: str, fold: int) -> np.array:
"""
Returns the predictions on the test set for a given configuration on a given dataset and fold
:return: the model predictions with shape (n_rows, n_classes), or (n_rows,) in case of regression
"""
return self._tabular_predictions.predict_test(
dataset=self.tid_to_dataset(tid),
fold=fold,
models=[config_name],
as_single=True,
)
).squeeze()

def dataset_metadata(self, tid: int) -> dict:
metadata = self._df_metadata[self._df_metadata.tid == tid]
Expand Down
31 changes: 12 additions & 19 deletions tabrepo/simulation/tabular_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ def to_dict(self) -> TabularPredictionsDict:
"""
raise NotImplementedError()

def predict_val(self, dataset: str, fold: int, models: List[str] = None, as_single=False) -> np.array:
def predict_val(self, dataset: str, fold: int, models: List[str] = None) -> np.array:
"""
Obtains validation predictions on a given dataset and fold for a list of models
:return: predictions with shape (num_models, num_rows, num_classes) for classification and
(num_models, num_rows) for regression
"""
raise NotImplementedError()

def predict_test(self, dataset: str, fold: int, models: List[str] = None, as_single=False) -> np.array:
def predict_test(self, dataset: str, fold: int, models: List[str] = None) -> np.array:
"""
Obtains test predictions on a given dataset and fold for a list of models
:return: predictions with shape (num_models, num_rows, num_classes) for classification and
Expand Down Expand Up @@ -137,23 +137,20 @@ def from_dict(cls, pred_dict: TabularPredictionsDict, output_dir: str = None, da
def to_dict(self) -> TabularPredictionsDict:
return self.pred_dict

def predict_val(self, dataset: str, fold: int, models: List[str] = None, as_single=False) -> np.array:
return self._load_pred(dataset=dataset, fold=fold, models=models, split="val", as_single=as_single)
def predict_val(self, dataset: str, fold: int, models: List[str] = None) -> np.array:
return self._load_pred(dataset=dataset, fold=fold, models=models, split="val")

def predict_test(self, dataset: str, fold: int, models: List[str] = None, as_single=False) -> np.array:
return self._load_pred(dataset=dataset, fold=fold, models=models, split="test", as_single=as_single)
def predict_test(self, dataset: str, fold: int, models: List[str] = None) -> np.array:
return self._load_pred(dataset=dataset, fold=fold, models=models, split="test")

def _load_pred(self, dataset: str, split: str, fold: int, models: List[str] = None, as_single=False):
def _load_pred(self, dataset: str, split: str, fold: int, models: List[str] = None):
if models is None:
models = self.models

def get_split(split, models):
split_key = 'pred_proba_dict_test' if split == "test" else 'pred_proba_dict_val'
model_results = self.pred_dict[dataset][fold][split_key]
if as_single and (len(models) == 1):
return np.array(self._get_model_results(model=models[0], model_pred_probas=model_results))
else:
return np.array([self._get_model_results(model=model, model_pred_probas=model_results) for model in models])
return np.array([self._get_model_results(model=model, model_pred_probas=model_results) for model in models])

return get_split(split, models)

Expand Down Expand Up @@ -258,10 +255,10 @@ def to_dict(self) -> TabularPredictionsDict:
dataset: {
fold: {
"pred_proba_dict_val": {
model: self.predict_val(dataset, fold, [model], as_single=True) for model in models
model: self.predict_val(dataset, fold, [model]).squeeze() for model in models
},
"pred_proba_dict_test": {
model: self.predict_test(dataset, fold, [model], as_single=True) for model in models
model: self.predict_test(dataset, fold, [model]).squeeze() for model in models
}
} for fold, models in fold_dict.items()
} for dataset, fold_dict in model_available_dict.items()
Expand All @@ -279,16 +276,12 @@ def _load_metadatas(data_dir):
res[dataset][fold] = metadata
return res

def predict_val(self, dataset: str, fold: int, models: List[str] = None, as_single=False) -> np.array:
def predict_val(self, dataset: str, fold: int, models: List[str] = None) -> np.array:
pred = self._load_pred(dataset=dataset, fold=fold, models=models, split="val")
if as_single:
pred = pred.squeeze()
return pred

def predict_test(self, dataset: str, fold: int, models: List[str] = None, as_single=False) -> np.array:
def predict_test(self, dataset: str, fold: int, models: List[str] = None) -> np.array:
pred = self._load_pred(dataset=dataset, fold=fold, models=models, split="test")
if as_single:
pred = pred.squeeze()
return pred

def _load_pred(self, dataset: str, split: str, fold: int, models: List[str] = None):
Expand Down

0 comments on commit 6a1b6b4

Please sign in to comment.