From 730298f0ff470a1ab0e7d0d788a78c7867a91e70 Mon Sep 17 00:00:00 2001 From: zyliang2001 Date: Sun, 14 Jan 2024 09:15:30 -0800 Subject: [PATCH] update dgp --- .../mdi_local/two_subgroups_linear_sims/dgp.py | 12 ++++++------ .../mdi_local/two_subgroups_linear_sims/models.py | 2 +- .../scripts/competing_methods_local.py | 7 +++++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/feature_importance/fi_config/mdi_local/two_subgroups_linear_sims/dgp.py b/feature_importance/fi_config/mdi_local/two_subgroups_linear_sims/dgp.py index 6d6bcf2..b1b5152 100644 --- a/feature_importance/fi_config/mdi_local/two_subgroups_linear_sims/dgp.py +++ b/feature_importance/fi_config/mdi_local/two_subgroups_linear_sims/dgp.py @@ -2,14 +2,14 @@ sys.path.append("../..") from feature_importance.scripts.simulations_util import * - -X_DGP = sample_real_X +### Update start for local MDI+ +X_DGP = sample_normal_X X_PARAMS_DICT = { - "fpath": "/mnt/d/local_MDI+/imodels-experiments/data/X_splicing_cleaned.csv", - "sample_row_n": None, - "sample_col_n": None + "n": 1200, + "d": 50, + "mean": 0, + "scale": 1 } -### Update start for local MDI+ Y_DGP = linear_model_two_groups Y_PARAMS_DICT = { "beta": 1, diff --git a/feature_importance/fi_config/mdi_local/two_subgroups_linear_sims/models.py b/feature_importance/fi_config/mdi_local/two_subgroups_linear_sims/models.py index fb93f05..aff8710 100644 --- a/feature_importance/fi_config/mdi_local/two_subgroups_linear_sims/models.py +++ b/feature_importance/fi_config/mdi_local/two_subgroups_linear_sims/models.py @@ -10,7 +10,7 @@ ] FI_ESTIMATORS = [ - # [FIModelConfig('MDI_all_stumps', MDI_local_all_stumps, model_type='tree')], + [FIModelConfig('MDI_all_stumps', MDI_local_all_stumps, model_type='tree')], [FIModelConfig('MDI_sub_stumps', MDI_local_sub_stumps, model_type='tree')], [FIModelConfig('TreeSHAP', tree_shap_local, model_type='tree')], [FIModelConfig('Permutation', permutation_local, model_type='tree')], diff --git a/feature_importance/scripts/competing_methods_local.py b/feature_importance/scripts/competing_methods_local.py index b8e35e9..46bbd44 100644 --- a/feature_importance/scripts/competing_methods_local.py +++ b/feature_importance/scripts/competing_methods_local.py @@ -106,7 +106,7 @@ def MDI_local_sub_stumps(X, y, fit, scoring_fns="auto", return_stability_scores= rf_plus_model.fit(X, y) try: - mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X=X, y=y, local_scoring_fns=mean_squared_error) + mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X=X, y=y, local_scoring_fns=mean_squared_error, version = "zach") if return_stability_scores: stability_scores = rf_plus_model.get_mdi_plus_stability_scores(B=25) except ValueError as e: @@ -123,6 +123,7 @@ def MDI_local_sub_stumps(X, y, fit, scoring_fns="auto", return_stability_scores= # if return_stability_scores: # mdi_plus_scores = pd.concat([mdi_plus_scores, stability_scores], axis=1) result = mdi_plus_scores["local"] + print(result) # Convert the array to a DataFrame result_table = pd.DataFrame(result, columns=[f'Feature_{i}' for i in range(num_features)]) @@ -146,6 +147,7 @@ def MDI_local_all_stumps(X, y, fit, scoring_fns="auto", return_stability_scores= Var: variable name Importance: MDI+ score """ + num_samples, num_features = X.shape if isinstance(fit, RegressorMixin): RFPlus = RandomForestPlusRegressor @@ -157,7 +159,7 @@ def MDI_local_all_stumps(X, y, fit, scoring_fns="auto", return_stability_scores= rf_plus_model = RFPlus(rf_model=fit, **kwargs) rf_plus_model.fit(X, y) try: - mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X=X, y=y, local_scoring_fns=mean_squared_error) + mdi_plus_scores = rf_plus_model.get_mdi_plus_scores(X=X, y=y, local_scoring_fns=mean_squared_error, version = "tiffany") if return_stability_scores: stability_scores = rf_plus_model.get_mdi_plus_stability_scores(B=25) except ValueError as e: @@ -174,6 +176,7 @@ def MDI_local_all_stumps(X, y, fit, scoring_fns="auto", return_stability_scores= # if return_stability_scores: # mdi_plus_scores = pd.concat([mdi_plus_scores, stability_scores], axis=1) result = mdi_plus_scores["local"] + print(result) # Convert the array to a DataFrame result_table = pd.DataFrame(result, columns=[f'Feature_{i}' for i in range(num_features)])