Skip to content

Commit

Permalink
pushing files from older version of code
Browse files Browse the repository at this point in the history
  • Loading branch information
zachrewolinski committed May 14, 2024
1 parent 0a7508a commit 59a45b3
Show file tree
Hide file tree
Showing 32 changed files with 2,447 additions and 12,204 deletions.
2 changes: 1 addition & 1 deletion feature_importance/01_ablation_regression_script.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
#SBATCH --mail-user=zhongyuan_liang@berkeley.edu
#SBATCH --mail-user=zachrewolinski@berkeley.edu
#SBATCH --mail-type=ALL
#SBATCH --partition=yugroup

Expand Down
4 changes: 3 additions & 1 deletion feature_importance/01_run_ablation_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from imodels.importance import RandomForestPlusRegressor, RandomForestPlusClassifier
sys.path.append(".")
sys.path.append("..")
sys.path.append("../..")
sys.path.append("/accounts/grad/zachrewolinski/research/imodels")
print("sys.path", sys.path)
from imodels.importance import RandomForestPlusRegressor, RandomForestPlusClassifier
import fi_config
from util import ModelConfig, FIModelConfig, tp, fp, neg, pos, specificity_score, auroc_score, auprc_score, compute_nsg_feat_corr_w_sig_subspace, apply_splitting_strategy

Expand Down
3 changes: 1 addition & 2 deletions feature_importance/feature_ranking.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
#!/bin/bash
#SBATCH --mail-user=[email protected]
#SBATCH --mail-type=ALL
#SBATCH --partition=yugroup

source activate mdi
command="run_importance_local_sims.py --nreps 1 --config mdi_local.real_x_sim_y --split_seed 1 --ignore_cache --create_rmd --result_name feature_ranking"
command="ranking_importance_local_sims.py --nreps 1 --config mdi_local.real_x_sim_y.diabetes-classification.lss-model --split_seed ${1} --ignore_cache --create_rmd --result_name diabetes-class-lss"

# Execute the command
python $command
8 changes: 8 additions & 0 deletions feature_importance/feature_ranking_master.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Submit ten SLURM jobs, one per replicate index 1..10. Each index is handed to
# the job script as its first positional argument.

slurm_script="feature_ranking.sh"

for rep in {1..10}
do
    # Quote both expansions so neither the script path nor the argument is
    # subject to word splitting or globbing.
    sbatch "$slurm_script" "$rep" # Submit SLURM job using the specified script
done
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import sys
sys.path.append("../..")
from feature_importance.scripts.simulations_util import *


# Covariate generator and its settings (presumably forwarded to X_DGP as
# keyword arguments -- confirm against the simulation driver).
X_DGP = sample_real_X
X_PARAMS_DICT = dict(
    fpath="../data/classification_data/Diabetes/X_diabetes.csv",
    # NOTE(review): sibling configs that read this same CSV use 768 here;
    # confirm 442 is intended for this file.
    sample_row_n=442,
)

# Response generator and its settings.
Y_DGP = hierarchical_poly
Y_PARAMS_DICT = dict(
    beta=1,
    sigma=None,
    heritability=0.4,
    m=3,
    r=2,
)

# Parameters swept by the simulation; each inner dict maps a display label to
# the value substituted for that parameter.
VARY_PARAM_NAME = ["heritability", "sample_row_n"]
VARY_PARAM_VALS = {
    "heritability": {str(level): level for level in (0.1, 0.2, 0.4, 0.8)},
    "sample_row_n": {str(count): count for count in (100, 200, 400, 600)},
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
ESTIMATORS = [
[ModelConfig('RF', RandomForestRegressor, model_type='tree',
other_params={'n_estimators': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'random_state': 42})],
[ModelConfig('RF_plus', RandomForestPlusClassifier, model_type='t_plus',
[ModelConfig('RF_plus', RandomForestPlusRegressor, model_type='t_plus',
other_params={'rf_model': RandomForestRegressor(n_estimators=100, min_samples_leaf=1, max_features='sqrt', random_state=42)})]
]

Expand All @@ -21,5 +21,5 @@
[FIModelConfig('TreeSHAP_RF', tree_shap_evaluation_RF, model_type='tree', splitting_strategy = "train-test")],
[FIModelConfig('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
# [FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
[FIModelConfig('LIME_RF_plus', lime_evaluation_RF_plus, model_type='t_plus', splitting_strategy = "train-test")],
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import sys
sys.path.append("../..")
from feature_importance.scripts.simulations_util import *


# Covariate generator and its settings (presumably forwarded to X_DGP as
# keyword arguments -- confirm against the simulation driver).
X_DGP = sample_real_X
X_PARAMS_DICT = dict(
    fpath="../data/classification_data/Diabetes/X_diabetes.csv",
    sample_row_n=768,
)

# Response generator and its settings.
Y_DGP = logistic_partial_linear_lss_model
Y_PARAMS_DICT = dict(
    s=1,
    m=3,
    r=2,
    tau=0,
    beta=1,
    heritability=0.4,
)

# Parameters swept by the simulation; each inner dict maps a display label to
# the value substituted for that parameter.
VARY_PARAM_NAME = ["heritability", "sample_row_n"]
VARY_PARAM_VALS = {
    "heritability": {str(level): level for level in (0.1, 0.2, 0.4, 0.8)},
    "sample_row_n": {str(count): count for count in (100, 200, 400, 600)},
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import copy
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestClassifier
from imodels.importance.rf_plus import RandomForestPlusClassifier
from feature_importance.scripts.competing_methods_local import *



# Prediction models: a random forest classifier and an RF+ classifier that
# wraps an identically configured forest.
ESTIMATORS = [
    [
        ModelConfig(
            'RF',
            RandomForestClassifier,
            model_type='tree',
            other_params={
                'n_estimators': 100,
                'min_samples_leaf': 1,
                'max_features': 'sqrt',
                'random_state': 42,
            },
        )
    ],
    [
        ModelConfig(
            'RF_plus',
            RandomForestPlusClassifier,
            model_type='t_plus',
            other_params={
                'rf_model': RandomForestClassifier(
                    n_estimators=100,
                    min_samples_leaf=1,
                    max_features='sqrt',
                    random_state=42,
                ),
            },
        )
    ],
]

# Feature-importance methods, one single-element list per method. Every entry
# uses the "train-test" splitting strategy; the last tuple field carries the
# optional other_params keyword for entries that override defaults.
FI_ESTIMATORS = [
    [
        FIModelConfig(
            fi_name,
            fi_func,
            model_type=fi_model_type,
            splitting_strategy="train-test",
            **fi_extra,
        )
    ]
    for fi_name, fi_func, fi_model_type, fi_extra in (
        ('LFI_with_raw_RF', LFI_evaluation_RF, 'tree', {}),
        ('MDI_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"include_raw": False, "cv_ridge": 0,
                           "calc_loo_coef": False, "sample_split": "inbag"}}),
        ('LFI_with_raw_OOB_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"sample_split": "oob", "fit_on": "test",
                           "calc_loo_coef": False}}),
        ('TreeSHAP_RF', tree_shap_evaluation_RF, 'tree', {}),
        ('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, 't_plus', {}),
        ('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, 't_plus', {}),
        ('LIME_RF_plus', lime_evaluation_RF_plus, 't_plus', {}),
    )
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import sys
sys.path.append("../..")
from feature_importance.scripts.simulations_util import *


# Covariate generator and its settings (presumably forwarded to X_DGP as
# keyword arguments -- confirm against the simulation driver).
X_DGP = sample_real_X
X_PARAMS_DICT = dict(
    fpath="../data/classification_data/Diabetes/X_diabetes.csv",
    sample_row_n=768,
)

# Response generator and its settings.
Y_DGP = logistic_model
Y_PARAMS_DICT = dict(
    s=4,
    beta=1,
    heritability=0.4,
)

# Parameters swept by the simulation; each inner dict maps a display label to
# the value substituted for that parameter.
VARY_PARAM_NAME = ["heritability", "sample_row_n"]
VARY_PARAM_VALS = {
    "heritability": {str(level): level for level in (0.1, 0.2, 0.4, 0.8)},
    "sample_row_n": {str(count): count for count in (100, 200, 400, 600)},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import copy
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestClassifier
from imodels.importance.rf_plus import RandomForestPlusClassifier
from feature_importance.scripts.competing_methods_local import *



# Prediction models: a random forest classifier and an RF+ classifier that
# wraps an identically configured forest.
ESTIMATORS = [
    [
        ModelConfig(
            'RF',
            RandomForestClassifier,
            model_type='tree',
            other_params={
                'n_estimators': 100,
                'min_samples_leaf': 1,
                'max_features': 'sqrt',
                'random_state': 42,
            },
        )
    ],
    [
        ModelConfig(
            'RF_plus',
            RandomForestPlusClassifier,
            model_type='t_plus',
            other_params={
                'rf_model': RandomForestClassifier(
                    n_estimators=100,
                    min_samples_leaf=1,
                    max_features='sqrt',
                    random_state=42,
                ),
            },
        )
    ],
]

# Feature-importance methods, one single-element list per method. Every entry
# uses the "train-test" splitting strategy; the last tuple field carries the
# optional other_params keyword for entries that override defaults.
FI_ESTIMATORS = [
    [
        FIModelConfig(
            fi_name,
            fi_func,
            model_type=fi_model_type,
            splitting_strategy="train-test",
            **fi_extra,
        )
    ]
    for fi_name, fi_func, fi_model_type, fi_extra in (
        ('LFI_with_raw_RF', LFI_evaluation_RF, 'tree', {}),
        ('MDI_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"include_raw": False, "cv_ridge": 0,
                           "calc_loo_coef": False, "sample_split": "inbag"}}),
        ('LFI_with_raw_OOB_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"sample_split": "oob", "fit_on": "test",
                           "calc_loo_coef": False}}),
        ('TreeSHAP_RF', tree_shap_evaluation_RF, 'tree', {}),
        ('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, 't_plus', {}),
        ('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, 't_plus', {}),
        ('LIME_RF_plus', lime_evaluation_RF_plus, 't_plus', {}),
    )
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import sys
sys.path.append("../..")
from feature_importance.scripts.simulations_util import *

# Covariate generator and its settings (presumably forwarded to X_DGP as
# keyword arguments -- confirm against the simulation driver).
X_DGP = sample_real_X
X_PARAMS_DICT = dict(
    fpath="../data/classification_data/Diabetes/X_diabetes.csv",
    # NOTE(review): None presumably means "use every row" -- confirm in
    # sample_real_X.
    sample_row_n=None,
)

# Response generator and its settings.
Y_DGP = lss_model
Y_PARAMS_DICT = dict(
    beta=1,
    sigma=None,
    heritability=0.4,
    tau=0,
    m=3,
    r=2,
)

# Parameters swept by the simulation; each inner dict maps a display label to
# the value substituted for that parameter.
VARY_PARAM_NAME = ["heritability", "sample_row_n"]
VARY_PARAM_VALS = {
    "heritability": {str(level): level for level in (0.1, 0.2, 0.4, 0.8)},
    "sample_row_n": {str(count): count for count in (100, 200, 400, 600)},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import copy
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestRegressor
from imodels.importance.rf_plus import RandomForestPlusRegressor
from feature_importance.scripts.competing_methods_local import *



# Prediction models: a random forest regressor and an RF+ regressor that
# wraps an identically configured forest.
ESTIMATORS = [
    [
        ModelConfig(
            'RF',
            RandomForestRegressor,
            model_type='tree',
            other_params={
                'n_estimators': 100,
                'min_samples_leaf': 1,
                'max_features': 'sqrt',
                'random_state': 42,
            },
        )
    ],
    [
        ModelConfig(
            'RF_plus',
            RandomForestPlusRegressor,
            model_type='t_plus',
            other_params={
                'rf_model': RandomForestRegressor(
                    n_estimators=100,
                    min_samples_leaf=1,
                    max_features='sqrt',
                    random_state=42,
                ),
            },
        )
    ],
]

# Feature-importance methods, one single-element list per method. Every entry
# uses the "train-test" splitting strategy; the last tuple field carries the
# optional other_params keyword for entries that override defaults.
FI_ESTIMATORS = [
    [
        FIModelConfig(
            fi_name,
            fi_func,
            model_type=fi_model_type,
            splitting_strategy="train-test",
            **fi_extra,
        )
    ]
    for fi_name, fi_func, fi_model_type, fi_extra in (
        ('LFI_with_raw_RF', LFI_evaluation_RF, 'tree', {}),
        ('MDI_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"include_raw": False, "cv_ridge": 0,
                           "calc_loo_coef": False, "sample_split": "inbag"}}),
        ('LFI_with_raw_OOB_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"sample_split": "oob", "fit_on": "test",
                           "calc_loo_coef": False}}),
        ('TreeSHAP_RF', tree_shap_evaluation_RF, 'tree', {}),
        ('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, 't_plus', {}),
        ('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, 't_plus', {}),
        ('LIME_RF_plus', lime_evaluation_RF_plus, 't_plus', {}),
    )
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import sys
sys.path.append("../..")
from feature_importance.scripts.simulations_util import *


# Covariate generator and its settings (presumably forwarded to X_DGP as
# keyword arguments -- confirm against the simulation driver).
X_DGP = sample_real_X
X_PARAMS_DICT = dict(
    fpath="../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
    sample_row_n=442,
)

# Response generator and its settings.
Y_DGP = hierarchical_poly
Y_PARAMS_DICT = dict(
    beta=1,
    sigma=None,
    heritability=0.4,
    m=3,
    r=2,
)

# Parameters swept by the simulation; each inner dict maps a display label to
# the value substituted for that parameter.
VARY_PARAM_NAME = ["heritability", "sample_row_n"]
VARY_PARAM_VALS = {
    "heritability": {str(level): level for level in (0.1, 0.2, 0.4, 0.8)},
    "sample_row_n": {str(count): count for count in (100, 200, 300, 400)},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import copy
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestRegressor
from imodels.importance.rf_plus import RandomForestPlusRegressor
from feature_importance.scripts.competing_methods_local import *



# Prediction models: a random forest regressor and an RF+ regressor that
# wraps an identically configured forest.
ESTIMATORS = [
    [
        ModelConfig(
            'RF',
            RandomForestRegressor,
            model_type='tree',
            other_params={
                'n_estimators': 100,
                'min_samples_leaf': 1,
                'max_features': 'sqrt',
                'random_state': 42,
            },
        )
    ],
    [
        ModelConfig(
            'RF_plus',
            RandomForestPlusRegressor,
            model_type='t_plus',
            other_params={
                'rf_model': RandomForestRegressor(
                    n_estimators=100,
                    min_samples_leaf=1,
                    max_features='sqrt',
                    random_state=42,
                ),
            },
        )
    ],
]

# Feature-importance methods, one single-element list per method. Every entry
# uses the "train-test" splitting strategy; the last tuple field carries the
# optional other_params keyword for entries that override defaults.
FI_ESTIMATORS = [
    [
        FIModelConfig(
            fi_name,
            fi_func,
            model_type=fi_model_type,
            splitting_strategy="train-test",
            **fi_extra,
        )
    ]
    for fi_name, fi_func, fi_model_type, fi_extra in (
        ('LFI_with_raw_RF', LFI_evaluation_RF, 'tree', {}),
        ('MDI_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"include_raw": False, "cv_ridge": 0,
                           "calc_loo_coef": False, "sample_split": "inbag"}}),
        ('LFI_with_raw_OOB_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"sample_split": "oob", "fit_on": "test",
                           "calc_loo_coef": False}}),
        ('TreeSHAP_RF', tree_shap_evaluation_RF, 'tree', {}),
        ('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, 't_plus', {}),
        ('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, 't_plus', {}),
        ('LIME_RF_plus', lime_evaluation_RF_plus, 't_plus', {}),
    )
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import sys
sys.path.append("../..")
from feature_importance.scripts.simulations_util import *


# Covariate generator and its settings (presumably forwarded to X_DGP as
# keyword arguments -- confirm against the simulation driver).
X_DGP = sample_real_X
X_PARAMS_DICT = dict(
    fpath="../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
    sample_row_n=442,
)

# Response generator and its settings.
Y_DGP = hierarchical_poly
Y_PARAMS_DICT = dict(
    beta=1,
    sigma=None,
    heritability=0.4,
    m=3,
    r=2,
)

# Parameters swept by the simulation; each inner dict maps a display label to
# the value substituted for that parameter.
VARY_PARAM_NAME = ["heritability", "sample_row_n"]
VARY_PARAM_VALS = {
    "heritability": {str(level): level for level in (0.1, 0.2, 0.4, 0.8)},
    "sample_row_n": {str(count): count for count in (100, 200, 300, 400)},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import copy
import numpy as np
from feature_importance.util import ModelConfig, FIModelConfig
from sklearn.ensemble import RandomForestRegressor
from imodels.importance.rf_plus import RandomForestPlusRegressor
from feature_importance.scripts.competing_methods_local import *



# Prediction models: a random forest regressor and an RF+ regressor that
# wraps an identically configured forest.
ESTIMATORS = [
    [
        ModelConfig(
            'RF',
            RandomForestRegressor,
            model_type='tree',
            other_params={
                'n_estimators': 100,
                'min_samples_leaf': 1,
                'max_features': 'sqrt',
                'random_state': 42,
            },
        )
    ],
    [
        ModelConfig(
            'RF_plus',
            RandomForestPlusRegressor,
            model_type='t_plus',
            other_params={
                'rf_model': RandomForestRegressor(
                    n_estimators=100,
                    min_samples_leaf=1,
                    max_features='sqrt',
                    random_state=42,
                ),
            },
        )
    ],
]

# Feature-importance methods, one single-element list per method. Every entry
# uses the "train-test" splitting strategy; the last tuple field carries the
# optional other_params keyword for entries that override defaults.
FI_ESTIMATORS = [
    [
        FIModelConfig(
            fi_name,
            fi_func,
            model_type=fi_model_type,
            splitting_strategy="train-test",
            **fi_extra,
        )
    ]
    for fi_name, fi_func, fi_model_type, fi_extra in (
        ('LFI_with_raw_RF', LFI_evaluation_RF, 'tree', {}),
        ('MDI_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"include_raw": False, "cv_ridge": 0,
                           "calc_loo_coef": False, "sample_split": "inbag"}}),
        ('LFI_with_raw_OOB_RF', LFI_evaluation_RF, 'tree',
         {"other_params": {"sample_split": "oob", "fit_on": "test",
                           "calc_loo_coef": False}}),
        ('TreeSHAP_RF', tree_shap_evaluation_RF, 'tree', {}),
        ('LFI_with_raw_RF_plus', LFI_evaluation_RF_plus, 't_plus', {}),
        ('Kernel_SHAP_RF_plus', kernel_shap_evaluation_RF_plus, 't_plus', {}),
        ('LIME_RF_plus', lime_evaluation_RF_plus, 't_plus', {}),
    )
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
from feature_importance.scripts.simulations_util import *


X_DGP = sample_real_data
X_DGP = sample_real_X
X_PARAMS_DICT = {
"X_fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
"sample_row_n": None,
"return_data": "X"
"fpath": "../data/regression_data/Diabetes_regression/X_diabetes_regression.csv",
"sample_row_n": 442
}
# X_PARAMS_DICT = {
# "X_fpath": "../data/classification_data/Fico/X_fico.csv",
Expand All @@ -24,7 +23,7 @@
"beta": 1,
"sigma": None,
"heritability": 0.4,
"s": 4
"s": 5
}
# Y_PARAMS_DICT = {
# "y_fpath": "../data/classification_data/Fico/y_fico.csv",
Expand All @@ -40,4 +39,8 @@
VARY_PARAM_VALS = {"heritability": {"0.1": 0.1, "0.2": 0.2,
"0.4": 0.4, "0.8": 0.8},
"sample_row_n": {"100": 100, "200": 200,
"300": 300, "442": 442}}
"300": 300, "400": 400}}

# VARY_PARAM_NAME = ["heritability"]
# VARY_PARAM_VALS = {"heritability": {"0.1": 0.1, "0.2": 0.2,
# "0.4": 0.4, "0.8": 0.8}}
Loading

0 comments on commit 59a45b3

Please sign in to comment.