Update pandas>2 and networkx>3 dependencies (#65)
* Allow pandas >2 dependency

* Allow networkx >3 dependency

* GitHub Actions: add tests for Python 3.10 and 3.11

* Change `faiss` dependency from GPU to CPU

GPU not yet supported for Python 3.11
facebookresearch/faiss#2861 (comment)

* Bump version: v0.9.6

* Fix requirements: pandas was bumped twice, networkx not at all

The previous edit bumped pandas's version cap twice instead of bumping pandas and networkx once each

---------

Signed-off-by: Ehud-Karavani <[email protected]>
ehudkr authored Oct 25, 2023
1 parent f871ac1 commit d80e3ea
Showing 9 changed files with 41 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false  # Don't cancel entire run if one python-version fails
       matrix:
-        python-version: ["3.7", "3.8", "3.9"]
+        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
     name: Build and test on Python ${{ matrix.python-version }}
     steps:
       - uses: actions/checkout@v3
2 changes: 1 addition & 1 deletion causallib/__init__.py
@@ -1 +1 @@
-__version__ = "0.9.5"
+__version__ = "0.9.6"
2 changes: 1 addition & 1 deletion causallib/contrib/requirements.txt
@@ -1,3 +1,3 @@
 -f https://download.pytorch.org/whl/cpu/  # To support cpu torch installation
 torch>=1.2.0
-faiss-gpu~=1.7.0
+faiss-cpu~=1.7.0  # Can also use gpu for some Python versions
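Worth noting: the faiss-cpu and faiss-gpu wheels install the same `faiss` Python package, so swapping the pinned wheel needs no code changes. A minimal sketch of the unchanged import path, assuming faiss-cpu ~=1.7 is installed (illustrative, not from this repository):

    import numpy as np
    import faiss  # same import regardless of the faiss-cpu / faiss-gpu wheel

    dim = 8
    vectors = np.random.rand(100, dim).astype("float32")  # faiss expects float32
    index = faiss.IndexFlatL2(dim)  # exact L2 nearest-neighbor index, runs on CPU
    index.add(vectors)
    distances, neighbors = index.search(vectors[:5], 3)  # top-3 neighbors for 5 queries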
2 changes: 1 addition & 1 deletion causallib/estimation/matching.py
@@ -679,7 +679,7 @@ def _matches_to_weights_single_matching(self, s, t, match_df):
         name = {0: "control", 1: "treatment"}
         weights.name = "{s}_to_{t}".format(s=name[s], t=name[t])
         s_to_t_matches = match_df.loc[t][self.treatments_ == s].matches
-        for source_idx, matches_list in s_to_t_matches.iteritems():
+        for source_idx, matches_list in s_to_t_matches.items():
             if matches_list:
                 weights.loc[source_idx] += 1
                 for match in matches_list:
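Background on this change: pandas 2.0 removed `Series.iteritems()`, which had been deprecated since pandas 1.5. `Series.items()` yields the same (index, value) pairs and also exists in older pandas versions, making it a drop-in replacement. A minimal sketch with made-up data (not from the codebase):

    import pandas as pd

    matches = pd.Series({"u1": [0, 2], "u2": []})
    for source_idx, matches_list in matches.items():  # was `.iteritems()` in pandas <2
        print(source_idx, len(matches_list))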
41 changes: 27 additions & 14 deletions causallib/simulation/CausalSimulator3.py
@@ -186,7 +186,7 @@ def __init__(self, topology, var_types, prob_categories, link_types, snr, treatm

         # Create a graph out of matrix topology:
         self.topology = topology
-        self.graph_topology = nx.from_numpy_matrix(topology.transpose(), create_using=nx.DiGraph())  # type: nx.DiGraph
+        self.graph_topology = nx.from_numpy_array(topology.transpose(), create_using=nx.DiGraph())  # type: nx.DiGraph
         self.graph_topology = nx.relabel_nodes(self.graph_topology,
                                                dict(list(zip(list(range(self.m)), self.var_names))))
@@ -751,13 +751,17 @@ def generate_outcome_col(self, X_parents, link_type, snr, prob_category, outcome
         elif outcome_type == SURVIVAL:
             if survival_distribution == "expon":
                 rnd_state = np.random.randint(low=0, high=999999)
-                param = survival_baseline * np.exp(x_outcome)
+                param = survival_baseline * np.exp(x_outcome.astype(float))
                 x_outcome = pd.Series(
                     stats.expon(loc=0.0, scale=(1.0 / param)).rvs(x_outcome.size, random_state=rnd_state),
                     index=x_outcome.index)
                 cf = {i: pd.Series(
-                    stats.expon(loc=0.0, scale=(1 / (survival_baseline * np.exp(cf[i])))).rvs(x_outcome.size,
-                                                                                              random_state=rnd_state),
+                    stats.expon(
+                        loc=0.0,
+                        scale=(1 / (survival_baseline * np.exp(cf[i].astype(float))))).rvs(
+                        x_outcome.size,
+                        random_state=rnd_state
+                    ),
                     index=x_outcome.index)
                     if has_treatment_parent else cf[i] for i in list(cf.keys())}
                 # Supplying the random state assures that the resulting outcome and cfs is consistent while sampling rvs
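The `.astype(float)` casts added throughout this file all guard against the same failure mode: a Series carrying numbers under object dtype (as these simulated columns apparently can) makes numpy ufuncs such as `np.exp` raise a `TypeError`, because numpy looks for an `exp` method on each element instead of taking the numeric fast path. A minimal reproduction sketch (illustrative):

    import numpy as np
    import pandas as pd

    s = pd.Series([0.1, 0.2], dtype=object)
    # np.exp(s) raises TypeError: float objects have no callable `exp` method
    result = np.exp(s.astype(float))  # cast first, then apply the ufunc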
@@ -826,7 +830,7 @@ def generate_censor_col(self, X_parents, link_type, snr, prob_category, outcome_
                                                              var_name=var_name)
             if survival_distribution == "expon":
                 # param = survival_baseline * (prob_category.iloc[0]/prob_category.loc[1]) * np.exp(x_signal)  # Cox ph
-                param = survival_baseline * np.exp(x_signal)  # Cox ph model
+                param = survival_baseline * np.exp(x_signal.astype(float))  # Cox ph model
                 survival_distribution = stats.expon(loc=0.0, scale=(1.0 / param))
                 x_censor = pd.Series(survival_distribution.rvs(size=x_signal.size), index=x_signal.index)
                 # scale values with censoring proportions - 0 is non censored, 1 is censored:
@@ -941,7 +945,7 @@ def _treatment_logistic_dichotomous(x_continuous, prob_category, params=None):
         # compute propensities:
         t = x_continuous.quantile(prob_category.iloc[1], interpolation="higher")
         slope = params.get("slope", 1.0) if params is not None else 1.0
-        cur_propensity = 1.0 / (1 + np.exp(slope * (x_continuous - np.repeat(t, x_continuous.size))))
+        cur_propensity = 1.0 / (1 + np.exp(slope * (x_continuous - np.repeat(t, x_continuous.size)).astype(float)))
         # assign the propensity values:
         propensity.loc[:, columns_names[1]] = cur_propensity
         propensity.loc[:, columns_names[0]] = np.ones(cur_propensity.size) - cur_propensity
@@ -968,11 +972,12 @@ def _treatment_odds_ratio(x_continuous, prob_category, snr):
             - **propensity** (*pd.DataFrame*): The marginal conditional probability of treatment given covariates.
               A DataFrame shaped (num_samples x num_of_possible_treatment_categories).
         """
+        x_continuous = x_continuous.astype(float)
         index_names = x_continuous.index
         columns_names = prob_category.index
         propensity = pd.DataFrame(index=index_names, columns=columns_names)
         # start with filling up the odds ratio:
-        for cur_category, p in prob_category.iteritems():
+        for cur_category, p in prob_category.items():
             t = x_continuous.quantile(p, interpolation="higher")
             cur_propensity = (1.0 / (1 + np.exp((x_continuous - np.repeat(t, x_continuous.size)))))  # type: pd.Series
             cur_propensity = cur_propensity.div(np.ones_like(cur_propensity) - cur_propensity)
@@ -1012,8 +1017,12 @@ def _treatment_quantile_gauss_fit(x_continuous, prob_category, snr):
         columns_names = prob_category.index
         propensity = pd.DataFrame(index=index_names, columns=columns_names)
         # section the signal into bins based on the probabilities (quantiles)
-        bins = pd.qcut(x=x_continuous, q=np.cumsum(pd.Series(0, index=["null"]).append(prob_category)),
-                       labels=columns_names)
+        x_continuous = x_continuous.astype(float)
+        bins = pd.qcut(
+            x=x_continuous,
+            q=np.cumsum(pd.concat([pd.Series(0, index=["null"]), prob_category])),
+            labels=columns_names
+        )
         for cur_category in columns_names:
             cur_samples_mask = (bins == cur_category)
             cur_samples = x_continuous[cur_samples_mask]
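Background on the `append` changes in this file: pandas 2.0 removed `Series.append()` (deprecated since pandas 1.4); `pd.concat()` on a list of Series produces the identical concatenated result. A minimal sketch of the quantile-edge pattern used here, with made-up probabilities:

    import pandas as pd

    prob_category = pd.Series([0.5, 0.5], index=["a", "b"])
    # pandas <2: pd.Series(0, index=["null"]).append(prob_category).cumsum()
    edges = pd.concat([pd.Series(0, index=["null"]), prob_category]).cumsum()
    print(edges.tolist())  # [0.0, 0.5, 1.0]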
@@ -1103,8 +1112,10 @@ def _discretize_col(x_col, prob_category, method="empiric", retbins=False, bins=
             res = cutoffs.sum(axis="columns")
         elif method == "empiric":  # discretize according to percentiles from the empirical data itself
             try:
-                cumulative_ps = pd.Series(0, index=["null"]).append(prob_category).cumsum()
-                res, bins = pd.qcut(x=x_col, q=cumulative_ps,
+                cumulative_ps = pd.concat(
+                    [pd.Series(0, index=["null"]), prob_category]
+                ).cumsum()
+                res, bins = pd.qcut(x=x_col.astype(float), q=cumulative_ps,
                                     labels=prob_category.index, retbins=True)
                 bins = pd.Series(data=bins, index=cumulative_ps.index)
                 # TODO: maybe noise this a little?
@@ -1541,7 +1552,7 @@ def generate_random_topology(n_covariates, p, n_treatments=1, n_outcomes=1, n_ce
     generated_vars = covariates + treatments + outcomes + censoring
     generated_vars = pd.Series(data=generated_vars, index=generated_vars)

-    total_vars = given_vars.append(generated_vars)
+    total_vars = pd.concat([given_vars, generated_vars])
     topology = pd.DataFrame(data=0, index=total_vars, columns=total_vars, dtype=bool)

     # generate between the independent given set to generated set:
@@ -1577,10 +1588,12 @@ def generate_random_topology(n_covariates, p, n_treatments=1, n_outcomes=1, n_ce
     generated_types[treatments] = TREATMENT
     generated_types[outcomes] = OUTCOME
     generated_types[censoring] = CENSOR
-    var_types = pd.Series(data=COVARIATE, index=given_vars).append(generated_types)
+    var_types = pd.concat(
+        [pd.Series(data=COVARIATE, index=given_vars), generated_types]
+    )

     # create a hidden variables mask:
-    hidden_vars = given_vars.append(pd.Series(covariates)).sample(frac=p_hidden)
+    hidden_vars = pd.concat([given_vars, pd.Series(covariates)]).sample(frac=p_hidden)
     var_types[hidden_vars] = HIDDEN

     return topology, var_types
6 changes: 3 additions & 3 deletions causallib/tests/test_causal_simulator3.py
@@ -308,7 +308,7 @@ def test_random_topology_generation(self):
         np.testing.assert_array_equal(T.loc[X.columns, :].sum(axis="columns"), np.zeros(5))

         # Test for DAGness:
-        from networkx import DiGraph, from_numpy_matrix, is_directed_acyclic_graph
+        from networkx import DiGraph, from_numpy_array, is_directed_acyclic_graph
         NUM_TESTS = 50
         for test in range(NUM_TESTS):
             n_cov = np.random.randint(low=10, high=100)
@@ -317,7 +317,7 @@ def test_random_topology_generation(self):
             n_cen = np.random.randint(low=0, high=n_tre_out)
             T, _ = CS3m.generate_random_topology(n_covariates=n_cov, p=p, n_treatments=n_tre_out, n_outcomes=n_tre_out,
                                                  n_censoring=n_cen, given_vars=[], p_hidden=0)
-            G = from_numpy_matrix(T.values.transpose(), create_using=DiGraph())
+            G = from_numpy_array(T.values.transpose(), create_using=DiGraph())
             res = is_directed_acyclic_graph(G)
             self.assertTrue(res)

@@ -350,7 +350,7 @@ def test_linear_linking(self):
                              outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes)
         X, prop, cf = sim.generate_data(num_samples=self.NUM_SAMPLES)

-        singular_values = np.linalg.svd(X.values, compute_uv=False)
+        singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False)
         eps = 1e-10
         rank = np.sum(singular_values > eps)
         self.assertEqual(rank, 2,
3 changes: 2 additions & 1 deletion causallib/tests/test_overlap_weights.py
@@ -80,6 +80,7 @@ def test_ow_weights_reversed_to_propensity(self):
         propensity = pd.DataFrame(propensity)
         ow_weights = self.estimator.compute_weight_matrix(self.data_r_100["X"], self.data_r_100["a"],
                                                           clip_min=None, clip_max=None)
+        propensity.columns = propensity.columns.astype(ow_weights.columns.dtype)  # Avoid column dtype assert
         pd.testing.assert_series_equal(propensity.loc[:, 0], ow_weights.loc[:, 1], check_names=False)
         pd.testing.assert_series_equal(propensity.loc[:, 1], ow_weights.loc[:, 0], check_names=False)
         pd.testing.assert_index_equal(propensity.columns, ow_weights.columns)
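Background on these test fixes (here and in test_survival.py below): `pd.testing.assert_index_equal`, which the frame and series asserts also run internally, compares index dtypes strictly, and pandas 2 can infer a different column-index dtype than pandas 1 did. Casting one side's columns to the other's dtype keeps the value comparison while sidestepping the dtype mismatch. A minimal sketch (illustrative):

    import pandas as pd

    left = pd.Index([0, 1], dtype="int64")
    right = pd.Index([0, 1], dtype="int32")
    # pd.testing.assert_index_equal(left, right) fails: Attribute "dtype" are different
    pd.testing.assert_index_equal(left.astype(right.dtype), right)  # passes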
3 changes: 3 additions & 0 deletions causallib/tests/test_survival.py
@@ -635,6 +635,9 @@ def test_marginal_kaplan_meier_curves(self):
                                                             1: lifelines_km_a1.predict(sorted(self.t.unique()))})
         marginal_curves_lifelines.columns.name = 'a'
         marginal_curves_lifelines.index.name = 't'
+        marginal_curves_lifelines.columns = (
+            marginal_curves_lifelines.columns.astype(marginal_curves_causallib.columns.dtype)
+        )  # Avoid column dtype assert

         pd.testing.assert_frame_equal(marginal_curves_causallib, marginal_curves_causallib_lifelines)
         pd.testing.assert_frame_equal(marginal_curves_causallib, marginal_curves_lifelines)
4 changes: 2 additions & 2 deletions requirements.txt
@@ -1,7 +1,7 @@
-pandas>=0.25.2,<2
+pandas>=0.25.2,<3
 scipy>=0.19,<2
 statsmodels>=0.9,<1
-networkx>=1.1,<3
+networkx>=1.1,<4
 numpy>=1.13,<2
 scikit-learn>=0.20,<1.2
 matplotlib>=2.2,<4
