Skip to content

Commit

Permalink
1. use n_elem for calculating count.avg 2. store count.avg.smoothed a…
Browse files Browse the repository at this point in the history
…nd count.avg.smoothed.agg in cvd table
  • Loading branch information
Yaoyx committed Feb 15, 2024
1 parent ba2be42 commit 0a7c01f
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 7 deletions.
17 changes: 11 additions & 6 deletions cooltools/api/expected.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,8 @@ def _make_diag_table(n_bins, bad_locs):
)
diags = diags[diags["n_elem"] > 0]

diags = diags.drop("n_elem", axis=1)
# keep diags["n_elem"] for calculating count.avg
# diags = diags.drop("n_elem", axis=1)
return diags.astype(int)


Expand Down Expand Up @@ -1009,17 +1010,21 @@ def expected_cis(

# calculate actual averages by dividing sum by n_valid:
for key in chain(["count"], transforms):
result[f"{key}.avg"] = result[f"{key}.sum"] / result[_NUM_VALID]
if key == "count":
result[f"{key}.avg"] = result[f"{key}.sum"] / result["n_elem"]
else:
result[f"{key}.avg"] = result[f"{key}.sum"] / result[_NUM_VALID]


# additional smoothing and aggregating options would add columns only, not replace them
if smooth:
result_smooth = expected_smoothing.agg_smooth_cvd(
result,
sigma_log10=smooth_sigma,
)
# add smoothed columns to the result (only balanced for now)
# add smoothed columns to the result (only balanced for now) (include count as well)
result = result.merge(
result_smooth[["balanced.avg.smoothed", _DIST]],
result_smooth[["balanced.avg.smoothed", "count.avg.smoothed", _DIST]],
on=[_REGION1, _REGION2, _DIST],
how="left",
)
Expand All @@ -1028,10 +1033,10 @@ def expected_cis(
result,
groupby=None,
sigma_log10=smooth_sigma,
).rename(columns={"balanced.avg.smoothed": "balanced.avg.smoothed.agg"})
).rename(columns={"balanced.avg.smoothed": "balanced.avg.smoothed.agg", "count.avg.smoothed": "count.avg.smoothed.agg"})
# add smoothed columns to the result
result = result.merge(
result_smooth_agg[["balanced.avg.smoothed.agg", _DIST]],
result_smooth_agg[["balanced.avg.smoothed.agg", "count.avg.smoothed.agg", _DIST]],
on=[
_DIST,
],
Expand Down
26 changes: 25 additions & 1 deletion cooltools/sandbox/expected_smoothing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
"n_contacts": "balanced.sum",
"contact_freq": "balanced.avg",
"smooth_suffix": ".smoothed",
"n_pixels_tot": "n_elem",
"n_contacts_raw": "count.sum",
"contact_freq_raw": "count.avg",
}


Expand Down Expand Up @@ -182,6 +185,8 @@ def _smooth_cvd_group(cvd, sigma_log10, window_sigma, points_per_sigma, cols=Non
{
cols["n_pixels"]: "sum",
cols["n_contacts"]: "sum",
cols["n_pixels_tot"]: "sum",
cols["n_contacts_raw"]: "sum",
}
)
.reset_index()
Expand All @@ -198,6 +203,18 @@ def _smooth_cvd_group(cvd, sigma_log10, window_sigma, points_per_sigma, cols=Non
points_per_sigma=points_per_sigma,
)

smoothed_raw_sum, smoothed_n_elem = log_smooth(
cvd_smoothed[cols["dist"]].values.astype(np.float64),
[
cvd_smoothed[cols["n_contacts_raw"]].values.astype(np.float64),
cvd_smoothed[cols["n_pixels_tot"]].values.astype(np.float64),
],
sigma_log10=sigma_log10,
window_sigma=window_sigma,
points_per_sigma=points_per_sigma,
)


# cvd_smoothed[cols["contact_freq"]] = cvd_smoothed[cols["n_contacts"]] / cvd_smoothed[cols["n_pixels"]]

cvd_smoothed[cols["n_pixels"] + cols["smooth_suffix"]] = smoothed_n_valid
Expand All @@ -207,6 +224,13 @@ def _smooth_cvd_group(cvd, sigma_log10, window_sigma, points_per_sigma, cols=Non
/ cvd_smoothed[cols["n_pixels"] + cols["smooth_suffix"]]
)

cvd_smoothed[cols["n_pixels_tot"] + cols["smooth_suffix"]] = smoothed_n_elem
cvd_smoothed[cols["n_contacts_raw"] + cols["smooth_suffix"]] = smoothed_raw_sum
cvd_smoothed[cols["contact_freq_raw"] + cols["smooth_suffix"]] = (
cvd_smoothed[cols["n_contacts_raw"] + cols["smooth_suffix"]]
/ cvd_smoothed[cols["n_pixels_tot"] + cols["smooth_suffix"]]
)

return cvd_smoothed


Expand Down Expand Up @@ -291,7 +315,7 @@ def agg_smooth_cvd(
)

cvd_smoothed.drop(
[cols["n_pixels"], cols["n_contacts"]], axis="columns", inplace=True
[cols["n_pixels"], cols["n_contacts"], cols["n_pixels_tot"], cols["n_contacts_raw"]], axis="columns", inplace=True
)

# cvd = cvd.drop(cols["contact_freq"], axis='columns', errors='ignore')
Expand Down

0 comments on commit 0a7c01f

Please sign in to comment.