diff --git a/cooltools/api/expected.py b/cooltools/api/expected.py index 8d2fbeac..eeacd598 100644 --- a/cooltools/api/expected.py +++ b/cooltools/api/expected.py @@ -256,7 +256,8 @@ def _make_diag_table(n_bins, bad_locs): ) diags = diags[diags["n_elem"] > 0] - diags = diags.drop("n_elem", axis=1) + # keep diags["n_elem"] for calculating count.avg + # diags = diags.drop("n_elem", axis=1) return diags.astype(int) @@ -1009,7 +1010,11 @@ def expected_cis( # calculate actual averages by dividing sum by n_valid: for key in chain(["count"], transforms): - result[f"{key}.avg"] = result[f"{key}.sum"] / result[_NUM_VALID] + if key == "count": + result[f"{key}.avg"] = result[f"{key}.sum"] / result["n_elem"] + else: + result[f"{key}.avg"] = result[f"{key}.sum"] / result[_NUM_VALID] + # additional smoothing and aggregating options would add columns only, not replace them if smooth: @@ -1017,9 +1022,9 @@ def expected_cis( result, sigma_log10=smooth_sigma, ) - # add smoothed columns to the result (only balanced for now) + # add smoothed columns to the result (only balanced for now) (include count as well) result = result.merge( - result_smooth[["balanced.avg.smoothed", _DIST]], + result_smooth[["balanced.avg.smoothed", "count.avg.smoothed", _DIST]], on=[_REGION1, _REGION2, _DIST], how="left", ) @@ -1028,10 +1033,10 @@ def expected_cis( result, groupby=None, sigma_log10=smooth_sigma, - ).rename(columns={"balanced.avg.smoothed": "balanced.avg.smoothed.agg"}) + ).rename(columns={"balanced.avg.smoothed": "balanced.avg.smoothed.agg", "count.avg.smoothed": "count.avg.smoothed.agg"}) # add smoothed columns to the result result = result.merge( - result_smooth_agg[["balanced.avg.smoothed.agg", _DIST]], + result_smooth_agg[["balanced.avg.smoothed.agg", "count.avg.smoothed.agg", _DIST]], on=[ _DIST, ], diff --git a/cooltools/sandbox/expected_smoothing.py b/cooltools/sandbox/expected_smoothing.py index 630675cc..18c354b9 100644 --- a/cooltools/sandbox/expected_smoothing.py +++ b/cooltools/sandbox/expected_smoothing.py @@ -10,6 +10,9 @@ "n_contacts": "balanced.sum", "contact_freq": "balanced.avg", "smooth_suffix": ".smoothed", + "n_pixels_tot": "n_elem", + "n_contacts_raw": "count.sum", + "contact_freq_raw": "count.avg", } @@ -182,6 +185,8 @@ def _smooth_cvd_group(cvd, sigma_log10, window_sigma, points_per_sigma, cols=Non { cols["n_pixels"]: "sum", cols["n_contacts"]: "sum", + cols["n_pixels_tot"]: "sum", + cols["n_contacts_raw"]: "sum", } ) .reset_index() @@ -198,6 +203,18 @@ def _smooth_cvd_group(cvd, sigma_log10, window_sigma, points_per_sigma, cols=Non points_per_sigma=points_per_sigma, ) + smoothed_raw_sum, smoothed_n_elem = log_smooth( + cvd_smoothed[cols["dist"]].values.astype(np.float64), + [ + cvd_smoothed[cols["n_contacts_raw"]].values.astype(np.float64), + cvd_smoothed[cols["n_pixels_tot"]].values.astype(np.float64), + ], + sigma_log10=sigma_log10, + window_sigma=window_sigma, + points_per_sigma=points_per_sigma, + ) + + # cvd_smoothed[cols["contact_freq"]] = cvd_smoothed[cols["n_contacts"]] / cvd_smoothed[cols["n_pixels"]] cvd_smoothed[cols["n_pixels"] + cols["smooth_suffix"]] = smoothed_n_valid @@ -207,6 +224,13 @@ def _smooth_cvd_group(cvd, sigma_log10, window_sigma, points_per_sigma, cols=Non / cvd_smoothed[cols["n_pixels"] + cols["smooth_suffix"]] ) + cvd_smoothed[cols["n_pixels_tot"] + cols["smooth_suffix"]] = smoothed_n_elem + cvd_smoothed[cols["n_contacts_raw"] + cols["smooth_suffix"]] = smoothed_raw_sum + cvd_smoothed[cols["contact_freq_raw"] + cols["smooth_suffix"]] = ( + cvd_smoothed[cols["n_contacts_raw"] + cols["smooth_suffix"]] + / cvd_smoothed[cols["n_pixels_tot"] + cols["smooth_suffix"]] + ) + return cvd_smoothed @@ -291,7 +315,7 @@ def agg_smooth_cvd( ) cvd_smoothed.drop( - [cols["n_pixels"], cols["n_contacts"]], axis="columns", inplace=True + [cols["n_pixels"], cols["n_contacts"], cols["n_pixels_tot"], cols["n_contacts_raw"]], axis="columns", inplace=True ) # cvd = cvd.drop(cols["contact_freq"], axis='columns', errors='ignore')