Skip to content

Commit

Permalink
save arrays first (#229)
Browse files Browse the repository at this point in the history
* save arrays first

* update

* notebook

* finer fom scan

* style: pre-commit fixes

* 0.6

* updaate mass window

* style: pre-commit fixes

* update

* fix

* notebook with min finding

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
jmduarte and pre-commit-ci[bot] authored Oct 28, 2024
1 parent e9b74ff commit 4538848
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 52 deletions.
2 changes: 0 additions & 2 deletions src/HH4b/postprocessing/CreateDatacard.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,6 @@
("hh4b-kl5", "ggHH_kl_5_kt_1_hbbhbb"),
("vbfhh4b", "qqHH_CV_1_C2V_1_kl_1_hbbhbb"),
("vbfhh4b-k2v0", "qqHH_CV_1_C2V_0_kl_1_hbbhbb"),
("vbfhh4b-k2v2", "qqHH_CV_1_C2V_2_kl_1_hbbhbb"),
("vbfhh4b-kl2", "qqHH_CV_1_C2V_1_kl_2_hbbhbb"),
("vbfhh4b-kv1p74-k2v1p37-kl14p4", "qqHH_CV_1p74_C2V_1p37_kl_14p4_hbbhbb"),
("vbfhh4b-kvm0p012-k2v0p03-kl10p2", "qqHH_CV_m0p012_C2V_0p03_kl_10p2_hbbhbb"),
("vbfhh4b-kvm0p758-k2v1p44-klm19p3", "qqHH_CV_m0p758_C2V_1p44_kl_m19p3_hbbhbb"),
Expand Down
2 changes: 1 addition & 1 deletion src/HH4b/postprocessing/PlotFits.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def plot_fits(args):
help="mass variable to make template",
)

run_utils.add_bool_arg(parser, "vbf-region", default=False, help="Include VBF region")
run_utils.add_bool_arg(parser, "vbf-region", default=True, help="Include VBF region")
run_utils.add_bool_arg(parser, "vbf-k2v0-signal", default=False, help="Plot VBF k2v=0 signal")

args = parser.parse_args()
Expand Down
86 changes: 55 additions & 31 deletions src/HH4b/postprocessing/PlotFoM.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -15,16 +15,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"arrays_file = \"/home/users/woodson/HH4b/plots/PostProcess/24June19Pt300250VBFRun3SaveArrays/fom_bin1_abcd_mass105-150_fom_arrays.npz\""
"# plot_dir = \"plots/PostProcess/24June19Pt300250VBFRun3SaveArrays/\"\n",
"# plot_dir = \"plots/PostProcess/24Oct23Reproduce/\"\n",
"plot_dir = \"plots/PostProcess/24Oct25GloParTv2/\"\n",
"# plot_dir = \"/home/users/haoyang/HH4b_billy/plots/PostProcess/24Oct25GloParTv2_vbf/\"\n",
"category = \"bin2\"\n",
"arrays_file = f\"{plot_dir}/fom_{category}_abcd_mass115-160_fom_arrays.npz\"\n",
"\n",
"bdt_min = 0.675\n",
"bdt_max = 0.95\n",
"xbb_min = 0.65\n",
"xbb_max = 0.825"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -47,55 +57,56 @@
"bdt_cuts = np.sort(np.unique(all_bdt_cuts))\n",
"xbb_cuts = np.sort(np.unique(all_xbb_cuts))\n",
"\n",
"bdt_cuts = np.array([bdt_cut for bdt_cut in bdt_cuts if bdt_cut >= 0.9])\n",
"xbb_cuts = np.array([xbb_cut for xbb_cut in xbb_cuts if xbb_cut >= 0.9])\n",
"bdt_cuts = np.array([bdt_cut for bdt_cut in bdt_cuts if bdt_cut >= bdt_min and bdt_cut <= bdt_max])\n",
"xbb_cuts = np.array([xbb_cut for xbb_cut in xbb_cuts if xbb_cut >= xbb_min and xbb_cut <= xbb_max])\n",
"\n",
"print(xbb_cuts)"
"print(xbb_cuts)\n",
"print(bdt_cuts)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"h_sb = hist.Hist(\n",
" hist.axis.Variable(list(bdt_cuts) + [1.0], name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts) + [1.0], name=\"xbb_cut\"),\n",
" hist.axis.Variable(list(bdt_cuts), name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts), name=\"xbb_cut\"),\n",
")\n",
"\n",
"h_b = hist.Hist(\n",
" hist.axis.Variable(list(bdt_cuts) + [1.0], name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts) + [1.0], name=\"xbb_cut\"),\n",
" hist.axis.Variable(list(bdt_cuts), name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts), name=\"xbb_cut\"),\n",
")\n",
"\n",
"h_s = hist.Hist(\n",
" hist.axis.Variable(list(bdt_cuts) + [1.0], name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts) + [1.0], name=\"xbb_cut\"),\n",
" hist.axis.Variable(list(bdt_cuts), name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts), name=\"xbb_cut\"),\n",
")\n",
"\n",
"h_b_unc = hist.Hist(\n",
" hist.axis.Variable(list(bdt_cuts) + [1.0], name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts) + [1.0], name=\"xbb_cut\"),\n",
" hist.axis.Variable(list(bdt_cuts), name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts), name=\"xbb_cut\"),\n",
")\n",
"\n",
"h_sideband = hist.Hist(\n",
" hist.axis.Variable(list(bdt_cuts) + [1.0], name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts) + [1.0], name=\"xbb_cut\"),\n",
" hist.axis.Variable(list(bdt_cuts), name=\"bdt_cut\"),\n",
" hist.axis.Variable(list(xbb_cuts), name=\"xbb_cut\"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"for xbb_cut in xbb_cuts:\n",
" for bdt_cut in bdt_cuts:\n",
" # find index of this cut\n",
" idx = np.where((all_bdt_cuts == bdt_cut) & (all_xbb_cuts == xbb_cut))[0][0]\n",
" if all_s[idx] > 0.25 and all_b[idx] >= 1 and all_sideband_events[idx] >= 6:\n",
" if all_s[idx] > 0.5 and all_b[idx] >= 2 and all_sideband_events[idx] >= 12:\n",
" h_sb.fill(bdt_cut, xbb_cut, weight=all_fom[idx])\n",
" h_b.fill(bdt_cut, xbb_cut, weight=all_b[idx])\n",
" h_b_unc.fill(bdt_cut, xbb_cut, weight=all_b_unc[idx])\n",
Expand All @@ -105,7 +116,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -149,7 +160,7 @@
" ax.text(\n",
" (bins_x[i] + bins_x[i + 1]) / 2,\n",
" (bins_y[j] + bins_y[j + 1]) / 2,\n",
" eff[i, j].round(2),\n",
" eff[i, j].round(1),\n",
" color=\"black\",\n",
" ha=\"center\",\n",
" va=\"center\",\n",
Expand All @@ -176,23 +187,36 @@
"metadata": {},
"outputs": [],
"source": [
"plot_dir = \".\"\n",
"name = f\"fom_bin1_abcd_mass105-110\"\n",
"name = f\"fom_{category}_abcd_mass115-160\"\n",
"plot_fom(h_sb, plot_dir, name=name, fontsize=2.0, show=True, label=\"Fig Of Merit\")\n",
"plot_fom(h_b, plot_dir, name=f\"{name}_bkg\", fontsize=2.0, show=True, label=\"Background Pred.\")\n",
"plot_fom(h_b_unc, plot_dir, name=f\"{name}_bkgunc\", fontsize=2.0, show=True, label=\"Background Unc.\")\n",
"plot_fom(\n",
" h_sideband, plot_dir, name=f\"{name}_sideband\", fontsize=2.0, show=True, label=\"Sideband Events\"\n",
")\n",
"plot_fom(h_sideband, plot_dir, name=f\"{name}_signal\", fontsize=2.0, show=True, label=\"Signal Pred.\")"
"# plot_fom(h_b, plot_dir, name=f\"{name}_bkg\", fontsize=2.0, show=True, label=\"Background Pred.\")\n",
"# plot_fom(h_b_unc, plot_dir, name=f\"{name}_bkgunc\", fontsize=2.0, show=True, label=\"Background Unc.\")\n",
"# plot_fom(h_sideband, plot_dir, name=f\"{name}_sideband\", fontsize=2.0, show=True, label=\"Sideband Events\")\n",
"# plot_fom(h_s, plot_dir, name=f\"{name}_signal\", fontsize=2.0, show=True, label=\"Signal Pred.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"global_min = np.min(h_sb.values()[h_sb.values() > 0])\n",
"\n",
"argmin_axis0 = np.argmin(np.abs(h_sb.values() - global_min), axis=0)\n",
"min_axis0 = np.min(h_sb.values(), axis=0)\n",
"\n",
"argmin_axis1 = np.argmin(np.abs(h_sb.values() - global_min), axis=1)\n",
"min_axis1 = np.min(h_sb.values(), axis=1)\n",
"\n",
"bdt_cut = h_sb.axes[0].edges[argmin_axis0[min_axis0 == global_min]]\n",
"xbb_cut = h_sb.axes[1].edges[argmin_axis1[min_axis1 == global_min]]\n",
"\n",
"print(f\"Category: {category}\")\n",
"print(f\"FoM: {global_min}\")\n",
"print(f\"BDT cut: {bdt_cut}\")\n",
"print(f\"TXbb cut: {xbb_cut}\")"
]
}
],
"metadata": {
Expand Down
46 changes: 28 additions & 18 deletions src/HH4b/postprocessing/PostProcess.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,21 +1015,17 @@ def scan_fom(
f"BG: {min_nevents[0]:.2f} S: {min_nevents[1]:.2f} S/B: {min_nevents[1]/min_nevents[0]:.2f} Sideband: {min_nevents[2]:.2f}"
)

name = f"{plot_name}_{args.method}_mass{mass_window[0]}-{mass_window[1]}"
print(f"Plotting FOM scan: {plot_dir}/{name} \n")
plotting.plot_fom(h_sb, plot_dir, name=name, fontsize=2.0)
plotting.plot_fom(h_b, plot_dir, name=f"{name}_bkg", fontsize=2.0)
plotting.plot_fom(h_b_unc, plot_dir, name=f"{name}_bkgunc", fontsize=2.0)
plotting.plot_fom(h_sideband, plot_dir, name=f"{name}_sideband", fontsize=2.0)

all_fom = np.array(all_fom)
all_b = np.array(all_b)
all_b_unc = np.array(all_b_unc)
all_s = np.array(all_s)
all_sideband_events = np.array(all_sideband_events)
all_xbb_cuts = np.array(all_xbb_cuts)
all_bdt_cuts = np.array(all_bdt_cuts)

# save all arrays to plot_dir
name = f"{plot_name}_{args.method}_mass{mass_window[0]}-{mass_window[1]}"
print(f"Saving FOM scan: {plot_dir}/{name}_fom_arrays \n")
np.savez(
f"{plot_dir}/{name}_fom_arrays.npz",
all_fom=all_fom,
Expand All @@ -1041,6 +1037,13 @@ def scan_fom(
all_bdt_cuts=all_bdt_cuts,
)

# plot fom scan
print(f"Plotting FOM scan: {plot_dir}/{name} \n")
plotting.plot_fom(h_sb, plot_dir, name=name, fontsize=2.0)
plotting.plot_fom(h_b, plot_dir, name=f"{name}_bkg", fontsize=2.0)
plotting.plot_fom(h_b_unc, plot_dir, name=f"{name}_bkgunc", fontsize=2.0)
plotting.plot_fom(h_sideband, plot_dir, name=f"{name}_sideband", fontsize=2.0)


def get_cuts(args, region: str):
xbb_cut_bin1 = args.txbb_wps[0]
Expand Down Expand Up @@ -1251,18 +1254,23 @@ def abcd(events_dict, get_cut, txbb_cut, bdt_cut, mass, mass_window, bg_keys_all
def postprocess_run3(args):
global bg_keys # noqa: PLW0602

# use for both pnet-legacy and glopart-v2
fom_window_by_mass = {
"H2Msd": [110, 140],
"H2PNetMass": [105, 150], # use wider range for FoM scan
}
blind_window_by_mass = {
"H2Msd": [110, 140],
"H2PNetMass": [110, 140], # only blind 3 bins
}

# use for both pnet-legacy
if args.txbb == "pnet-legacy":
fom_window_by_mass["H2PNetMass"] = [105, 150] # use wider range for FoM scan
blind_window_by_mass["H2PNetMass"] = [110, 140] # only blind 3 bins
# different for glopart-v2
elif args.txbb == "glopart-v2":
fom_window_by_mass["H2PNetMass"] = [115, 160] # use wider range for FoM scan
blind_window_by_mass["H2PNetMass"] = [120, 150] # only blind 3 bins
# different for pnet-v12
if args.txbb == "pnet-v12":
elif args.txbb == "pnet-v12":
fom_window_by_mass["H2PNetMass"] = [120, 150]
blind_window_by_mass["H2PNetMass"] = [120, 150]

Expand Down Expand Up @@ -1412,8 +1420,8 @@ def postprocess_run3(args):
args.method,
events_combined,
get_cuts(args, "vbf"),
np.arange(0.8, 0.999, 0.005),
np.arange(0.5, 0.99, 0.01),
np.arange(0.7, 0.85, 0.0025),
np.arange(0.9, 0.999, 0.0025),
mass_window,
plot_dir,
"fom_vbf",
Expand All @@ -1425,7 +1433,7 @@ def postprocess_run3(args):
if args.fom_scan_bin1:
if args.vbf and args.vbf_priority:
print(
f"Scanning Bin 1 vetoing VBF TXbb WP: {args.vbf_txbb_wp} BDT WP: {args.vbf_bdt_wp}"
f"Scanning Bin 1, vetoing VBF TXbb WP: {args.vbf_txbb_wp} BDT WP: {args.vbf_bdt_wp}"
)
else:
print("Scanning Bin 1, no VBF category")
Expand All @@ -1446,16 +1454,18 @@ def postprocess_run3(args):
if args.fom_scan_bin2:
if args.vbf:
print(
f"Scanning Bin 2 with VBF TXbb WP: {args.vbf_txbb_wp} BDT WP: {args.vbf_bdt_wp}, bin 1 WP: {args.txbb_wps[0]} BDT WP: {args.bdt_wps[0]}"
f"Scanning Bin 2, vetoing VBF TXbb WP: {args.vbf_txbb_wp} BDT WP: {args.vbf_bdt_wp}, bin 1 WP: {args.txbb_wps[0]} BDT WP: {args.bdt_wps[0]}"
)
else:
print(f"Scanning Bin 2 with bin 1 WP: {args.txbb_wps[0]} BDT WP: {args.bdt_wps[0]}")
print(
f"Scanning Bin 2, vetoing bin 1 WP: {args.txbb_wps[0]} BDT WP: {args.bdt_wps[0]}"
)
scan_fom(
args.method,
events_combined,
get_cuts(args, "bin2"),
np.arange(0.8, args.txbb_wps[0], 0.02),
np.arange(0.5, args.bdt_wps[0], 0.02),
np.arange(0.6, args.txbb_wps[0], 0.0025),
np.arange(0.6, args.bdt_wps[0], 0.0025),
mass_window,
plot_dir,
"fom_bin2",
Expand Down

0 comments on commit 4538848

Please sign in to comment.