Skip to content

Commit

Permalink
Refactor and test summarize design
Browse files Browse the repository at this point in the history
  • Loading branch information
dafeda authored Nov 18, 2024
1 parent 57efdd7 commit 4701ee9
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 126 deletions.
192 changes: 73 additions & 119 deletions src/fmu/tools/sensitivities/_designsummary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
import pandas as pd


def _get_sensitivity_type(senscase: str) -> str:
"""Determine sensitivity type based on the case name"""
sensitivity_types = {"p10_p90": "mc", "ref": "ref", "skip": "skip"}
return sensitivity_types.get(senscase.lower(), "scalar")


def summarize_design(filename, sheetname="DesignSheet01"):
"""
Summarizes the design set up for one by one sensitivities
Expand Down Expand Up @@ -34,7 +40,22 @@ def summarize_design(filename, sheetname="DesignSheet01"):
"""

# Initialisation of dataframe to store results
# Read design matrix
if str(filename).endswith(".xlsx"):
dgn = pd.read_excel(filename, sheetname, engine="openpyxl")
# Drop empty rows or columns that have been read in
# due to having background colour/formatting
dgn.dropna(axis=0, how="all", inplace=True)
dgn = dgn.loc[:, ~dgn.columns.str.contains("^Unnamed")]
elif str(filename).endswith(".csv"):
dgn = pd.read_csv(filename)
else:
raise ValueError(
"Design matrix must be on Excel or csv format"
" and filename must end with .xlsx or .csv"
)

# Initialize results DataFrame with same columns
designsummary = pd.DataFrame(
columns=[
"sensno",
Expand All @@ -48,126 +69,59 @@ def summarize_design(filename, sheetname="DesignSheet01"):
"endreal2",
]
)
sensno = 0
startreal1 = 0
endreal1 = 0

# Read design matrix and find realisation numbers for each sensitivity
if str(filename).endswith(".xlsx"):
dgn = pd.read_excel(filename, sheetname, engine="openpyxl")

# Drop empty rows or columns that have been read in
# due to having background colour/formatting

dgn.dropna(axis=0, how="all", inplace=True)
dgn = dgn.loc[:, ~dgn.columns.str.contains("^Unnamed")]

elif str(filename).endswith(".csv"):
dgn = pd.read_csv(filename)
# Get unique sensitivity names in order of appearance
sensnames = dgn["SENSNAME"].unique()

else:
raise ValueError(
"Design matrix must be on Excel or csv format"
" and filename must end with .xlsx or .csv"
)
sensname = dgn.loc[0]["SENSNAME"]
casename1 = dgn.loc[0]["SENSCASE"]
if casename1.lower() == "p10_p90":
senstype = "mc"
elif casename1.lower() == "ref":
senstype = "ref"
else:
senstype = "scalar"

currentsensname = sensname
currentsenscase = casename1
# starting with first case
secondcase = False
casename2 = None
startreal2 = None
endreal2 = None

for row in dgn.itertuples():
if currentsensname == row.SENSNAME and currentsenscase == row.SENSCASE:
if secondcase is True:
endreal2 = row.REAL
else:
endreal1 = row.REAL
elif currentsensname == row.SENSNAME:
secondcase = True
startreal2 = row.REAL
endreal2 = row.REAL
casename2 = row.SENSCASE
currentsensname = row.SENSNAME
currentsenscase = casename2
else:
if senstype != "skip":
if secondcase is True:
designsummary.loc[sensno] = [
sensno,
sensname,
senstype,
casename1,
startreal1,
endreal1,
casename2,
startreal2,
endreal2,
]
sensno += 1
else:
designsummary.loc[sensno] = [
sensno,
sensname,
senstype,
casename1,
startreal1,
endreal1,
None,
None,
None,
]
sensno += 1
secondcase = False
startreal1 = row.REAL
endreal1 = row.REAL

casename1 = row.SENSCASE
sensname = row.SENSNAME
currentsenscase = casename1
currentsensname = sensname
if row.SENSCASE.lower() == "p10_p90":
senstype = "mc"
elif row.SENSCASE.lower() == "skip":
senstype = "skip"
else:
senstype = "scalar"

# For last row
if senstype != "skip":
if secondcase is True:
designsummary.loc[sensno] = [
sensno,
sensname,
senstype,
casename1,
startreal1,
endreal1,
casename2,
startreal2,
endreal2,
]
sensno = 0
for sensname in sensnames:
sens_group = dgn[dgn["SENSNAME"] == sensname].copy()
# Get cases in order of appearance
cases = sens_group.drop_duplicates("SENSCASE")[["SENSCASE"]].values.flatten()

# Skip if first case type is 'skip'
# The "skip" option was added when creating tornado plots
# with calc_tornadoinput.
# It allowed manual exclusion of sensitivities from tornado plots by changing
# SENSCASE to "skip" in the design matrix after running the experiment.
# This would exclude the sensitivity from both the
# designsummary table and the tornado plot.
# calc_tornadoinput is deprecated so this can be removed once
# calc_tornadoinput is removed.
senstype = _get_sensitivity_type(cases[0])
if senstype == "skip":
continue

# First case
case1_data = sens_group[sens_group["SENSCASE"] == cases[0]]
casename1 = cases[0]
startreal1 = case1_data["REAL"].min()
endreal1 = case1_data["REAL"].max()

# Handle second case if it exists
if len(cases) > 1:
case2_data = sens_group[sens_group["SENSCASE"] == cases[1]]
casename2 = cases[1]
startreal2 = case2_data["REAL"].min()
endreal2 = case2_data["REAL"].max()
else:
designsummary.loc[sensno] = [
sensno,
sensname,
senstype,
casename1,
startreal1,
endreal1,
None,
None,
None,
]
casename2 = None
startreal2 = None
endreal2 = None

# Add row to results
designsummary.loc[sensno] = [
sensno,
sensname,
senstype,
casename1,
startreal1,
endreal1,
casename2,
startreal2,
endreal2,
]

sensno += 1

return designsummary
38 changes: 31 additions & 7 deletions tests/sensitivities/test_calc_tornado.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,44 @@ def test_designsummary():
)
# checking dimensions and some values in summary of design matrix
assert snorrebergdesign.shape == (7, 9)

assert (
snorrebergdesign.columns
== [
"sensno",
"sensname",
"senstype",
"casename1",
"startreal1",
"endreal1",
"casename2",
"startreal2",
"endreal2",
]
).all()
assert snorrebergdesign["sensname"][0] == "rms_seed"
assert snorrebergdesign["senstype"][0] == "mc"
assert snorrebergdesign["casename1"][0] == "P10_P90"
assert snorrebergdesign["startreal1"][0] == 0
assert snorrebergdesign["endreal1"][0] == 9
assert snorrebergdesign["casename2"][0] is None
assert snorrebergdesign["startreal2"][0] is None
assert snorrebergdesign["endreal2"][0] is None

assert snorrebergdesign["sensname"][6] == "relp_go"
assert snorrebergdesign["senstype"][6] == "scalar"
assert snorrebergdesign["casename1"][6] == "lc"
assert snorrebergdesign["startreal1"][6] == 90
assert snorrebergdesign["endreal1"][6] == 99
assert snorrebergdesign["casename2"][6] == "hc"
assert snorrebergdesign["startreal2"][6] == 100
assert snorrebergdesign["endreal2"][6] == 109

assert snorrebergdesign["endreal1"].sum() == 333

# Test same also when design matrix is in .csv format
designcsv = summarize_design(TESTDATA / "distributions/design.csv")

# checking dimensions and some values in summary of design matrix
assert designcsv.shape == (7, 9)
assert designcsv["sensname"][0] == "rms_seed"
assert designcsv["startreal2"][6] == 100
assert designcsv["endreal2"][6] == 109
assert designcsv["endreal1"].sum() == 333
assert snorrebergdesign.equals(designcsv)


def test_calc_tornadoinput():
Expand Down

0 comments on commit 4701ee9

Please sign in to comment.