Adding Solverstatisticscheck #36

Open · wants to merge 5 commits into develop
1 change: 1 addition & 0 deletions scripts/geos_ats_package/geos_ats/command_line_parsers.py
@@ -25,6 +25,7 @@
"stopcheck": "check the stop time and stop cycle",
"curvecheck": "check the ultra curves",
"restartcheck": "check the restart file",
"performancecheck": "check nonlinear and linear solvers performance"
}

verbose_options = {
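For context, a table like check_options is typically surfaced through argparse; a minimal sketch (hypothetical wiring, not the actual geos_ats command line) of how the new key becomes visible to users:

    import argparse

    check_options = {
        "stopcheck": "check the stop time and stop cycle",
        "curvecheck": "check the ultra curves",
        "restartcheck": "check the restart file",
        "performancecheck": "check nonlinear and linear solver performance",
    }

    parser = argparse.ArgumentParser()
    parser.add_argument("--check", choices=sorted(check_options),
                        help="; ".join(f"{k}: {v}" for k, v in check_options.items()))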
216 changes: 216 additions & 0 deletions scripts/geos_ats_package/geos_ats/helpers/solver_statistics_check.py
@@ -0,0 +1,216 @@

import os
import re
import argparse
import numpy as np
import hdf5_wrapper
import h5py

def parse_log_file( fname ):
    """
    Parse the log file and create an hdf5 file with the number of nonlinear and linear iterations per time-step

    Args:
        fname (str): name of the log file to parse

    Returns:
        tuple: output_fileName (str), the path of the generated hdf5 file, and errors (list), any parsing errors
    """
    # Define regular expressions
    cycle_pattern = r"\d+\s*:\s*Time: [\d.e+-]+ s, dt: [\d.e+-]+ s, Cycle: (\d+)"
[Review comment — Contributor] With the new changes in the log timestamp format, this would miss anything that is in minutes, years, etc. Maybe switch "s," to "\s*,"?

    config_and_nonlinear_iter_pattern = r"\d+\s*:\s*Attempt:\s*(\d+),\s*ConfigurationIter:\s*(\d+),\s*NewtonIter:\s*(\d+)"
    linear_iter_pattern = r"\d+\s*:\s*Last LinSolve\(iter,res\) = \(\s*(\d+),\s*([\d.e+-]+)\s*\) ;"
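    # Example lines these three patterns are intended to match (hypothetical, inferred
    # from the regexes themselves rather than taken from an actual GEOS log):
    #   0 : Time: 1.0e+00 s, dt: 1.0e-01 s, Cycle: 5
    #   0 : Attempt: 0, ConfigurationIter: 0, NewtonIter: 3
    #   0 : Last LinSolve(iter,res) = ( 12, 1.23e-08 ) ;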

    # Initialize variables to store the extracted data
    data = {}
    # Initialized to None so that log lines arriving out of order cannot raise a NameError
    cycle_number = None
    attempt = None
    config_iter = None
    newton_iter = None

    with open(fname, 'r') as file:
        for line in file:
            # Match Cycle number
            cycle_match = re.match(cycle_pattern, line)
            if cycle_match:
                cycle_number = cycle_match.group(1)
                data[cycle_number] = {
                    'Attempts': {}
                }

            # Match ConfigurationIter data
            config_iter_match = re.match(config_and_nonlinear_iter_pattern, line)
            if config_iter_match and cycle_number:
                attempt, config_iter, newton_iter = config_iter_match.groups()
                if int(newton_iter) > 0:
                    attempt_data = data[cycle_number]['Attempts'].get(attempt, {})
                    config_data = attempt_data.get('ConfigurationIters', [])
                    config_data.append({
                        'ConfigurationIter': config_iter,
                        'NewtonIters': {}
                    })
                    attempt_data['ConfigurationIters'] = config_data
                    data[cycle_number]['Attempts'][attempt] = attempt_data

            # Match linear iteration data
            iteration_match = re.match(linear_iter_pattern, line)
            if iteration_match and cycle_number and attempt and config_iter:
                num_iterations = int(iteration_match.group(1))
                attempt_data = data[cycle_number]['Attempts'][attempt]
                config_data = attempt_data['ConfigurationIters']
                config_iter_data = config_data[-1]
                config_iter_data['NewtonIters'][newton_iter] = num_iterations

    # Create an HDF5 file for storing the data
    output_fileName = os.path.join(os.path.dirname(fname), 'extracted_solverStat_data.h5')
    with h5py.File(output_fileName, 'w') as hdf5_file:
[Review comment — Contributor] Since you already have hdf5_wrapper as a dependency, you could switch this to:

    with hdf5_wrapper.hdf5_wrapper(output_fileName, 'w') as hdf5_file:
        ...

and then write to the object as if it were a simple python dictionary:

    hdf5_file['some_key'] = {'some': ['value']}

        for cycle, cycle_data in data.items():
            cycle_group = hdf5_file.create_group(f'Cycle_{cycle}')
            for attempt, attempt_data in cycle_data['Attempts'].items():
                attempt_group = cycle_group.create_group(f'Attempt_{attempt}')
                for config_iter_data in attempt_data['ConfigurationIters']:
                    config_iter_group = attempt_group.create_group(f'ConfigIter_{config_iter_data["ConfigurationIter"]}')
                    newton_iter_list = []
                    linear_iter_list = []
                    for newton_iter, num_iterations in config_iter_data['NewtonIters'].items():
                        newton_iter_list.append(int(newton_iter))
                        linear_iter_list.append(num_iterations)

                    matrix_data = np.column_stack((newton_iter_list, linear_iter_list))
                    config_iter_group.create_dataset('NewtonAndLinearIterations', data=matrix_data)

    print(f'Data has been saved to {output_fileName}')

    errors = []

    return output_fileName, errors
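# The file written above has the layout Cycle_<c>/Attempt_<a>/ConfigIter_<i>/NewtonAndLinearIterations,
# where each dataset is an (n, 2) array of [newton_iteration, linear_iterations] rows.
# A quick way to inspect it (sketch, using the output name produced above):
#
#     with h5py.File('extracted_solverStat_data.h5', 'r') as f:
#         f.visititems(lambda name, obj: print(name, getattr(obj, 'shape', '')))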

def load_data(fname):
    """
    Load the solver statistics data from an hdf5 file

    Args:
        fname (str): name of the hdf5 file to load

    Returns:
        dict: the nested solver statistics data
    """
    data = {}
    if os.path.isfile(fname):
        data = hdf5_wrapper.hdf5_wrapper(fname).get_copy()
    else:
        raise Exception(f'File {fname} not found. If baselines do not exist you may simply need to rebaseline this case.')
    return data
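# For example (keys follow the layout written by parse_log_file):
#
#     data = load_data('extracted_solverStat_data.h5')
#     iters = data['Cycle_0']['Attempt_0']['ConfigIter_0']['NewtonAndLinearIterations']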

# def plot_performance_curves():
# """
# """

def compare_performance_curves( fname, baseline, tolerances, output ):
    """
    Compare the nonlinear and linear solver statistics of a target run against a baseline

    Args:
        fname (str): Target solver statistics file name
        baseline (str): Baseline solver statistics file name
        tolerances (list): Relative tolerances for nonlinear and linear iterations
        output (str): Path to place output figures

    Returns:
        tuple: warnings, errors
    """
    # Setup
    warnings = []
    errors = []

    newton_iterations_tolerance, linear_iterations_tolerance = tolerances

    # Load data
    target_data = load_data( fname )
    baseline_data = load_data( baseline )

    # Check if the number of cycles is the same
    target_cycles = set(target_data.keys())
    baseline_cycles = set(baseline_data.keys())
    if target_cycles != baseline_cycles:
        errors.append('Number of cycles is different.')

    # Loop over the shared cycles so that a missing cycle is reported rather than raising a KeyError
    for cycle in target_cycles & baseline_cycles:
        target_num_attempts = set(target_data[cycle].keys())
        baseline_num_attempts = set(baseline_data[cycle].keys())

        # Check if the number of attempts is the same for this cycle
        if target_num_attempts != baseline_num_attempts:
            errors.append(f'Number of attempts for Cycle {cycle} is different.')

        # Loop over the shared attempts
        for attempt in target_num_attempts & baseline_num_attempts:
            target_config_iters = set(target_data[cycle][attempt].keys())
            baseline_config_iters = set(baseline_data[cycle][attempt].keys())

            # Check if the number of ConfigurationIters is the same for this Attempt
            if target_config_iters != baseline_config_iters:
                errors.append(f'Number of ConfigurationIters for Cycle {cycle}, Attempt {attempt} is different.')

            # Loop over the shared ConfigurationIters
            for config_iter in target_config_iters & baseline_config_iters:
                # Check if the NewtonAndLinearIterations are within tolerance
                target_iterations = np.array(target_data[cycle][attempt][config_iter]['NewtonAndLinearIterations'])
                baseline_iterations = np.array(baseline_data[cycle][attempt][config_iter]['NewtonAndLinearIterations'])

                newton_diff = np.abs(target_iterations[:, 0] - baseline_iterations[:, 0])
                linear_diff = np.abs(target_iterations[:, 1] - baseline_iterations[:, 1])

                if (np.any(newton_diff > newton_iterations_tolerance * target_iterations[:, 0])
                        or np.any(linear_diff > linear_iterations_tolerance * target_iterations[:, 1])):
                    errors.append(f'Differences found in NewtonAndLinearIterations for Cycle {cycle}, Attempt {attempt}, ConfigurationIter {config_iter}.')

    return warnings, errors
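# Worked example of the relative test above: with a Newton tolerance of 0.2, a baseline of
# 10 Newton iterations against a target of 13 fails (|13 - 10| = 3 > 0.2 * 13 = 2.6), while
# a target of 12 passes (|12 - 10| = 2 <= 0.2 * 12 = 2.4). Numbers are illustrative only.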

[Review comment — Contributor, on the docstring below] Build the solver statistics parser

def solver_statistics_check_parser():
    """
    Build the curve check parser

    Returns:
        argparse.ArgumentParser: The performance check parser
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("filename", help="Path to the log file")
    parser.add_argument("baseline", help="Path to the baseline file")
    parser.add_argument("-t",
                        "--tolerance",
                        nargs=2,
                        type=float,
                        help="The relative tolerances for nonlinear and linear iterations",
                        default=[0.0, 0.0])  # assumed default: require an exact match
    parser.add_argument("-o",
                        "--output",
                        help="Output figures to this directory",
                        default='./solver_statistics_check_figures')
    return parser
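# Example invocation (file names hypothetical):
#
#     python solver_statistics_check.py run.log baseline_solverStat_data.h5 -t 0.2 0.1 -o ./figures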


def main():
    """
    Entry point for the performance check script
    """
    parser = solver_statistics_check_parser()
    args = parser.parse_args()
    fname, parsingErrors = parse_log_file( args.filename )

    # We raise immediately if there is any issue while parsing
    if len(parsingErrors):
        print('\n'.join(parsingErrors))
        raise Exception('Performance check error while parsing log file.')

    warnings, errors = compare_performance_curves( fname, args.baseline, args.tolerance, args.output )

    if len(warnings):
        print('Performance check warnings:')
        print('\n'.join(warnings))

    if len(errors):
        print('Performance check errors:')
        print('\n'.join(errors))
        raise Exception(f'Performance check produced {len(errors)} errors!')

if __name__ == '__main__':
    main()
19 changes: 16 additions & 3 deletions scripts/geos_ats_package/geos_ats/test_builder.py
@@ -28,6 +28,13 @@ def as_dict(self):
        return asdict(self)


@dataclass(frozen=True)
class SolverstatisticscheckParameters:
    tolerance: tuple[float, float]

    def as_dict(self):
        return asdict(self)
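# For example (tolerance values hypothetical), a test deck can opt into the new check with
#
#     performancecheck_params=SolverstatisticscheckParameters(tolerance=(0.2, 0.1))
#
# and as_dict() then yields {'tolerance': (0.2, 0.1)} for the test system to consume.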

@dataclass(frozen=True)
class TestDeck:
    name: str

@@ -37,6 +44,7 @@ class TestDeck:
    check_step: int
    restartcheck_params: RestartcheckParameters = None
    curvecheck_params: CurveCheckParameters = None
    performancecheck_params: SolverstatisticscheckParameters = None


def collect_block_names(fname):
@@ -80,15 +88,19 @@ def generate_geos_tests(decks: Iterable[TestDeck]):
"""
for ii, deck in enumerate(decks):

        restartcheck_params = None
        curvecheck_params = None
        performancecheck_params = None

        if deck.restartcheck_params is not None:
            restartcheck_params = deck.restartcheck_params.as_dict()

        if deck.curvecheck_params is not None:
            curvecheck_params = deck.curvecheck_params.as_dict()

        if deck.performancecheck_params is not None:
            performancecheck_params = deck.performancecheck_params.as_dict()

        for partition in deck.partitions:
            nx, ny, nz = partition
            N = nx * ny * nz
@@ -111,7 +123,8 @@ def generate_geos_tests(decks: Iterable[TestDeck]):
                    y_partitions=ny,
                    z_partitions=nz,
                    restartcheck_params=restartcheck_params,
                    curvecheck_params=curvecheck_params,
                    performancecheck_params=performancecheck_params)
        ]

        if deck.restart_step > 0: