diff --git a/compliance/nvidia/README.md b/compliance/nvidia/README.md
index 3abb256c1..91c8d8df7 100755
--- a/compliance/nvidia/README.md
+++ b/compliance/nvidia/README.md
@@ -30,12 +30,11 @@ The `run_verification.py` found in each test directory will copy the test files
 
 | model | Required Compliance Tests |
 | ---- | ---- |
-| resnet50-v1.5 | [TEST01](./TEST01/), [TEST04](./TEST04/), [TEST05](./TEST05/) |
-| retinanet 800x800 | [TEST01](./TEST01/), [TEST05](./TEST05/) |
-| bert | [TEST01](./TEST01/), [TEST05](./TEST05/) |
-| dlrm-v2 | [TEST01](./TEST01/), [TEST05](./TEST05/) |
-| 3d-unet | [TEST01](./TEST01/), [TEST05](./TEST05/) |
-| rnnt | [TEST01](./TEST01/), [TEST05](./TEST05/) |
+| resnet50-v1.5 | [TEST01](./TEST01/), [TEST04](./TEST04/) |
+| retinanet 800x800 | [TEST01](./TEST01/) |
+| bert | [TEST01](./TEST01/) |
+| dlrm-v2 | [TEST01](./TEST01/) |
+| 3d-unet | [TEST01](./TEST01/) |
 | gpt-j | - |
 | stable-diffusion-xl | [TEST01](./TEST01/), [TEST04](./TEST04/) |
 | Llama2-70b | [TEST06](./TEST06/) |
diff --git a/compliance/nvidia/TEST05/README.md b/compliance/nvidia/TEST05/README.md
deleted file mode 100755
index ca381cbee..000000000
--- a/compliance/nvidia/TEST05/README.md
+++ /dev/null
@@ -1,41 +0,0 @@
-
-# Test 05 - Vary RNG seeds
-## Introduction
-The purpose of this test is to ensure that the SUT does not favor a particular set of Loadgen RNG seed values. The pass condition is that performance with non-default RNG seed values should be similar to the submitted performance.
-
-The seeds that are changed are listed below:
- - qsl_rng_seed - determines order of samples in QSL
- - sample_index_rng_seed - determines subset of samples in each loadable set
- - schedule_rng_seed - determines scheduling of samples in server mode
-
-## Prerequisites
-This script works best with Python 3.3 or later.
-This script also assumes that the submission runs have already been run and that results comply with the submission directory structure as described in [https://github.com/mlperf/policies/blob/master/submission_rules.adoc#562-inference](https://github.com/mlperf/policies/blob/master/submission_rules.adoc#562-inference)
-## Pass Criteria
-Performance must be within 5% of the submission performance. In single stream mode, latencies can be very short for high performance systems and run-to-run variation due to external disturbances (OS) can be significant. In such cases and when submission latencies are less or equal to 0.2ms, the pass threshold is relaxed to 20%.
-
-## Instructions
-
-### Part I
-Run the benchmark with the provided audit.config in the corresponding benchmark subdirectory.
-
-The audit.config file must be copied to the directory where the benchmark is being run from. Verification that audit.config was properly read can be done by checking that loadgen has found audit.config in mlperf_log_detail.txt
-
-Alternatively, you can alter the mlperf.conf (or the configuration file's copy your benchmark is using) by setting the value `*.*.test05 = 1`. For this option make sure you have the right values for `*.*.test05_qsl_rng_seed`, `*.*.test05_sample_index_rng_seed` and `*.*.test05_schedule_rng_seed` included in your configuration file.
-
-### Part II
-Run the verification script:
-  `python3 run_verification.py -r RESULTS_DIR -c COMPLIANCE_DIR -o OUTPUT_DIR [--dtype {byte,float32,int32,int64}]`
-
-- RESULTS_DIR: Specifies the path to the corresponding results directory that contains the accuracy and performance subdirectories containing the submission logs, i.e. `inference_results_v0.7/closed/NVIDIA/results/GPU/resnet/Offline`
-- COMPLIANCE_DIR: Specifies the path to the directory containing the logs from the compliance test run.
-- OUTPUT_DIR: Specifies the path to the output directory where compliance logs will be uploaded from, i.e. `inference_results_v0.7/closed/NVIDIA/compliance/GPU/resnet/Offline`
-
-Expected outcome:
-
-  Performance check pass: True
-  TEST05 verification complete
-
-
-
-
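Editor's note: the Part I check described in the deleted README above (confirming that loadgen actually picked up audit.config) can be scripted. The sketch below is new and illustrative only; the helper name is ours, and it matches just the literal file name because loadgen's exact detail-log message varies between versions.

```python
# Minimal sketch: confirm mlperf_log_detail.txt mentions audit.config at all.
# Illustration only; this was never part of the deleted TEST05 tooling.
from pathlib import Path


def audit_config_was_loaded(detail_log: str = "mlperf_log_detail.txt") -> bool:
    """Return True if any line of the loadgen detail log mentions audit.config."""
    try:
        text = Path(detail_log).read_text(errors="replace")
    except FileNotFoundError:
        return False
    return "audit.config" in text


if __name__ == "__main__":
    print("audit.config detected:", audit_config_was_loaded())
```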
diff --git a/compliance/nvidia/TEST05/audit.config b/compliance/nvidia/TEST05/audit.config
deleted file mode 100644
index f6074948c..000000000
--- a/compliance/nvidia/TEST05/audit.config
+++ /dev/null
@@ -1,6 +0,0 @@
-# The format of this config file is 'key = value'.
-# The key has the format 'model.scenario.key'. Value is mostly int64_t.
-# Model maybe '*' as wildcard. In that case the value applies to all models.
-# All times are in milli seconds
-
-*.*.test05 = 1
diff --git a/compliance/nvidia/TEST05/run_verification.py b/compliance/nvidia/TEST05/run_verification.py
deleted file mode 100644
index 804155187..000000000
--- a/compliance/nvidia/TEST05/run_verification.py
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2018-2022 The MLPerf Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-import os
-import sys
-import shutil
-import subprocess
-import argparse
-import json
-
-import numpy as np
-
-sys.path.append(os.getcwd())
-
-
-def main():
-
-    py3 = sys.version_info >= (3, 0)
-    # Parse arguments to identify the path to the logs from the performance
-    # runs
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--results_dir",
-        "-r",
-        help="Specifies the path to the corresponding results directory that contains the performance subdirectories containing the submission logs, i.e. inference_results_v0.7/closed/NVIDIA/results/T4x8/resnet/Offline.",
-        required=True,
-    )
-    parser.add_argument(
-        "--compliance_dir",
-        "-c",
-        help="Specifies the path to the directory containing the logs from the compliance test run.",
-        required=True,
-    )
-    parser.add_argument(
-        "--output_dir",
-        "-o",
-        help="Specifies the path to the output directory where compliance logs will be uploaded from, i.e. inference_results_v0.7/closed/NVIDIA/compliance/T4x8/resnet/Offline.",
-        required=True,
-    )
-
-    args = parser.parse_args()
-
-    print("Parsing arguments.")
-    results_dir = args.results_dir
-    compliance_dir = args.compliance_dir
-    output_dir = os.path.join(args.output_dir, "TEST05")
-
-    # run verify performance
-    verify_performance_binary = os.path.join(
-        os.path.dirname(__file__), "verify_performance.py"
-    )
-    verify_performance_command = (
-        "python3 "
-        + verify_performance_binary
-        + " -r "
-        + results_dir
-        + "/performance/run_1/mlperf_log_summary.txt"
-        + " -t "
-        + compliance_dir
-        + "/mlperf_log_summary.txt | tee verify_performance.txt"
-    )
-    try:
-        os.system(verify_performance_command)
-    except Exception:
-        print(
-            "Exception occurred trying to execute:\n " +
-            verify_performance_command)
-
-    # check if verify performance script passes
-    performance_pass_command = "grep PASS verify_performance.txt"
-    try:
-        performance_pass = "TEST PASS" in subprocess.check_output(
-            performance_pass_command, shell=True
-        ).decode("utf-8")
-    except Exception:
-        performance_pass = False
-
-    # setup output compliance directory structure
-    output_performance_dir = os.path.join(output_dir, "performance", "run_1")
-    try:
-        if not os.path.isdir(output_performance_dir):
-            os.makedirs(output_performance_dir)
-    except Exception:
-        print("Exception occurred trying to create " + output_performance_dir)
-
-    # copy compliance logs to output compliance directory
-    shutil.copy2("verify_performance.txt", output_dir)
-    summary_file = os.path.join(compliance_dir, "mlperf_log_summary.txt")
-    detail_file = os.path.join(compliance_dir, "mlperf_log_detail.txt")
-
-    try:
-        shutil.copy2(summary_file, output_performance_dir)
-    except Exception:
-        print(
-            "Exception occured trying to copy "
-            + summary_file
-            + " to "
-            + output_performance_dir
-        )
-    try:
-        shutil.copy2(detail_file, output_performance_dir)
-    except Exception:
-        print(
-            "Exception occured trying to copy "
-            + detail_file
-            + " to "
-            + output_performance_dir
-        )
-
-    print("Performance check pass: {:}".format(performance_pass))
-    print("TEST05 verification complete")
-
-
-if __name__ == "__main__":
-    main()
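Editor's note: the copy calls in the deleted run_verification.py above define the TEST05 output layout that a submission was expected to carry. The sketch below is new code with an illustrative helper name; the three paths are taken directly from the shutil.copy2 destinations in the script.

```python
# Sketch: check that run_verification.py populated the expected TEST05 layout
# under <output_dir>/TEST05/. Paths mirror the deleted script's copy targets.
import os


def test05_output_complete(output_dir: str) -> bool:
    test_dir = os.path.join(output_dir, "TEST05")
    run_dir = os.path.join(test_dir, "performance", "run_1")
    expected = [
        os.path.join(test_dir, "verify_performance.txt"),
        os.path.join(run_dir, "mlperf_log_summary.txt"),
        os.path.join(run_dir, "mlperf_log_detail.txt"),
    ]
    missing = [path for path in expected if not os.path.isfile(path)]
    for path in missing:
        print("missing:", path)
    return not missing
```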
diff --git a/compliance/nvidia/TEST05/verify_performance.py b/compliance/nvidia/TEST05/verify_performance.py
deleted file mode 100644
index 864a8af44..000000000
--- a/compliance/nvidia/TEST05/verify_performance.py
+++ /dev/null
@@ -1,157 +0,0 @@
-#! /usr/bin/env python3
-# Copyright 2018-2022 The MLPerf Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-import json
-import argparse
-import os
-import sys
-import re
-
-sys.path.append(os.getcwd())
-
-
-def main():
-    # Parse arguments to identify the path to the accuracy logs from
-    # the accuracy and performance runs
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--reference_summary",
-        "-r",
-        help="Specifies the path to the summary log for the performance run.",
-        default="",
-    )
-    parser.add_argument(
-        "--test_summary",
-        "-t",
-        help="Specifies the path to the summary log for this test.",
-        default="",
-    )
-    args = parser.parse_args()
-
-    print("Verifying performance.")
-    ref_file = open(args.reference_summary, "r")
-    test_file = open(args.test_summary, "r")
-    ref_score = 0
-    test_score = 0
-    ref_mode = ""
-    test_mode = ""
-
-    for line in ref_file:
-        if re.match("Scenario", line):
-            ref_mode = line.split(": ", 1)[1].strip()
-            continue
-
-        if ref_mode == "SingleStream":
-            if re.match(".*Early stopping 90th percentile estimate", line):
-                ref_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if ref_mode == "MultiStream":
-            if re.match(".*Early stopping 99th percentile estimate", line):
-                ref_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if ref_mode == "Server":
-            if re.match("Completed samples per second", line):
-                ref_score = line.split(": ", 1)[1].strip()
-                continue
-            if re.match("target_latency (ns)", line):
-                ref_target_latency = line.split(": ", 1)[1].strip()
-                continue
-
-        if ref_mode == "Offline":
-            if re.match("Samples per second", line):
-                ref_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if re.match("Result is", line):
-            valid = line.split(": ", 1)[1].strip()
-            if valid == "INVALID":
-                sys.exit("TEST FAIL: Reference results are invalid")
-
-        if re.match("\\d+ ERROR", line):
-            error = line.split(" ", 1)[0].strip()
-            print("WARNING: " + error + " ERROR reported in reference results")
-
-    for line in test_file:
-        if re.match("Scenario", line):
-            test_mode = line.split(": ", 1)[1].strip()
-            continue
-
-        if test_mode == "SingleStream":
-            if re.match(".*Early stopping 90th percentile estimate", line):
-                test_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if test_mode == "MultiStream":
-            if re.match(".*Early stopping 99th percentile estimate", line):
-                test_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if test_mode == "Server":
-            if re.match("Completed samples per second", line):
-                test_score = line.split(": ", 1)[1].strip()
-                continue
-            if re.match("target_latency (ns)", line):
-                test_target_latency = line.split(": ", 1)[1].strip()
-                if test_target_latency != ref_target_latency:
-                    print("TEST FAIL: Server target latency mismatch")
-                    sys.exit()
-                continue
-
-        if test_mode == "Offline":
-            if re.match("Samples per second", line):
-                test_score = line.split(": ", 1)[1].strip()
-                continue
-
-        if re.match("Result is", line):
-            valid = line.split(": ", 1)[1].strip()
-            if valid == "INVALID":
-                sys.exit("TEST FAIL: Test results are invalid")
-
-        if re.match("\\d+ ERROR", line):
-            error = line.split(" ", 1)[0].strip()
-            print("WARNING: " + error + " ERROR reported in test results")
-
-    if test_mode != ref_mode:
-        sys.exit("Test and reference scenarios do not match!")
-
-    print("reference score = {}".format(ref_score))
-    print("test score = {}".format(test_score))
-
-    threshold = 0.05
-
-    # In single-/multi-stream mode, latencies can be very short for high performance systems
-    # and run-to-run variation due to external disturbances (OS) can be significant.
-    # In this case we relax pass threshold to 20%
-    if (ref_mode == "SingleStream" and float(ref_score) <= 200000) or (
-        ref_mode == "MultiStream" and float(ref_score) <= 1600000
-    ):
-        threshold = 0.20
-
-    if (
-        ref_mode in ["Offline", "Server"]
-        and float(test_score) > float(ref_score) * (1 - threshold)
-    ) or (
-        "Stream" in ref_mode and float(test_score) < float(
-            ref_score) * (1 + threshold)
-    ):
-        print("TEST PASS")
-    else:
-        print("TEST FAIL: Test score invalid")
-
-
-if __name__ == "__main__":
-    main()
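Editor's note: the pass criterion in the deleted verify_performance.py is easy to misread, so here is a condensed restatement (the function name and example numbers are ours). Throughput scenarios pass when the TEST05 score is no more than 5% below the submission; latency scenarios pass when it is no more than 5% above; and the threshold relaxes to 20% when the submitted latency is at or below 0.2 ms (SingleStream) or 1.6 ms (MultiStream), as in the script.

```python
# Condensed sketch of the deleted verify_performance.py pass criterion.
# Scores are samples/s for Offline/Server and latencies in nanoseconds for
# SingleStream/MultiStream, matching the values in mlperf_log_summary.txt.
def test05_passes(ref_mode: str, ref_score: float, test_score: float) -> bool:
    threshold = 0.05
    if (ref_mode == "SingleStream" and ref_score <= 200_000) or (
        ref_mode == "MultiStream" and ref_score <= 1_600_000
    ):
        threshold = 0.20  # very short submitted latencies: relax to 20%
    if ref_mode in ("Offline", "Server"):
        return test_score > ref_score * (1 - threshold)  # higher is better
    return test_score < ref_score * (1 + threshold)  # lower is better


# Example: a 1000 samples/s Offline submission passes with a 960 samples/s
# TEST05 run (within 5%) but not with 940 samples/s.
assert test05_passes("Offline", 1000.0, 960.0)
assert not test05_passes("Offline", 1000.0, 940.0)
```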
diff --git a/text_to_image/requirements.txt b/text_to_image/requirements.txt
index a0a850773..857de950e 100644
--- a/text_to_image/requirements.txt
+++ b/text_to_image/requirements.txt
@@ -1,8 +1,8 @@
-diffusers==0.21.2
-transformers==4.33.2
-accelerate==0.23.0
-open-clip-torch==2.7.0
-opencv-python==4.8.1.78
+diffusers==0.30.3
+transformers==4.45.2
+accelerate==1.0.1
+open-clip-torch==2.26.1
+opencv-python==4.10.0.84
 pycocotools==2.0.7
-torchmetrics[image]==1.2.0
-scipy==1.9.1
+torchmetrics[image]==1.4.3
+scipy==1.10.1
diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index 5f2e27267..a31a6fede 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -2906,7 +2906,7 @@ def check_compliance_dir(
         "llama2-70b-99.9",
         "stable-diffusion-xl",
         "mixtral-8x7b",
-    ]:
+    ] or config.version not in ["v4.0", "v4.1"]:
         test_list.remove("TEST05")
 
     if model in [
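Editor's note: taken together with the table change at the top of this diff, the one-line submission_checker.py edit means TEST05 can only be required when checking v4.0 or v4.1 submissions; for any later round it is dropped for every model. A simplified sketch of that gating follows. It is not the checker's real API; the exempt-model set lists only the names visible in the hunk context (the real list in check_compliance_dir() is longer), and the model strings in the usage lines are illustrative.

```python
# Simplified sketch of the TEST05 gating after this change; illustrative only.
TEST05_EXEMPT_MODELS = {"llama2-70b-99.9", "stable-diffusion-xl", "mixtral-8x7b"}


def requires_test05(model: str, version: str) -> bool:
    if version not in ("v4.0", "v4.1"):
        return False  # TEST05 is retired for rounds after v4.1
    return model not in TEST05_EXEMPT_MODELS


print(requires_test05("resnet", "v4.1"))  # True: still checked for old rounds
print(requires_test05("resnet", "v5.0"))  # False: no longer required
```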