Make s3 bucket name and s3 region required parameters in dashboard wrapper. Make hardware_metrics optional. Fix custom csv path issue when fetching from s3 (#46)

* Make s3 bucket name and s3 region required parameters in dashboard wrapper. Make hardware_metrics optional. Fix custom csv path issue when fetching from s3

* remove DEFAULT in csv path

* formatting

* change csv defaults

* ignore csv paths file + remove print statement

* address PR comments

* fix unittest

* ignore csv_paths file to prevent changing of defaults
shreyash2106 authored Aug 10, 2023
1 parent 7256999 commit 36b4511
Showing 5 changed files with 36 additions and 51 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -6,6 +6,7 @@ web_files
 *.csv
 coverage.json
 results.xml
+csv_paths.py
 
 #Venv
 .venv*/
17 changes: 9 additions & 8 deletions src/autogluon_dashboard/app.py
@@ -16,10 +16,10 @@
     PER_DATA_COMP,
     PER_DATASET_IDF,
 )
-from autogluon_dashboard.constants.aws_s3_constants import (
-    AGG_FRAMEWORK_DEFAULT_CSV_PATH,
-    HARDWARE_METRICS_DEFAULT_CSV_PATH,
-    PER_DATASET_DEFAULT_CSV_PATH,
+from autogluon_dashboard.constants.csv_paths import (
+    AGG_FRAMEWORK_CSV_PATH,
+    HARDWARE_METRICS_CSV_PATH,
+    PER_DATASET_CSV_PATH,
 )
 from autogluon_dashboard.constants.df_constants import (
     BESTDIFF,
@@ -77,9 +77,9 @@
 from autogluon_dashboard.widgets.slider_widget import SliderWidget
 
 # Load Data
-dataset_file = os.environ.get("PER_DATASET_S3_PATH", PER_DATASET_DEFAULT_CSV_PATH)
-aggregated_file = os.environ.get("AGG_DATASET_S3_PATH", AGG_FRAMEWORK_DEFAULT_CSV_PATH)
-hardware_metrics_file = os.environ.get("HWARE_METRICS_S3_PATH", HARDWARE_METRICS_DEFAULT_CSV_PATH)
+dataset_file = PER_DATASET_CSV_PATH
+aggregated_file = AGG_FRAMEWORK_CSV_PATH
+hardware_metrics_file = HARDWARE_METRICS_CSV_PATH
 dataset_paths = [dataset_file, aggregated_file, hardware_metrics_file]
 per_dataset_df, all_framework_df, hware_metrics_df = get_dataframes(dataset_paths)
 
@@ -122,9 +122,10 @@
 per_dataset_csv_widget = FileDownloadWidget(file=PER_DATASET_DOWNLOAD_TITLE).create_widget()
 all_framework_df.to_csv(AGG_FRAMEWORKS_DOWNLOAD_TITLE)
 all_framework_csv_widget = FileDownloadWidget(file=AGG_FRAMEWORKS_DOWNLOAD_TITLE).create_widget()
+hware_metrics_csv_widget = None
 if hware_metrics_df is not None:
     hware_metrics_df.to_csv(HARDWARE_METRICS_DOWNLOAD_TITLE)
-hware_metrics_csv_widget = FileDownloadWidget(file=HARDWARE_METRICS_DOWNLOAD_TITLE).create_widget()
+    hware_metrics_csv_widget = FileDownloadWidget(file=HARDWARE_METRICS_DOWNLOAD_TITLE).create_widget()
 
 df_framework_only = get_df_filter_by_framework(per_dataset_idf, frameworks_widget3)
 prop_best = get_proportion_framework_rank1(df_framework_only, per_dataset_df, len(dataset_list))
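
Since hardware metrics are now optional, app.py relies on get_dataframes returning None in place of a dataframe when no hardware-metrics CSV is available. A minimal sketch of that contract, assuming the loader reads each CSV with pandas and treats an empty path as "not provided" (an illustration, not the repository's actual implementation):

from typing import List, Optional

import pandas as pd


def get_dataframes(paths: List[str]) -> List[Optional[pd.DataFrame]]:
    # An empty path means the optional CSV (here, hardware metrics) was
    # not supplied; yield None so callers can guard on it, as the widget
    # code above does with `if hware_metrics_df is not None`.
    return [pd.read_csv(path) if path else None for path in paths]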
11 changes: 0 additions & 11 deletions src/autogluon_dashboard/constants/aws_s3_constants.py
@@ -1,13 +1,2 @@
-S3_REGION = "us-east-2"
-DEFAULT_BUCKET_NAME = "ag-dashboard-test"  # TODO: Change default bucket name
 CSV_FILES_DIR = "dev_data/"
-PER_DATASET_DEFAULT_CSV_PATH = (
-    "https://d24iwcyhf6yavw.cloudfront.net/dev_data/all_data.csv"  # TODO: Change default csv path
-)
-AGG_FRAMEWORK_DEFAULT_CSV_PATH = (
-    "https://d24iwcyhf6yavw.cloudfront.net/dev_data/autogluon.csv"  # TODO: Change default csv path
-)
-HARDWARE_METRICS_DEFAULT_CSV_PATH = (
-    "https://d24iwcyhf6yavw.cloudfront.net/dev_data/hardware_metrics.csv"  # TODO: Change default csv path
-)
 CLOUDFRONT_DOMAIN = "https://d24iwcyhf6yavw.cloudfront.net"
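
Since dashboard.py (below) now imports only CLOUDFRONT_DOMAIN and CSV_FILES_DIR from this module, the two context lines above are presumably all that remains of the file after the commit:

CSV_FILES_DIR = "dev_data/"
CLOUDFRONT_DOMAIN = "https://d24iwcyhf6yavw.cloudfront.net"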
3 changes: 3 additions & 0 deletions src/autogluon_dashboard/constants/csv_paths.py
@@ -0,0 +1,3 @@
+PER_DATASET_CSV_PATH = "https://d24iwcyhf6yavw.cloudfront.net/dev_data/all_data.csv"
+AGG_FRAMEWORK_CSV_PATH = "https://d24iwcyhf6yavw.cloudfront.net/dev_data/autogluon.csv"
+HARDWARE_METRICS_CSV_PATH = "https://d24iwcyhf6yavw.cloudfront.net/dev_data/hardware_metrics.csv"
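
These checked-in values are only dev defaults: run_dashboard() in dashboard.py (below) rewrites this file at deploy time with the CloudFront URLs of the freshly uploaded CSVs. For a hypothetical prefix of "my_prefix/" with --hware_metrics_csv omitted, the rewritten file would read:

PER_DATASET_CSV_PATH = "https://d24iwcyhf6yavw.cloudfront.net/my_prefix/dev_data/all_data.csv"
AGG_FRAMEWORK_CSV_PATH = "https://d24iwcyhf6yavw.cloudfront.net/my_prefix/dev_data/autogluon.csv"
HARDWARE_METRICS_CSV_PATH = ""

The empty string is what the conditional write in run_dashboard() produces when no hardware-metrics CSV is given, and it is what app.py's loader in turn maps to a None dataframe.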
55 changes: 23 additions & 32 deletions src/autogluon_dashboard/dashboard.py
@@ -6,12 +6,7 @@
 import boto3
 import botocore
 
-from autogluon_dashboard.constants.aws_s3_constants import (
-    CLOUDFRONT_DOMAIN,
-    CSV_FILES_DIR,
-    DEFAULT_BUCKET_NAME,
-    S3_REGION,
-)
+from autogluon_dashboard.constants.aws_s3_constants import CLOUDFRONT_DOMAIN, CSV_FILES_DIR
 
 
 def upload_to_s3(s3_client: botocore.client, file_name: str, object_name: str, bucket_name: str, args: dict = None):
@@ -51,14 +46,15 @@ def get_args() -> argparse.Namespace:
     parser.add_argument(
         "--hware_metrics_csv",
         type=str,
-        required=True,
         help="Location of csv file of hardware metrics in local filesystem to upload to S3 bucket. Example: sub_folder/file_name.csv",
+        default="",
         metavar="",
     )
 
     parser.add_argument(
         "--s3_bucket",
         type=str,
+        required=True,
         help="Name of S3 bucket that results to aggregate get outputted to",
         nargs="?",
         metavar="",
@@ -74,6 +70,7 @@
     parser.add_argument(
         "--s3_region",
         type=str,
+        required=True,
         help="S3 Region to deploy the dashboard website",
         nargs="?",
         metavar="",
@@ -85,7 +82,7 @@

 def run_dashboard():
     args = get_args()
-    # Set variables to corrensponding command line args
+    # Set variables to corresponding command line args
     per_dataset_csv_path = args.per_dataset_csv
     aggregated_csv_path = args.agg_dataset_csv
     hware_metrics_csv_path = args.hware_metrics_csv
@@ -96,24 +93,7 @@ def run_dashboard():
     logger = logging.getLogger("dashboard-logger")
     logging.basicConfig(level=logging.INFO)
 
-    if not bucket_name:
-        if region:
-            logger.warning(
-                "Cannot specify region if no bucket has been provded. Defaulting to AutoGluon bucket and region (%s)",
-                S3_REGION,
-            )
-        else:
-            logger.info(
-                "No bucket or region has been provided. Defaulting to AutoGluon bucket and region (%s)",
-                S3_REGION,
-            )
-        region = S3_REGION
-        bucket_name = DEFAULT_BUCKET_NAME  # TODO: Change default bucket name
-    else:
-        if not region:
-            raise ValueError("You must specify a region if you provide a bucket")
-        else:
-            logger.info(f"You have specified Bucket {bucket_name} and Region {region}.")
+    logger.info(f"You have specified Bucket {bucket_name} and Region {region}.")
 
     # Set S3 URL with appropriate bucket, region, and prefix
     if prefix:
@@ -127,22 +107,32 @@
     per_dataset_s3_loc = CSV_FILES_DIR + "all_data.csv"
     aggregated_s3_loc = CSV_FILES_DIR + "autogluon.csv"
     hware_s3_loc = CSV_FILES_DIR + "hardware_metrics.csv"
-    os.environ["PER_DATASET_S3_PATH"] = CLOUDFRONT_DOMAIN + f"/{prefix}" + per_dataset_s3_loc
-    os.environ["AGG_DATASET_S3_PATH"] = CLOUDFRONT_DOMAIN + f"/{prefix}" + aggregated_s3_loc
-    os.environ["HWARE_METRICS_S3_PATH"] = CLOUDFRONT_DOMAIN + f"/{prefix}" + hware_s3_loc
+    PER_DATASET_CSV_PATH = CLOUDFRONT_DOMAIN + f"/{prefix}" + per_dataset_s3_loc
+    AGG_FRAMEWORK_CSV_PATH = CLOUDFRONT_DOMAIN + f"/{prefix}" + aggregated_s3_loc
+    HARDWARE_METRICS_CSV_PATH = CLOUDFRONT_DOMAIN + f"/{prefix}" + hware_s3_loc if hware_metrics_csv_path else ""
+    wrapper_dir = os.path.dirname(__file__)
+    csv_path_file_location = os.path.join(wrapper_dir, "constants/csv_paths.py")
+    # Write the CSV paths to a python file that can be accessed by app.py
+    # We do this instead of using an environment variable since the website runs in a separate web environment, which cannot be accessed preemptively using this code.
+    f = open(csv_path_file_location, "w")
+    f.write(f"PER_DATASET_CSV_PATH = " + '"' + PER_DATASET_CSV_PATH + '"')
+    f.write(f"\nAGG_FRAMEWORK_CSV_PATH = " + '"' + AGG_FRAMEWORK_CSV_PATH + '"')
+    f.write(f"\nHARDWARE_METRICS_CSV_PATH = " + '"' + HARDWARE_METRICS_CSV_PATH + '"')
+    f.close()
 
     os.environ["BOKEH_PY_LOG_LEVEL"] = "error"
 
     s3_client = boto3.client("s3")
 
     # Upload CSV files to S3
     upload_to_s3(s3_client, per_dataset_csv_path, prefix + per_dataset_s3_loc, bucket_name)
     upload_to_s3(s3_client, aggregated_csv_path, prefix + aggregated_s3_loc, bucket_name)
-    upload_to_s3(s3_client, hware_metrics_csv_path, prefix + hware_s3_loc, bucket_name)
+    if hware_metrics_csv_path:
+        upload_to_s3(s3_client, hware_metrics_csv_path, prefix + hware_s3_loc, bucket_name)
     logger.info(
         f"Evaluation CSV files have been successfully uploaded to bucket - {bucket_name}, at locations: {s3_url + per_dataset_s3_loc}, {s3_url + aggregated_s3_loc}, and {s3_url + hware_s3_loc}.",
     )
 
     wrapper_dir = os.path.dirname(__file__)
     agg_script_location = os.path.join(wrapper_dir, "utils/aggregate_file.py")
     agg_file_location = os.path.join(wrapper_dir, "out.py")
     # Aggregate all code into output file
@@ -176,8 +166,9 @@ def run_dashboard():
     upload_to_s3(s3_client, os.path.join(web_files_dir, "out.js"), prefix + "out.js", bucket_name)
     logger.info("WebAssembly files have been successfully uploaded to bucket - %s", bucket_name)
 
-    # TODO: Change website link to https using CloudFront
-    logger.info("The dashboard website is: " + f"{CLOUDFRONT_DOMAIN}/{prefix}out.html")
+    # Use print so that the GitHub Actions bash script can pick up the URL from the CLI
+    print("The dashboard website is: " + f"{CLOUDFRONT_DOMAIN}/{prefix}out.html")
 
 
 if __name__ == "__main__":
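
Putting the argument changes together, a sketch of a wrapper invocation under the new contract; the bucket name, region, and CSV paths are made-up examples, and launching the module via `python -m` is an assumption about the entry point:

import subprocess

# Hypothetical invocation: --s3_bucket and --s3_region are now required,
# while --hware_metrics_csv may be omitted, in which case the hardware
# metrics upload and widgets are skipped.
subprocess.run(
    [
        "python", "-m", "autogluon_dashboard.dashboard",
        "--per_dataset_csv", "results/all_data.csv",
        "--agg_dataset_csv", "results/autogluon.csv",
        "--s3_bucket", "my-dashboard-bucket",
        "--s3_region", "us-west-2",
    ],
    check=True,
)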
