From 644eafe13ff1bc0bf83cc9f7e3f63a0f21c3f620 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Fri, 6 Oct 2023 10:28:50 -0500 Subject: [PATCH 1/3] Add quick demo --- single_stage_detector/mlcube/mlcube.yaml | 13 ++ .../scripts/download_openimages_demo.sh | 67 ++++++++++ single_stage_detector/ssd/run_demo.sh | 114 ++++++++++++++++++ 3 files changed, 194 insertions(+) create mode 100755 single_stage_detector/scripts/download_openimages_demo.sh create mode 100755 single_stage_detector/ssd/run_demo.sh diff --git a/single_stage_detector/mlcube/mlcube.yaml b/single_stage_detector/mlcube/mlcube.yaml index 2e1b412a3..e009982d2 100644 --- a/single_stage_detector/mlcube/mlcube.yaml +++ b/single_stage_detector/mlcube/mlcube.yaml @@ -42,3 +42,16 @@ tasks: log_dir: logs/ outputs: checker_logs_dir: checker_logs/ + download_demo: + entrypoint: ../scripts/download_openimages_demo.sh -a + parameters: + outputs: + data_dir: data/ + demo: + entrypoint: ./run_demo.sh -a + # torchrun --standalone --nnodes=1 train.py --epochs=1 --batch-size=16 --eval-batch-size=16 + parameters: + inputs: + data_dir: data/ + outputs: + log_dir: logs/ diff --git a/single_stage_detector/scripts/download_openimages_demo.sh b/single_stage_detector/scripts/download_openimages_demo.sh new file mode 100755 index 000000000..ed15a6537 --- /dev/null +++ b/single_stage_detector/scripts/download_openimages_demo.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +: "${DATASET_PATH:=/datasets/open-images-v6-mlperf}" + +while [ "$1" != "" ]; do + case $1 in + -d | --dataset-path) + shift + DATASET_PATH=$1 + ;; + --data_dir=*) + if [[ "$PWD" = /workspace/single_stage_detector/ssd ]]; then + cd ../scripts + DATASET_PATH="${1#*=}" + fi + ;; + esac + shift +done + +echo "saving to" +echo $DATASET_PATH + +MLPERF_CLASSES=('Airplane' 'Antelope' 'Apple' 'Backpack' 'Balloon' 'Banana' + 'Barrel' 'Baseball bat' 'Baseball glove' 'Bee' 'Beer' 'Bench' 'Bicycle' + 'Bicycle helmet' 'Bicycle wheel' 'Billboard' 'Book' 'Bookcase' 'Boot' + 'Bottle' 
'Bowl' 'Bowling equipment' 'Box' 'Boy' 'Brassiere' 'Bread' + 'Broccoli' 'Bronze sculpture' 'Bull' 'Bus' 'Bust' 'Butterfly' 'Cabinetry' + 'Cake' 'Camel' 'Camera' 'Candle' 'Candy' 'Cannon' 'Canoe' 'Carrot' 'Cart' + 'Castle' 'Cat' 'Cattle' 'Cello' 'Chair' 'Cheese' 'Chest of drawers' 'Chicken' + 'Christmas tree' 'Coat' 'Cocktail' 'Coffee' 'Coffee cup' 'Coffee table' 'Coin' + 'Common sunflower' 'Computer keyboard' 'Computer monitor' 'Convenience store' + 'Cookie' 'Countertop' 'Cowboy hat' 'Crab' 'Crocodile' 'Cucumber' 'Cupboard' + 'Curtain' 'Deer' 'Desk' 'Dinosaur' 'Dog' 'Doll' 'Dolphin' 'Door' 'Dragonfly' + 'Drawer' 'Dress' 'Drum' 'Duck' 'Eagle' 'Earrings' 'Egg (Food)' 'Elephant' + 'Falcon' 'Fedora' 'Flag' 'Flowerpot' 'Football' 'Football helmet' 'Fork' + 'Fountain' 'French fries' 'French horn' 'Frog' 'Giraffe' 'Girl' 'Glasses' + 'Goat' 'Goggles' 'Goldfish' 'Gondola' 'Goose' 'Grape' 'Grapefruit' 'Guitar' + 'Hamburger' 'Handbag' 'Harbor seal' 'Headphones' 'Helicopter' 'High heels' + 'Hiking equipment' 'Horse' 'House' 'Houseplant' 'Human arm' 'Human beard' + 'Human body' 'Human ear' 'Human eye' 'Human face' 'Human foot' 'Human hair' + 'Human hand' 'Human head' 'Human leg' 'Human mouth' 'Human nose' 'Ice cream' + 'Jacket' 'Jeans' 'Jellyfish' 'Juice' 'Kitchen & dining room table' 'Kite' + 'Lamp' 'Lantern' 'Laptop' 'Lavender (Plant)' 'Lemon' 'Light bulb' 'Lighthouse' + 'Lily' 'Lion' 'Lipstick' 'Lizard' 'Man' 'Maple' 'Microphone' 'Mirror' + 'Mixing bowl' 'Mobile phone' 'Monkey' 'Motorcycle' 'Muffin' 'Mug' 'Mule' + 'Mushroom' 'Musical keyboard' 'Necklace' 'Nightstand' 'Office building' + 'Orange' 'Owl' 'Oyster' 'Paddle' 'Palm tree' 'Parachute' 'Parrot' 'Pen' + 'Penguin' 'Personal flotation device' 'Piano' 'Picture frame' 'Pig' 'Pillow' + 'Pizza' 'Plate' 'Platter' 'Porch' 'Poster' 'Pumpkin' 'Rabbit' 'Rifle' + 'Roller skates' 'Rose' 'Salad' 'Sandal' 'Saucer' 'Saxophone' 'Scarf' 'Sea lion' + 'Sea turtle' 'Sheep' 'Shelf' 'Shirt' 'Shorts' 'Shrimp' 'Sink' 'Skateboard' + 'Ski' 
'Skull' 'Skyscraper' 'Snake' 'Sock' 'Sofa bed' 'Sparrow' 'Spider' 'Spoon' + 'Sports uniform' 'Squirrel' 'Stairs' 'Stool' 'Strawberry' 'Street light' + 'Studio couch' 'Suit' 'Sun hat' 'Sunglasses' 'Surfboard' 'Sushi' 'Swan' + 'Swimming pool' 'Swimwear' 'Tank' 'Tap' 'Taxi' 'Tea' 'Teddy bear' 'Television' + 'Tent' 'Tie' 'Tiger' 'Tin can' 'Tire' 'Toilet' 'Tomato' 'Tortoise' 'Tower' + 'Traffic light' 'Train' 'Tripod' 'Truck' 'Trumpet' 'Umbrella' 'Van' 'Vase' + 'Vehicle registration plate' 'Violin' 'Wall clock' 'Waste container' 'Watch' + 'Whale' 'Wheel' 'Wheelchair' 'Whiteboard' 'Window' 'Wine' 'Wine glass' 'Woman' + 'Zebra' 'Zucchini') + +python fiftyone_openimages.py \ + --dataset-dir=${DATASET_PATH} \ + --splits="test" \ + --output-labels="openimages-mlperf.json" \ + --classes "${MLPERF_CLASSES[@]}" diff --git a/single_stage_detector/ssd/run_demo.sh b/single_stage_detector/ssd/run_demo.sh new file mode 100755 index 000000000..85d6e110d --- /dev/null +++ b/single_stage_detector/ssd/run_demo.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +# Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Runs the SSD demo training and reports its wall-clock time.
+# to use the script:
+#   ./run_demo.sh [--data_dir=<dataset dir>] [--log_dir=<log dir>]
+
+set +x
+set -e
+
+# Silence tracing on non-zero Slurm local ranks; an unset SLURM_LOCALID counts as rank 0 so the integer test never sees an empty string
+[ "${SLURM_LOCALID:-0}" -ne 0 ] && set +x
+
+
+# start timing
+start=$(date +%s)
+start_fmt=$(date +%Y-%m-%d\ %r)
+echo "STARTING TIMING RUN AT $start_fmt"
+
+# Set variables
+[ "${DEBUG}" = "1" ] && set -x
+BATCHSIZE=${BATCHSIZE:-2}
+EVALBATCHSIZE=${EVALBATCHSIZE:-${BATCHSIZE}}
+NUMEPOCHS=${NUMEPOCHS:-30}
+LOG_INTERVAL=${LOG_INTERVAL:-20}
+DATASET_DIR=${DATASET_DIR:-"/datasets/open-images-v6-mlperf"}
+TORCH_HOME=${TORCH_HOME:-"$(pwd)/torch-model-cache"}
+# NOTE(review): TORCH_HOME is assigned without 'export'; train.py only sees it if the caller already exported it - confirm
+# Handle MLCube parameters (--name=value form); arguments matching no pattern, such as -a, are skipped
+while [ $# -gt 0 ]; do
+    case "$1" in
+        --data_dir=*)
+            DATASET_DIR="${1#*=}"
+            ;;
+        --log_dir=*)
+            LOG_DIR="${1#*=}"
+            ;;
+        *)
+    esac
+    shift
+done
+
+
+# run benchmark
+echo "running benchmark"
+
+
+# CMD is the launcher prefix that is placed in front of train.py
+declare -a CMD
+if [ -n "${SLURM_LOCALID-}" ]; then
+    # Mode 1: Slurm launched a task for each GPU and set some envvars; no need for parallel launch
+    cluster=''
+    if [[ "${DGXSYSTEM}" == DGX2* ]]; then
+        cluster='circe'
+    fi
+    if [[ "${DGXSYSTEM}" == DGXA100* ]]; then
+        cluster='selene'
+    fi
+    if [ "${SLURM_NTASKS}" -gt "${SLURM_JOB_NUM_NODES}" ]; then
+        CMD=( './bind.sh' "--cluster=${cluster}" '--ib=single' '--' ${NSYSCMD} 'python' '-u' )
+    else
+        CMD=( 'python' '-u' )
+    fi
+else
+    # Mode 2: Single-node Docker; need to launch tasks with torchrun
+    CMD=( "torchrun" "--standalone" "--nnodes=1" "--nproc_per_node=1" )
+    [ "$MEMBIND" = false ] && CMD+=( "--no_membind" )  # NOTE(review): confirm torchrun accepts --no_membind
+fi
+# Arguments forwarded to train.py
+PARAMS=(
+    --batch-size "${BATCHSIZE}"
+    --eval-batch-size "${EVALBATCHSIZE}"
+    --epochs "${NUMEPOCHS}"
+    --print-freq "${LOG_INTERVAL}"
+    --data-path "${DATASET_DIR}"
+)
+# run training; '|| ret_code=$?' records a failure instead of letting 'set -e' abort here,
+# so the log copy and exit-code check that follow still run. EXTRA_PARAMS is expanded unquoted and word-splits.
+ret_code=0; "${CMD[@]}" train.py "${PARAMS[@]}" ${EXTRA_PARAMS} || ret_code=$?
+ +# Copy log file to MLCube log folder +if [ "$LOG_DIR" != "" ]; then + timestamp=$(date +%Y%m%d_%H%M%S) + cp mlperf_compliance.log "$LOG_DIR/mlperf_compliance_$timestamp.log" +fi + +set +x + +sleep 3 +if [[ $ret_code != 0 ]]; then exit $ret_code; fi + +# end timing +end=$(date +%s) +end_fmt=$(date +%Y-%m-%d\ %r) +echo "ENDING TIMING RUN AT $end_fmt" + +# report result +result=$(( $end - $start )) +result_name="SINGLE_STAGE_DETECTOR" + +echo "RESULT,$result_name,,$result,nvidia,$start_fmt" From 4decb186c7759b9cf6632b2a2909b4d99c6d994c Mon Sep 17 00:00:00 2001 From: David Jurado Date: Fri, 13 Oct 2023 06:07:40 -0500 Subject: [PATCH 2/3] Fix demo data --- single_stage_detector/.dockerignore | 1 + single_stage_detector/mlcube/mlcube.yaml | 4 +- single_stage_detector/requirements.txt | 2 +- .../scripts/download_openimages_demo.sh | 47 ++----------------- 4 files changed, 9 insertions(+), 45 deletions(-) create mode 100644 single_stage_detector/.dockerignore diff --git a/single_stage_detector/.dockerignore b/single_stage_detector/.dockerignore new file mode 100644 index 000000000..0af9b975f --- /dev/null +++ b/single_stage_detector/.dockerignore @@ -0,0 +1 @@ +mlcube/workspace/ \ No newline at end of file diff --git a/single_stage_detector/mlcube/mlcube.yaml b/single_stage_detector/mlcube/mlcube.yaml index e009982d2..789007c3e 100644 --- a/single_stage_detector/mlcube/mlcube.yaml +++ b/single_stage_detector/mlcube/mlcube.yaml @@ -46,12 +46,12 @@ tasks: entrypoint: ../scripts/download_openimages_demo.sh -a parameters: outputs: - data_dir: data/ + data_dir: demo/ demo: entrypoint: ./run_demo.sh -a # torchrun --standalone --nnodes=1 train.py --epochs=1 --batch-size=16 --eval-batch-size=16 parameters: inputs: - data_dir: data/ + data_dir: demo/ outputs: log_dir: logs/ diff --git a/single_stage_detector/requirements.txt b/single_stage_detector/requirements.txt index ad2d26a46..35e86b944 100644 --- a/single_stage_detector/requirements.txt +++ 
b/single_stage_detector/requirements.txt @@ -1,6 +1,6 @@ scikit-image>=0.15.0 ujson>=4.0.2 matplotlib>=3.5.1 -pycocotools==2.0.4 +pycocotools>=2.0.4 git+https://github.com/mlcommons/logging.git@1.1.0-rc4 fiftyone==0.15.1 diff --git a/single_stage_detector/scripts/download_openimages_demo.sh b/single_stage_detector/scripts/download_openimages_demo.sh index ed15a6537..ed5b8d162 100755 --- a/single_stage_detector/scripts/download_openimages_demo.sh +++ b/single_stage_detector/scripts/download_openimages_demo.sh @@ -20,48 +20,11 @@ done echo "saving to" echo $DATASET_PATH +ls $DATASET_PATH -MLPERF_CLASSES=('Airplane' 'Antelope' 'Apple' 'Backpack' 'Balloon' 'Banana' - 'Barrel' 'Baseball bat' 'Baseball glove' 'Bee' 'Beer' 'Bench' 'Bicycle' - 'Bicycle helmet' 'Bicycle wheel' 'Billboard' 'Book' 'Bookcase' 'Boot' - 'Bottle' 'Bowl' 'Bowling equipment' 'Box' 'Boy' 'Brassiere' 'Bread' - 'Broccoli' 'Bronze sculpture' 'Bull' 'Bus' 'Bust' 'Butterfly' 'Cabinetry' - 'Cake' 'Camel' 'Camera' 'Candle' 'Candy' 'Cannon' 'Canoe' 'Carrot' 'Cart' - 'Castle' 'Cat' 'Cattle' 'Cello' 'Chair' 'Cheese' 'Chest of drawers' 'Chicken' - 'Christmas tree' 'Coat' 'Cocktail' 'Coffee' 'Coffee cup' 'Coffee table' 'Coin' - 'Common sunflower' 'Computer keyboard' 'Computer monitor' 'Convenience store' - 'Cookie' 'Countertop' 'Cowboy hat' 'Crab' 'Crocodile' 'Cucumber' 'Cupboard' - 'Curtain' 'Deer' 'Desk' 'Dinosaur' 'Dog' 'Doll' 'Dolphin' 'Door' 'Dragonfly' - 'Drawer' 'Dress' 'Drum' 'Duck' 'Eagle' 'Earrings' 'Egg (Food)' 'Elephant' - 'Falcon' 'Fedora' 'Flag' 'Flowerpot' 'Football' 'Football helmet' 'Fork' - 'Fountain' 'French fries' 'French horn' 'Frog' 'Giraffe' 'Girl' 'Glasses' - 'Goat' 'Goggles' 'Goldfish' 'Gondola' 'Goose' 'Grape' 'Grapefruit' 'Guitar' - 'Hamburger' 'Handbag' 'Harbor seal' 'Headphones' 'Helicopter' 'High heels' - 'Hiking equipment' 'Horse' 'House' 'Houseplant' 'Human arm' 'Human beard' - 'Human body' 'Human ear' 'Human eye' 'Human face' 'Human foot' 'Human hair' - 'Human hand' 'Human head' 
'Human leg' 'Human mouth' 'Human nose' 'Ice cream' - 'Jacket' 'Jeans' 'Jellyfish' 'Juice' 'Kitchen & dining room table' 'Kite' - 'Lamp' 'Lantern' 'Laptop' 'Lavender (Plant)' 'Lemon' 'Light bulb' 'Lighthouse' - 'Lily' 'Lion' 'Lipstick' 'Lizard' 'Man' 'Maple' 'Microphone' 'Mirror' - 'Mixing bowl' 'Mobile phone' 'Monkey' 'Motorcycle' 'Muffin' 'Mug' 'Mule' - 'Mushroom' 'Musical keyboard' 'Necklace' 'Nightstand' 'Office building' - 'Orange' 'Owl' 'Oyster' 'Paddle' 'Palm tree' 'Parachute' 'Parrot' 'Pen' - 'Penguin' 'Personal flotation device' 'Piano' 'Picture frame' 'Pig' 'Pillow' - 'Pizza' 'Plate' 'Platter' 'Porch' 'Poster' 'Pumpkin' 'Rabbit' 'Rifle' - 'Roller skates' 'Rose' 'Salad' 'Sandal' 'Saucer' 'Saxophone' 'Scarf' 'Sea lion' - 'Sea turtle' 'Sheep' 'Shelf' 'Shirt' 'Shorts' 'Shrimp' 'Sink' 'Skateboard' - 'Ski' 'Skull' 'Skyscraper' 'Snake' 'Sock' 'Sofa bed' 'Sparrow' 'Spider' 'Spoon' - 'Sports uniform' 'Squirrel' 'Stairs' 'Stool' 'Strawberry' 'Street light' - 'Studio couch' 'Suit' 'Sun hat' 'Sunglasses' 'Surfboard' 'Sushi' 'Swan' - 'Swimming pool' 'Swimwear' 'Tank' 'Tap' 'Taxi' 'Tea' 'Teddy bear' 'Television' - 'Tent' 'Tie' 'Tiger' 'Tin can' 'Tire' 'Toilet' 'Tomato' 'Tortoise' 'Tower' - 'Traffic light' 'Train' 'Tripod' 'Truck' 'Trumpet' 'Umbrella' 'Van' 'Vase' - 'Vehicle registration plate' 'Violin' 'Wall clock' 'Waste container' 'Watch' - 'Whale' 'Wheel' 'Wheelchair' 'Whiteboard' 'Window' 'Wine' 'Wine glass' 'Woman' - 'Zebra' 'Zucchini') +MLPERF_CLASSES=('Apple' 'Banana' 'Orange') python fiftyone_openimages.py \ - --dataset-dir=${DATASET_PATH} \ - --splits="test" \ - --output-labels="openimages-mlperf.json" \ - --classes "${MLPERF_CLASSES[@]}" + --dataset-dir=${DATASET_PATH} \ + --output-labels="openimages-mlperf.json" \ + --classes "${MLPERF_CLASSES[@]}" \ No newline at end of file From 6c799f02a8d4ea72dd9db5d50952b851f3a07584 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Mon, 23 Oct 2023 17:29:19 -0500 Subject: [PATCH 3/3] Reduce demo time --- 
single_stage_detector/scripts/download_openimages_demo.sh | 8 ++++---- single_stage_detector/ssd/run_demo.sh | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/single_stage_detector/scripts/download_openimages_demo.sh b/single_stage_detector/scripts/download_openimages_demo.sh index ed5b8d162..3d73ac4f3 100755 --- a/single_stage_detector/scripts/download_openimages_demo.sh +++ b/single_stage_detector/scripts/download_openimages_demo.sh @@ -22,9 +22,9 @@ echo "saving to" echo $DATASET_PATH ls $DATASET_PATH -MLPERF_CLASSES=('Apple' 'Banana' 'Orange') +MLPERF_CLASSES=('Apple' 'Banana') python fiftyone_openimages.py \ - --dataset-dir=${DATASET_PATH} \ - --output-labels="openimages-mlperf.json" \ - --classes "${MLPERF_CLASSES[@]}" \ No newline at end of file + --dataset-dir=${DATASET_PATH} \ + --output-labels="openimages-mlperf.json" \ + --classes "${MLPERF_CLASSES[@]}" diff --git a/single_stage_detector/ssd/run_demo.sh b/single_stage_detector/ssd/run_demo.sh index 85d6e110d..43be5a0ad 100755 --- a/single_stage_detector/ssd/run_demo.sh +++ b/single_stage_detector/ssd/run_demo.sh @@ -32,9 +32,9 @@ echo "STARTING TIMING RUN AT $start_fmt" # Set variables [ "${DEBUG}" = "1" ] && set -x -BATCHSIZE=${BATCHSIZE:-2} +BATCHSIZE=${BATCHSIZE:-4} EVALBATCHSIZE=${EVALBATCHSIZE:-${BATCHSIZE}} -NUMEPOCHS=${NUMEPOCHS:-30} +NUMEPOCHS=${NUMEPOCHS:-1} LOG_INTERVAL=${LOG_INTERVAL:-20} DATASET_DIR=${DATASET_DIR:-"/datasets/open-images-v6-mlperf"} TORCH_HOME=${TORCH_HOME:-"$(pwd)/torch-model-cache"}