diff --git a/dockerfiles/pytorch/pytorch-spr-ssd-resnet34-inference.Dockerfile b/dockerfiles/pytorch/pytorch-spr-ssd-resnet34-inference.Dockerfile new file mode 100644 index 000000000..92af5b531 --- /dev/null +++ b/dockerfiles/pytorch/pytorch-spr-ssd-resnet34-inference.Dockerfile @@ -0,0 +1,79 @@ +# Copyright (c) 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG PYTORCH_IMAGE="model-zoo" +ARG PYTORCH_TAG="pytorch-ipex-spr" + +FROM ${PYTORCH_IMAGE}:${PYTORCH_TAG} AS intel-optimized-pytorch + +# Build Torch Vision +ARG TORCHVISION_VERSION=v0.8.0 + +RUN source ~/anaconda3/bin/activate pytorch && \ + git clone https://github.com/pytorch/vision && \ + cd vision && \ + git checkout ${TORCHVISION_VERSION} && \ + python setup.py install + +RUN source ~/anaconda3/bin/activate pytorch && \ + pip install matplotlib Pillow pycocotools && \ + pip install yacs opencv-python cityscapesscripts transformers && \ + conda install -y libopenblas psutil && \ + cd /workspace/installs && \ + wget https://github.com/gperftools/gperftools/releases/download/gperftools-2.7.90/gperftools-2.7.90.tar.gz && \ + tar -xzf gperftools-2.7.90.tar.gz && \ + cd gperftools-2.7.90 && \ + ./configure --prefix=$HOME/.local && \ + make && \ + make install && \ + rm -rf /workspace/installs/ + +ARG PACKAGE_DIR=model_packages + +ARG PACKAGE_NAME="pytorch-spr-ssd-resnet34-inference" + +ARG MODEL_WORKSPACE + +# ${MODEL_WORKSPACE} and below needs to be owned by root:root rather than the current UID:GID +# this allows the default user (root) to work in k8s single-node, multi-node +RUN umask 002 && mkdir -p ${MODEL_WORKSPACE} && chgrp root ${MODEL_WORKSPACE} && chmod g+s+w,o+s+r ${MODEL_WORKSPACE} + +ADD --chown=0:0 ${PACKAGE_DIR}/${PACKAGE_NAME}.tar.gz ${MODEL_WORKSPACE} + +RUN chown -R root ${MODEL_WORKSPACE}/${PACKAGE_NAME} && chgrp -R root ${MODEL_WORKSPACE}/${PACKAGE_NAME} && chmod -R g+s+w ${MODEL_WORKSPACE}/${PACKAGE_NAME} && find ${MODEL_WORKSPACE}/${PACKAGE_NAME} -type d | xargs chmod o+r+x + +WORKDIR ${MODEL_WORKSPACE}/${PACKAGE_NAME} + +FROM intel-optimized-pytorch AS release +COPY --from=intel-optimized-pytorch /root/anaconda3 /root/anaconda3 +COPY --from=intel-optimized-pytorch /workspace/lib/ /workspace/lib/ +COPY --from=intel-optimized-pytorch /root/.local/ /root/.local/ + +ENV DNNL_MAX_CPU_ISA="AVX512_CORE_AMX" + +ENV PATH="~/anaconda3/bin:${PATH}" +ENV LD_PRELOAD="/workspace/lib/jemalloc/lib/libjemalloc.so:$LD_PRELOAD" +ENV MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000" +ENV BASH_ENV=/root/.bash_profile +WORKDIR /workspace/ +RUN yum install -y numactl mesa-libGL && \ + yum clean all && \ + echo "source activate pytorch" >> 
/root/.bash_profile diff --git a/dockerfiles/pytorch/pytorch-spr-ssd-resnet34-training.Dockerfile b/dockerfiles/pytorch/pytorch-spr-ssd-resnet34-training.Dockerfile new file mode 100644 index 000000000..1f07444c9 --- /dev/null +++ b/dockerfiles/pytorch/pytorch-spr-ssd-resnet34-training.Dockerfile @@ -0,0 +1,88 @@ +# Copyright (c) 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG PYTORCH_IMAGE="model-zoo" +ARG PYTORCH_TAG="pytorch-ipex-spr" + +FROM ${PYTORCH_IMAGE}:${PYTORCH_TAG} AS intel-optimized-pytorch + +RUN source ~/anaconda3/bin/activate pytorch && \ + pip install matplotlib Pillow pycocotools && \ + pip install yacs opencv-python cityscapesscripts transformers && \ + conda install -y libopenblas psutil && \ + cd /workspace/installs && \ + wget https://github.com/gperftools/gperftools/releases/download/gperftools-2.7.90/gperftools-2.7.90.tar.gz && \ + tar -xzf gperftools-2.7.90.tar.gz && \ + cd gperftools-2.7.90 && \ + ./configure --prefix=$HOME/.local && \ + make && \ + make install && \ + rm -rf /workspace/installs/ + +ARG PACKAGE_DIR=model_packages + +ARG PACKAGE_NAME="pytorch-spr-ssd-resnet34-training" + +ARG MODEL_WORKSPACE + +# ${MODEL_WORKSPACE} and below needs to be owned by root:root rather than the current UID:GID +# this allows the default user (root) to work in k8s single-node, multi-node +RUN umask 002 && mkdir -p ${MODEL_WORKSPACE} && chgrp root ${MODEL_WORKSPACE} && chmod g+s+w,o+s+r ${MODEL_WORKSPACE} + +ADD --chown=0:0 ${PACKAGE_DIR}/${PACKAGE_NAME}.tar.gz ${MODEL_WORKSPACE} + +RUN chown -R root ${MODEL_WORKSPACE}/${PACKAGE_NAME} && chgrp -R root ${MODEL_WORKSPACE}/${PACKAGE_NAME} && chmod -R g+s+w ${MODEL_WORKSPACE}/${PACKAGE_NAME} && find ${MODEL_WORKSPACE}/${PACKAGE_NAME} -type d | xargs chmod o+r+x + +WORKDIR ${MODEL_WORKSPACE}/${PACKAGE_NAME} + +RUN source ~/anaconda3/bin/activate pytorch && \ + pip install --upgrade pip && \ + pip install --no-cache-dir https://github.com/mlperf/logging/archive/9ea0afa.zip && \ + pip install --no-cache-dir \ + Cython==0.28.4 \ + git+http://github.com/NVIDIA/apex.git@9041a868a1a253172d94b113a963375b9badd030#egg=apex \ + mlperf-compliance==0.0.10 \ + cycler==0.10.0 \ + kiwisolver==1.0.1 \ + matplotlib==2.2.2 \ + Pillow==5.2.0 \ + pyparsing==2.2.0 \ + python-dateutil==2.7.3 \ + pytz==2018.5 \ + six==1.11.0 \ + torchvision==0.2.1 \ + pycocotools==2.0.2 + +FROM intel-optimized-pytorch AS release +COPY --from=intel-optimized-pytorch /root/anaconda3 /root/anaconda3 +COPY --from=intel-optimized-pytorch /workspace/lib/ /workspace/lib/ +COPY --from=intel-optimized-pytorch /root/.local/ /root/.local/ + +ENV DNNL_MAX_CPU_ISA="AVX512_CORE_AMX" + +ENV PATH="~/anaconda3/bin:${PATH}" +ENV 
LD_PRELOAD="/workspace/lib/jemalloc/lib/libjemalloc.so:$LD_PRELOAD" +ENV MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000" +ENV BASH_ENV=/root/.bash_profile +WORKDIR /workspace/ +RUN yum install -y numactl mesa-libGL && \ + yum clean all && \ + echo "source activate pytorch" >> /root/.bash_profile diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/container_build.md b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/container_build.md new file mode 100644 index 000000000..9b4f9b303 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/container_build.md @@ -0,0 +1,26 @@ +## Build the container + +The package has scripts and a Dockerfile that are +used to build a workload container that runs the model. This container +uses the PyTorch/IPEX container as it's base, so ensure that you have built +the `pytorch-ipex-spr.tar.gz` container prior to building this model container. + +Use `docker images` to verify that you have the base container built. For example: +``` +$ docker images | grep pytorch-ipex-spr +model-zoo pytorch-ipex-spr fecc7096a11e 40 minutes ago 8.31GB +``` + +To build the container, extract the package and +run the `build.sh` script. +``` +# Extract the package +tar -xzf +cd + +# Build the container +./build.sh +``` + +After the build completes, you should have a container called +`` that will be used to run the model. diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/datasets.md b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/datasets.md new file mode 100644 index 000000000..78335650c --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/datasets.md @@ -0,0 +1,33 @@ +## Datasets + +### COCO + +The [COCO dataset](https://cocodataset.org) is used to run . + +Download and extract the 2017 training/validation images and annotations from the +[COCO dataset website](https://cocodataset.org/#download) to a `coco` folder +and unzip the files. After extracting the zip files, your dataset directory +structure should look something like this: +``` +coco +├── annotations +│ ├── captions_train2017.json +│ ├── captions_val2017.json +│ ├── instances_train2017.json +│ ├── instances_val2017.json +│ ├── person_keypoints_train2017.json +│ └── person_keypoints_val2017.json +├── train2017 +│ ├── 000000454854.jpg +│ ├── 000000137045.jpg +│ ├── 000000129582.jpg +│ └── ... +└── val2017 + ├── 000000000139.jpg + ├── 000000000285.jpg + ├── 000000000632.jpg + └── ... +``` +The parent of the `annotations`, `train2017`, and `val2017` directory (in this example `coco`) +is the directory that should be used when setting the `DATASET_DIR` environment +variable for (for example: `export DATASET_DIR=/home//coco`). diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/description.md b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/description.md new file mode 100644 index 000000000..49f899094 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/description.md @@ -0,0 +1,5 @@ + +## Description + +This document has instructions for running using +Intel-optimized PyTorch. 
diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/docker_spr.md b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/docker_spr.md new file mode 100644 index 000000000..7d3040e45 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/docker_spr.md @@ -0,0 +1,33 @@ +## Run the model + +Download the pretrained model weights using the script from the MLPerf repo +and set the `PRETRAINED_MODEL` environment variable to point to the downloaded file: +``` +wget https://raw.githubusercontent.com/mlcommons/inference/v0.7/others/cloud/single_stage_detector/download_model.sh +sh download_model.sh +export PRETRAINED_MODEL=$(pwd)/pretrained/resnet34-ssd1200.pth +``` + +After downloading the pretrained model and following the instructions to +[build the container](#build-the-container) and [prepare the dataset](#datasets), +use the `run.sh` script from the container package to run +using docker. Set environment variables to specify the dataset directory, +precision to run, and an output directory for logs. By default, the `run.sh` +script will run the `inference_realtime.sh` quickstart script. To run a different +script, specify the name of the script using the `SCRIPT` environment variable. +``` +# Navigate to the container package directory +cd + +# Set the required environment vars +export DATASET_DIR= +export PRETRAINED_MODEL= +export PRECISION= +export OUTPUT_DIR= + +# Run the container with inference_realtime.sh quickstart script +./run.sh + +# To run a difference quickstart script, us the SCRIPT env var +SCRIPT=accuracy.sh ./run.sh +``` diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/license.md b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/license.md new file mode 100644 index 000000000..e547f148d --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/license.md @@ -0,0 +1,4 @@ + +## License + +Licenses can be found in the model package, in the `licenses` directory. diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/quickstart.md b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/quickstart.md new file mode 100644 index 000000000..02d01973f --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/quickstart.md @@ -0,0 +1,8 @@ + +## Quick Start Scripts + +| Script name | Description | +|-------------|-------------| +| `inference_realtime.sh` | Runs multi instance realtime inference using 4 cores per instance for the specified precision (fp32, int8 or bf16). | +| `inference_throughput.sh` | Runs multi instance batch inference using 1 instance per socket for the specified precision (fp32, int8 or bf16). | +| `accuracy.sh` | Measures the inference accuracy (providing a `DATASET_DIR` environment variable is required) for the specified precision (fp32, int8 or bf16). 
| diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/title.md b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/title.md new file mode 100644 index 000000000..a4ccbf863 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/title.md @@ -0,0 +1,2 @@ + +# PyTorch diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/wrapper_package.md b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/wrapper_package.md new file mode 100644 index 000000000..0d77199f3 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/wrapper_package.md @@ -0,0 +1,16 @@ +## Model Package + +The model package includes the Dockerfile and scripts needed to build and +run in a container. +``` + +├── README.md +├── build.sh +├── licenses +│   ├── LICENSE +│   └── third_party +├── model_packages +│   └── +├── .Dockerfile +└── run.sh +``` diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/README_SPR.md b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/README_SPR.md new file mode 100644 index 000000000..f51bb7842 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/README_SPR.md @@ -0,0 +1,135 @@ + +# PyTorch ssd-resnet34 inference + + +## Description + +This document has instructions for running ssd-resnet34 inference using +Intel-optimized PyTorch. + +## Model Package + +The model package includes the Dockerfile and scripts needed to build and +run ssd-resnet34 inference in a container. +``` +pytorch-spr-ssd-resnet34-inference +├── README.md +├── build.sh +├── licenses +│   ├── LICENSE +│   └── third_party +├── model_packages +│   └── pytorch-spr-ssd-resnet34-inference.tar.gz +├── pytorch-spr-ssd-resnet34-inference.Dockerfile +└── run.sh +``` + + +## Quick Start Scripts + +| Script name | Description | +|-------------|-------------| +| `inference_realtime.sh` | Runs multi instance realtime inference using 4 cores per instance for the specified precision (fp32, int8 or bf16). | +| `inference_throughput.sh` | Runs multi instance batch inference using 1 instance per socket for the specified precision (fp32, int8 or bf16). | +| `accuracy.sh` | Measures the inference accuracy (providing a `DATASET_DIR` environment variable is required) for the specified precision (fp32, int8 or bf16). | + +## Datasets + +### COCO + +The [COCO dataset](https://cocodataset.org) is used to run ssd-resnet34. + +Download and extract the 2017 training/validation images and annotations from the +[COCO dataset website](https://cocodataset.org/#download) to a `coco` folder +and unzip the files. After extracting the zip files, your dataset directory +structure should look something like this: +``` +coco +├── annotations +│ ├── captions_train2017.json +│ ├── captions_val2017.json +│ ├── instances_train2017.json +│ ├── instances_val2017.json +│ ├── person_keypoints_train2017.json +│ └── person_keypoints_val2017.json +├── train2017 +│ ├── 000000454854.jpg +│ ├── 000000137045.jpg +│ ├── 000000129582.jpg +│ └── ... +└── val2017 + ├── 000000000139.jpg + ├── 000000000285.jpg + ├── 000000000632.jpg + └── ... +``` +The parent of the `annotations`, `train2017`, and `val2017` directory (in this example `coco`) +is the directory that should be used when setting the `DATASET_DIR` environment +variable for ssd-resnet34 (for example: `export DATASET_DIR=/home//coco`). 
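+
+Before building the container, you can optionally run a quick sanity check of the dataset
+layout from the host. This is only a sketch (it is not part of the model package) and
+assumes the directory structure shown above:
+```
+# The annotations and image directories listed above should exist
+ls ${DATASET_DIR}/annotations/instances_val2017.json
+
+# COCO val2017 contains 5,000 images
+ls ${DATASET_DIR}/val2017 | wc -l
+```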
+
+## Build the container
+
+The ssd-resnet34 inference package has scripts and a Dockerfile that are
+used to build a workload container that runs the model. This container
+uses the PyTorch/IPEX container as its base, so ensure that you have built
+the `pytorch-ipex-spr.tar.gz` container prior to building this model container.
+
+Use `docker images` to verify that you have the base container built. For example:
+```
+$ docker images | grep pytorch-ipex-spr
+model-zoo     pytorch-ipex-spr    fecc7096a11e    40 minutes ago    8.31GB
+```
+
+To build the ssd-resnet34 inference container, extract the package and
+run the `build.sh` script.
+```
+# Extract the package
+tar -xzf pytorch-spr-ssd-resnet34-inference.tar.gz
+cd pytorch-spr-ssd-resnet34-inference
+
+# Build the container
+./build.sh
+```
+
+After the build completes, you should have a container called
+`model-zoo:pytorch-spr-ssd-resnet34-inference` that will be used to run the model.
+
+## Run the model
+
+Download the pretrained model weights using the script from the MLPerf repo
+and set the `PRETRAINED_MODEL` environment variable to point to the downloaded file:
+```
+wget https://raw.githubusercontent.com/mlcommons/inference/v0.7/others/cloud/single_stage_detector/download_model.sh
+sh download_model.sh
+export PRETRAINED_MODEL=$(pwd)/pretrained/resnet34-ssd1200.pth
+```
+
+After downloading the pretrained model and following the instructions to
+[build the container](#build-the-container) and [prepare the dataset](#datasets),
+use the `run.sh` script from the container package to run ssd-resnet34 inference
+using Docker. Set environment variables to specify the dataset directory,
+precision to run, and an output directory for logs. By default, the `run.sh`
+script will run the `inference_realtime.sh` quickstart script. To run a different
+script, specify the name of the script using the `SCRIPT` environment variable.
+```
+# Navigate to the container package directory
+cd pytorch-spr-ssd-resnet34-inference
+
+# Set the required environment vars
+export DATASET_DIR=
+export PRETRAINED_MODEL=
+export PRECISION=
+export OUTPUT_DIR=
+
+# Run the container with inference_realtime.sh quickstart script
+./run.sh
+
+# To run a different quickstart script, use the SCRIPT env var
+SCRIPT=accuracy.sh ./run.sh
+```
+
+
+## License
+
+Licenses can be found in the model package, in the `licenses` directory.
+
diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/accuracy.sh b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/accuracy.sh
new file mode 100755
index 000000000..477aea4ff
--- /dev/null
+++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/accuracy.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# + +MODEL_DIR=${MODEL_DIR-$PWD} + +echo "DATASET_DIR: ${DATASET_DIR}" +echo "PRETRAINED_MODEL: ${PRETRAINED_MODEL}" +echo "PRECISION: ${PRECISION}" +echo "OUTPUT_DIR: ${OUTPUT_DIR}" + +if [ -z "${OUTPUT_DIR}" ]; then + echo "The required environment variable OUTPUT_DIR has not been set" + exit 1 +fi + +# Create the output directory in case it doesn't already exist +mkdir -p ${OUTPUT_DIR} + +if [ -z "${DATASET_DIR}" ]; then + echo "The required environment variable DATASET_DIR has not been set" + exit 1 +fi + +if [ ! -d "${DATASET_DIR}" ]; then + echo "The DATASET_DIR '${DATASET_DIR}' does not exist" + exit 1 +fi + +if [ -z "${PRECISION}" ]; then + echo "The required environment variable PRECISION has not been set" + echo "Please set PRECISION to fp32, int8, or bf16." + exit 1 +fi + +cd ${MODEL_DIR}/models/ssd/inference/others/cloud/single_stage_detector/pytorch + +# Set env vars that the bash script looks for +export DATA_DIR=${DATASET_DIR} +export MODEL_DIR=${PRETRAINED_MODEL} +export work_space=${OUTPUT_DIR} + +if [[ $PRECISION == "int8" ]]; then + bash run_and_time_accuracy_cpu.sh int8 jit ./pytorch_default_recipe_ssd_configure.json 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-inference-accuracy-int8.log +elif [[ $PRECISION == "bf16" ]]; then + bash run_and_time_accuracy_cpu.sh bf16 jit 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-inference-accuracy-bf16.log +elif [[ $PRECISION == "fp32" ]]; then + bash run_and_time_accuracy_cpu.sh fp32 jit 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-inference-accuracy-fp32.log +else + echo "The specified precision '${PRECISION}' is unsupported." + echo "Supported precisions are: fp32, bf16, and int8" + exit 1 +fi diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/build.sh b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/build.sh new file mode 100755 index 000000000..a1862546c --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/build.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +PACKAGE_NAME=pytorch-spr-ssd-resnet34-inference +DOCKERFILE=pytorch-spr-ssd-resnet34-inference.Dockerfile +PYTORCH_BASE_IMAGE=${PYTORCH_BASE_IMAGE:-model-zoo} +PYTORCH_BASE_TAG=${PYTORCH_BASE_TAG:-pytorch-ipex-spr} +IMAGE_NAME=${IMAGE_NAME:-model-zoo:pytorch-spr-ssd-resnet34-inference} + +if [ "$(docker images -q ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_TAG})" == "" ]; then + echo "The Intel(R) Extension for PyTorch container (${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_TAG}) was not found." + echo "This container is required, as it is used as the base for building the ssd-resnet34 inference container." + echo "Please download the IPEX container package and build the image and then retry this build." 
+ exit 1 +fi + +docker build --build-arg PYTORCH_IMAGE=${PYTORCH_BASE_IMAGE} \ + --build-arg PYTORCH_TAG=${PYTORCH_BASE_TAG}\ + --build-arg PACKAGE_NAME=$PACKAGE_NAME \ + --build-arg MODEL_WORKSPACE=/workspace \ + --build-arg http_proxy=$http_proxy \ + --build-arg https_proxy=$https_proxy \ + --build-arg no_proxy=$no_proxy \ + -t $IMAGE_NAME \ + -f $DOCKERFILE . diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/inference_realtime.sh b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/inference_realtime.sh new file mode 100755 index 000000000..13d0ccaa1 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/inference_realtime.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +MODEL_DIR=${MODEL_DIR-$PWD} + +echo "DATASET_DIR: ${DATASET_DIR}" +echo "PRETRAINED_MODEL: ${PRETRAINED_MODEL}" +echo "PRECISION: ${PRECISION}" +echo "OUTPUT_DIR: ${OUTPUT_DIR}" + +if [ -z "${OUTPUT_DIR}" ]; then + echo "The required environment variable OUTPUT_DIR has not been set" + exit 1 +fi + +# Create the output directory in case it doesn't already exist +mkdir -p ${OUTPUT_DIR} + +if [ -z "${DATASET_DIR}" ]; then + echo "The required environment variable DATASET_DIR has not been set" + exit 1 +fi + +if [ ! -d "${DATASET_DIR}" ]; then + echo "The DATASET_DIR '${DATASET_DIR}' does not exist" + exit 1 +fi + +if [ -z "${PRECISION}" ]; then + echo "The required environment variable PRECISION has not been set" + echo "Please set PRECISION to fp32, int8, or bf16." + exit 1 +fi + +cd ${MODEL_DIR}/models/ssd/inference/others/cloud/single_stage_detector/pytorch + +# Set env vars that the bash script looks for +export DATA_DIR=${DATASET_DIR} +export MODEL_DIR=${PRETRAINED_MODEL} +export work_space=${OUTPUT_DIR} + +if [[ $PRECISION == "int8" ]]; then + bash run_multi_instance_ipex.sh int8 jit ./pytorch_default_recipe_ssd_configure.json 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-inference-throughput-int8.log +elif [[ $PRECISION == "bf16" ]]; then + bash run_multi_instance_ipex.sh bf16 jit 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-inference-throughput-bf16.log +elif [[ $PRECISION == "fp32" ]]; then + bash run_multi_instance_ipex.sh fp32 jit 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-inference-throughput-fp32.log +else + echo "The specified precision '${PRECISION}' is unsupported." 
+ echo "Supported precisions are: fp32, bf16, and int8" + exit 1 +fi diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/inference_throughput.sh b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/inference_throughput.sh new file mode 100755 index 000000000..944cc082d --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/inference_throughput.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +MODEL_DIR=${MODEL_DIR-$PWD} + +echo "DATASET_DIR: ${DATASET_DIR}" +echo "PRETRAINED_MODEL: ${PRETRAINED_MODEL}" +echo "PRECISION: ${PRECISION}" +echo "OUTPUT_DIR: ${OUTPUT_DIR}" + +if [ -z "${OUTPUT_DIR}" ]; then + echo "The required environment variable OUTPUT_DIR has not been set" + exit 1 +fi + +# Create the output directory in case it doesn't already exist +mkdir -p ${OUTPUT_DIR} + +if [ -z "${DATASET_DIR}" ]; then + echo "The required environment variable DATASET_DIR has not been set" + exit 1 +fi + +if [ ! -d "${DATASET_DIR}" ]; then + echo "The DATASET_DIR '${DATASET_DIR}' does not exist" + exit 1 +fi + +if [ -z "${PRECISION}" ]; then + echo "The required environment variable PRECISION has not been set" + echo "Please set PRECISION to fp32, int8, or bf16." + exit 1 +fi + +cd ${MODEL_DIR}/models/ssd/inference/others/cloud/single_stage_detector/pytorch + +# Set env vars that the bash script looks for +export DATA_DIR=${DATASET_DIR} +export MODEL_DIR=${PRETRAINED_MODEL} +export work_space=${OUTPUT_DIR} + +if [[ $PRECISION == "int8" ]]; then + bash run_multi_instance_latency_ipex.sh int8 jit ./pytorch_default_recipe_ssd_configure.json 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-inference-realtime-int8.log +elif [[ $PRECISION == "bf16" ]]; then + bash run_multi_instance_latency_ipex.sh bf16 jit 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-inference-realtime-bf16.log +elif [[ $PRECISION == "fp32" ]]; then + bash run_multi_instance_latency_ipex.sh fp32 jit 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-inference-realtime-fp32.log +else + echo "The specified precision '${PRECISION}' is unsupported." + echo "Supported precisions are: fp32, bf16, and int8" + exit 1 +fi diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/run.sh b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/run.sh new file mode 100755 index 000000000..f6f2a7f33 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/run.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +if [ -z "${OUTPUT_DIR}" ]; then + echo "The required environment variable OUTPUT_DIR has not been set" + exit 1 +fi + +if [ -z "${PRECISION}" ]; then + echo "The required environment variable PRECISION has not been set" + exit 1 +fi + +if [ -z "${DATASET_DIR}" ]; then + echo "The required environment variable DATASET_DIR has not been set" + exit 1 +fi + +if [ -z "${PRETRAINED_MODEL}" ]; then + echo "The required environment variable PRETRAINED_MODEL has not been set" + exit 1 +fi + +IMAGE_NAME=${IMAGE_NAME:-model-zoo:pytorch-spr-ssd-resnet34-inference} +DOCKER_ARGS=${DOCKER_ARGS:---privileged --init -it} +WORKDIR=/workspace/pytorch-spr-ssd-resnet34-inference + +# inference scripts: +# inference_realtime.sh +# inference_throughput.sh +# accuracy.sh +export SCRIPT="${SCRIPT:-inference_realtime.sh}" + +if [[ ${SCRIPT} != quickstart* ]]; then + SCRIPT="quickstart/$SCRIPT" +fi + +docker run --rm \ + ${dataset_env} \ + --env DATASET_DIR=${DATASET_DIR} \ + --env PRECISION=${PRECISION} \ + --env PRETRAINED_MODEL=${PRETRAINED_MODEL} \ + --env OUTPUT_DIR=${OUTPUT_DIR} \ + --env http_proxy=${http_proxy} \ + --env https_proxy=${https_proxy} \ + --env no_proxy=${no_proxy} \ + --volume ${DATASET_DIR}:${DATASET_DIR} \ + --volume ${OUTPUT_DIR}:${OUTPUT_DIR} \ + --volume ${PRETRAINED_MODEL}:${PRETRAINED_MODEL} \ + --shm-size 8G \ + -w ${WORKDIR} \ + ${DOCKER_ARGS} \ + $IMAGE_NAME \ + /bin/bash $SCRIPT diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/container_build.md b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/container_build.md new file mode 100644 index 000000000..9b4f9b303 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/container_build.md @@ -0,0 +1,26 @@ +## Build the container + +The package has scripts and a Dockerfile that are +used to build a workload container that runs the model. This container +uses the PyTorch/IPEX container as it's base, so ensure that you have built +the `pytorch-ipex-spr.tar.gz` container prior to building this model container. + +Use `docker images` to verify that you have the base container built. For example: +``` +$ docker images | grep pytorch-ipex-spr +model-zoo pytorch-ipex-spr fecc7096a11e 40 minutes ago 8.31GB +``` + +To build the container, extract the package and +run the `build.sh` script. +``` +# Extract the package +tar -xzf +cd + +# Build the container +./build.sh +``` + +After the build completes, you should have a container called +`` that will be used to run the model. diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/datasets.md b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/datasets.md new file mode 100644 index 000000000..78335650c --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/datasets.md @@ -0,0 +1,33 @@ +## Datasets + +### COCO + +The [COCO dataset](https://cocodataset.org) is used to run . 
+ +Download and extract the 2017 training/validation images and annotations from the +[COCO dataset website](https://cocodataset.org/#download) to a `coco` folder +and unzip the files. After extracting the zip files, your dataset directory +structure should look something like this: +``` +coco +├── annotations +│ ├── captions_train2017.json +│ ├── captions_val2017.json +│ ├── instances_train2017.json +│ ├── instances_val2017.json +│ ├── person_keypoints_train2017.json +│ └── person_keypoints_val2017.json +├── train2017 +│ ├── 000000454854.jpg +│ ├── 000000137045.jpg +│ ├── 000000129582.jpg +│ └── ... +└── val2017 + ├── 000000000139.jpg + ├── 000000000285.jpg + ├── 000000000632.jpg + └── ... +``` +The parent of the `annotations`, `train2017`, and `val2017` directory (in this example `coco`) +is the directory that should be used when setting the `DATASET_DIR` environment +variable for (for example: `export DATASET_DIR=/home//coco`). diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/description.md b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/description.md new file mode 100644 index 000000000..49f899094 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/description.md @@ -0,0 +1,5 @@ + +## Description + +This document has instructions for running using +Intel-optimized PyTorch. diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/docker_spr.md b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/docker_spr.md new file mode 100644 index 000000000..012bb3997 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/docker_spr.md @@ -0,0 +1,32 @@ +## Run the model + +Download the backbone weights and set the `BACKBONE_WEIGHTS` environment variable +to point to the downloaded file: +``` +curl -O https://download.pytorch.org/models/resnet34-333f7ec4.pth +export BACKBONE_WEIGHTS=$(pwd)/resnet34-333f7ec4.pth +``` + +After you've downloaded the backbone weights and followed the instructions to +[build the container](#build-the-container) and [prepare the dataset](#datasets), +use the `run.sh` script from the container package to run +using docker. Set environment variables to point to the COCO dataset directory, +weights, precision, and an output directory for logs. By default, the `run.sh` +script will run the `train_performance.sh` quickstart script. To run the `train_accuracy.sh` +script instead, specify that script name using the `SCRIPT` environment variable. +``` +# Navigate to the container package directory +cd + +# Set the required environment vars +export DATASET_DIR= +export BACKBONE_WEIGHTS= +export PRECISION= +export OUTPUT_DIR= + +# Run the container with train_performance.sh quickstart script +./run.sh + +# Run a different script by specifying the SCRIPT env var +SCRIPT=train_accuracy.sh ./run.sh +``` diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/license.md b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/license.md new file mode 100644 index 000000000..e547f148d --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/license.md @@ -0,0 +1,4 @@ + +## License + +Licenses can be found in the model package, in the `licenses` directory. 
diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/quickstart.md b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/quickstart.md new file mode 100644 index 000000000..57f5f9e43 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/quickstart.md @@ -0,0 +1,7 @@ + +## Quick Start Scripts + +| Script name | Description | +|-------------|-------------| +| `train_performance.sh` | Tests the training performance for SSD-ResNet34 for the specified precision (fp32 or bf16). | +| `train_accuracy.sh` | Tests the training accuracy for SSD-ResNet34 for the specified precision (fp32 or bf16). | diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/title.md b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/title.md new file mode 100644 index 000000000..a4ccbf863 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/title.md @@ -0,0 +1,2 @@ + +# PyTorch diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/wrapper_package.md b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/wrapper_package.md new file mode 100644 index 000000000..0d77199f3 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/wrapper_package.md @@ -0,0 +1,16 @@ +## Model Package + +The model package includes the Dockerfile and scripts needed to build and +run in a container. +``` + +├── README.md +├── build.sh +├── licenses +│   ├── LICENSE +│   └── third_party +├── model_packages +│   └── +├── .Dockerfile +└── run.sh +``` diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/README_SPR.md b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/README_SPR.md new file mode 100644 index 000000000..c47a9047b --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/README_SPR.md @@ -0,0 +1,133 @@ + +# PyTorch ssd-resnet34 training + + +## Description + +This document has instructions for running ssd-resnet34 training using +Intel-optimized PyTorch. + +## Model Package + +The model package includes the Dockerfile and scripts needed to build and +run ssd-resnet34 training in a container. +``` +pytorch-spr-ssd-resnet34-training +├── README.md +├── build.sh +├── licenses +│   ├── LICENSE +│   └── third_party +├── model_packages +│   └── pytorch-spr-ssd-resnet34-training.tar.gz +├── pytorch-spr-ssd-resnet34-training.Dockerfile +└── run.sh +``` + + +## Quick Start Scripts + +| Script name | Description | +|-------------|-------------| +| `train_performance.sh` | Tests the training performance for SSD-ResNet34 for the specified precision (fp32 or bf16). | +| `train_accuracy.sh` | Tests the training accuracy for SSD-ResNet34 for the specified precision (fp32 or bf16). | + +## Datasets + +### COCO + +The [COCO dataset](https://cocodataset.org) is used to run ssd-resnet34. + +Download and extract the 2017 training/validation images and annotations from the +[COCO dataset website](https://cocodataset.org/#download) to a `coco` folder +and unzip the files. 
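+For example, the 2017 images and annotations can be fetched as shown below (the file
+names match the links listed on the COCO download page; verify them against the website
+before downloading):
+```
+mkdir coco && cd coco
+wget http://images.cocodataset.org/zips/train2017.zip && unzip train2017.zip
+wget http://images.cocodataset.org/zips/val2017.zip && unzip val2017.zip
+wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip && unzip annotations_trainval2017.zip
+cd ..
+```
+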
+After extracting the zip files, your dataset directory
+structure should look something like this:
+```
+coco
+├── annotations
+│   ├── captions_train2017.json
+│   ├── captions_val2017.json
+│   ├── instances_train2017.json
+│   ├── instances_val2017.json
+│   ├── person_keypoints_train2017.json
+│   └── person_keypoints_val2017.json
+├── train2017
+│   ├── 000000454854.jpg
+│   ├── 000000137045.jpg
+│   ├── 000000129582.jpg
+│   └── ...
+└── val2017
+    ├── 000000000139.jpg
+    ├── 000000000285.jpg
+    ├── 000000000632.jpg
+    └── ...
+```
+The parent of the `annotations`, `train2017`, and `val2017` directory (in this example `coco`)
+is the directory that should be used when setting the `DATASET_DIR` environment
+variable for ssd-resnet34 (for example: `export DATASET_DIR=/home//coco`).
+
+## Build the container
+
+The ssd-resnet34 training package has scripts and a Dockerfile that are
+used to build a workload container that runs the model. This container
+uses the PyTorch/IPEX container as its base, so ensure that you have built
+the `pytorch-ipex-spr.tar.gz` container prior to building this model container.
+
+Use `docker images` to verify that you have the base container built. For example:
+```
+$ docker images | grep pytorch-ipex-spr
+model-zoo     pytorch-ipex-spr    fecc7096a11e    40 minutes ago    8.31GB
+```
+
+To build the ssd-resnet34 training container, extract the package and
+run the `build.sh` script.
+```
+# Extract the package
+tar -xzf pytorch-spr-ssd-resnet34-training.tar.gz
+cd pytorch-spr-ssd-resnet34-training
+
+# Build the container
+./build.sh
+```
+
+After the build completes, you should have a container called
+`model-zoo:pytorch-spr-ssd-resnet34-training` that will be used to run the model.
+
+## Run the model
+
+Download the backbone weights and set the `BACKBONE_WEIGHTS` environment variable
+to point to the downloaded file:
+```
+curl -O https://download.pytorch.org/models/resnet34-333f7ec4.pth
+export BACKBONE_WEIGHTS=$(pwd)/resnet34-333f7ec4.pth
+```
+
+After you've downloaded the backbone weights and followed the instructions to
+[build the container](#build-the-container) and [prepare the dataset](#datasets),
+use the `run.sh` script from the container package to run ssd-resnet34 training
+using Docker. Set environment variables to point to the COCO dataset directory,
+weights, precision, and an output directory for logs. By default, the `run.sh`
+script will run the `train_performance.sh` quickstart script. To run the `train_accuracy.sh`
+script instead, specify that script name using the `SCRIPT` environment variable.
+```
+# Navigate to the container package directory
+cd pytorch-spr-ssd-resnet34-training
+
+# Set the required environment vars
+export DATASET_DIR=
+export BACKBONE_WEIGHTS=
+export PRECISION=
+export OUTPUT_DIR=
+
+# Run the container with train_performance.sh quickstart script
+./run.sh
+
+# Run a different script by specifying the SCRIPT env var
+SCRIPT=train_accuracy.sh ./run.sh
+```
+
+
+## License
+
+Licenses can be found in the model package, in the `licenses` directory.
+ diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/build.sh b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/build.sh new file mode 100755 index 000000000..b858529b2 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/build.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +PACKAGE_NAME=pytorch-spr-ssd-resnet34-training +DOCKERFILE=pytorch-spr-ssd-resnet34-training.Dockerfile +PYTORCH_BASE_IMAGE=${PYTORCH_BASE_IMAGE:-model-zoo} +PYTORCH_BASE_TAG=${PYTORCH_BASE_TAG:-pytorch-ipex-spr} +IMAGE_NAME=${IMAGE_NAME:-model-zoo:pytorch-spr-ssd-resnet34-training} + +if [ "$(docker images -q ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_TAG})" == "" ]; then + echo "The Intel(R) Extension for PyTorch container (${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_TAG}) was not found." + echo "This container is required, as it is used as the base for building the ssd-resnet34 training container." + echo "Please download the IPEX container package and build the image and then retry this build." + exit 1 +fi + +docker build --build-arg PYTORCH_IMAGE=${PYTORCH_BASE_IMAGE} \ + --build-arg PYTORCH_TAG=${PYTORCH_BASE_TAG} \ + --build-arg PACKAGE_NAME=$PACKAGE_NAME \ + --build-arg MODEL_WORKSPACE=/workspace \ + --build-arg http_proxy=$http_proxy \ + --build-arg https_proxy=$https_proxy \ + --build-arg no_proxy=$no_proxy \ + -t $IMAGE_NAME \ + -f $DOCKERFILE . diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/run.sh b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/run.sh new file mode 100755 index 000000000..29961c1b1 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/run.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +if [ -z "${OUTPUT_DIR}" ]; then + echo "The required environment variable OUTPUT_DIR has not been set" + exit 1 +fi + +if [ -z "${PRECISION}" ]; then + echo "The required environment variable PRECISION has not been set" + exit 1 +fi + +if [ -z "${DATASET_DIR}" ]; then + echo "The required environment variable DATASET_DIR has not been set" + exit 1 +fi + +if [ -z "${BACKBONE_WEIGHTS}" ]; then + echo "The required environment variable BACKBONE_WEIGHTS has not been set" + exit 1 +fi + +IMAGE_NAME=${IMAGE_NAME:-model-zoo:pytorch-spr-ssd-resnet34-training} +DOCKER_ARGS=${DOCKER_ARGS:---privileged --init -it} +WORKDIR=/workspace/pytorch-spr-ssd-resnet34-training + +# training scripts: +# train_performance.sh +# train_accuracy.sh +export SCRIPT="${SCRIPT:-train_performance.sh}" + +if [[ ${SCRIPT} != quickstart* ]]; then + SCRIPT="quickstart/$SCRIPT" +fi + +docker run --rm \ + ${dataset_env} \ + --env PRECISION=${PRECISION} \ + --env OUTPUT_DIR=${OUTPUT_DIR} \ + --env DATASET_DIR=${DATASET_DIR} \ + --env BACKBONE_WEIGHTS=${BACKBONE_WEIGHTS} \ + --env http_proxy=${http_proxy} \ + --env https_proxy=${https_proxy} \ + --env no_proxy=${no_proxy} \ + --volume ${DATASET_DIR}:${DATASET_DIR} \ + --volume ${BACKBONE_WEIGHTS}:${BACKBONE_WEIGHTS} \ + --volume ${OUTPUT_DIR}:${OUTPUT_DIR} \ + --shm-size 8G \ + -w ${WORKDIR} \ + ${DOCKER_ARGS} \ + $IMAGE_NAME \ + /bin/bash $SCRIPT diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/train_accuracy.sh b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/train_accuracy.sh new file mode 100755 index 000000000..8d42e6cfb --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/train_accuracy.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +MODEL_DIR=${MODEL_DIR-$PWD} + +echo "DATASET_DIR: ${DATASET_DIR}" +echo "BACKBONE_WEIGHTS: ${BACKBONE_WEIGHTS}" +echo "PRECISION: ${PRECISION}" +echo "OUTPUT_DIR: ${OUTPUT_DIR}" + +if [ -z "${OUTPUT_DIR}" ]; then + echo "The required environment variable OUTPUT_DIR has not been set" + exit 1 +fi + +# Create the output directory in case it doesn't already exist +mkdir -p ${OUTPUT_DIR} + +if [ -z "${DATASET_DIR}" ]; then + echo "The required environment variable DATASET_DIR has not been set" + exit 1 +fi + +if [ ! -d "${DATASET_DIR}" ]; then + echo "The DATASET_DIR '${DATASET_DIR}' does not exist" + exit 1 +fi + +if [ -z "${PRECISION}" ]; then + echo "The required environment variable PRECISION has not been set" + echo "Please set PRECISION to fp32 or bf16." + exit 1 +fi + +if [ ! 
-f "${BACKBONE_WEIGHTS}" ]; then + echo "The BACKBONE_WEIGHTS '${BACKBONE_WEIGHTS}' file does not exist" + exit 1 +fi + +cd ${MODEL_DIR}/models/ssd-training/training/single_stage_detector/ssd + +# Set env vars that the bash script looks for +export DATA_DIR=${DATASET_DIR} +export MODEL_DIR=${BACKBONE_WEIGHTS} +export work_space=${OUTPUT_DIR} + +if [[ $PRECISION == "bf16" ]]; then + bash run_accuracy.sh bf16 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-training-accuracy-bf16.log +elif [[ $PRECISION == "fp32" ]]; then + bash run_accuracy.sh fp32 fp32 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-training-accuracy-fp32.log +else + echo "The specified precision '${PRECISION}' is unsupported." + echo "Supported precisions are: fp32 and bf16" + exit 1 +fi diff --git a/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/train_performance.sh b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/train_performance.sh new file mode 100755 index 000000000..a95876395 --- /dev/null +++ b/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/train_performance.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +MODEL_DIR=${MODEL_DIR-$PWD} + +echo "DATASET_DIR: ${BACKBONE_WEIGHTS}" +echo "BACKBONE_WEIGHTS: ${PRETRAINED_MODEL}" +echo "PRECISION: ${PRECISION}" +echo "OUTPUT_DIR: ${OUTPUT_DIR}" + +if [ -z "${OUTPUT_DIR}" ]; then + echo "The required environment variable OUTPUT_DIR has not been set" + exit 1 +fi + +# Create the output directory in case it doesn't already exist +mkdir -p ${OUTPUT_DIR} + +if [ -z "${DATASET_DIR}" ]; then + echo "The required environment variable DATASET_DIR has not been set" + exit 1 +fi + +if [ ! -d "${DATASET_DIR}" ]; then + echo "The DATASET_DIR '${DATASET_DIR}' does not exist" + exit 1 +fi + +if [ -z "${PRECISION}" ]; then + echo "The required environment variable PRECISION has not been set" + echo "Please set PRECISION to fp32 or bf16." + exit 1 +fi + +if [ ! -f "${BACKBONE_WEIGHTS}" ]; then + echo "The BACKBONE_WEIGHTS '${BACKBONE_WEIGHTS}' file does not exist" + exit 1 +fi + +cd ${MODEL_DIR}/models/ssd-training/training/single_stage_detector/ssd + +# Set env vars that the bash script looks for +export DATA_DIR=${DATASET_DIR} +export MODEL_DIR=${BACKBONE_WEIGHTS} +export work_space=${OUTPUT_DIR} + +if [[ $PRECISION == "bf16" ]]; then + bash run_performance.sh bf16 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-training-performance-bf16.log +elif [[ $PRECISION == "fp32" ]]; then + bash run_performance.sh fp32 2>&1 | tee -a ${OUTPUT_DIR}/ssd-resnet34-training-performance-fp32.log +else + echo "The specified precision '${PRECISION}' is unsupported." 
+ echo "Supported precisions are: fp32 and bf16" + exit 1 +fi diff --git a/tools/docker/partials/common/pytorch/models/ssd-resnet34-train-dependencies.partial.Dockerfile b/tools/docker/partials/common/pytorch/models/ssd-resnet34-train-dependencies.partial.Dockerfile new file mode 100644 index 000000000..d47b8d054 --- /dev/null +++ b/tools/docker/partials/common/pytorch/models/ssd-resnet34-train-dependencies.partial.Dockerfile @@ -0,0 +1,17 @@ +RUN source ~/anaconda3/bin/activate pytorch && \ + pip install --upgrade pip && \ + pip install --no-cache-dir https://github.com/mlperf/logging/archive/9ea0afa.zip && \ + pip install --no-cache-dir \ + Cython==0.28.4 \ + git+http://github.com/NVIDIA/apex.git@9041a868a1a253172d94b113a963375b9badd030#egg=apex \ + mlperf-compliance==0.0.10 \ + cycler==0.10.0 \ + kiwisolver==1.0.1 \ + matplotlib==2.2.2 \ + Pillow==5.2.0 \ + pyparsing==2.2.0 \ + python-dateutil==2.7.3 \ + pytz==2018.5 \ + six==1.11.0 \ + torchvision==0.2.1 \ + pycocotools==2.0.2 diff --git a/tools/docker/specs/centos/pytorch/spr-ssd-resnet34-inference_spec.yml b/tools/docker/specs/centos/pytorch/spr-ssd-resnet34-inference_spec.yml new file mode 100644 index 000000000..e942bece1 --- /dev/null +++ b/tools/docker/specs/centos/pytorch/spr-ssd-resnet34-inference_spec.yml @@ -0,0 +1,69 @@ +releases: + versioned: + tag_specs: + - '{pytorch-multistage}{spr-ssd-resnet34-inference}' +slice_sets: + spr-ssd-resnet34-inference: + - add_to_name: -spr-ssd-resnet34-inference + dockerfile_subdirectory: pytorch + args: + - PYTORCH_IMAGE=model-zoo + - PYTORCH_TAG=pytorch-ipex-spr + - PACKAGE_NAME=pytorch-spr-ssd-resnet34-inference + partials: + - pytorch/torch-vision-from-source + - pytorch/spr-model-installs + - model_package + - pytorch/spr-release-conda-env + - pytorch/spr-dnnl-max-var + - pytorch/spr-entrypoint + files: + - source: tools/docker/models/cpu-models/ssd-ww32 + destination: models/ssd + - source: quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/inference_realtime.sh + destination: quickstart/inference_realtime.sh + - source: quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/inference_throughput.sh + destination: quickstart/inference_throughput.sh + - source: quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/accuracy.sh + destination: quickstart/accuracy.sh + wrapper_package_files: + - source: output/pytorch-spr-ssd-resnet34-inference.tar.gz + destination: model_packages/pytorch-spr-ssd-resnet34-inference.tar.gz + - source: quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/build.sh + destination: build.sh + - source: quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/run.sh + destination: run.sh + - source: dockerfiles/pytorch/pytorch-spr-ssd-resnet34-inference.Dockerfile + destination: pytorch-spr-ssd-resnet34-inference.Dockerfile + - source: LICENSE + destination: licenses/LICENSE + - source: third_party + destination: licenses/third_party + - source: quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/README_SPR.md + destination: README.md + documentation: + - docs: + - name: Title + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/title.md + - name: Description + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/description.md + - name: Model Package + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/wrapper_package.md + - name: Quickstart + uri: 
models/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/quickstart.md + - name: Datasets + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/datasets.md + - name: Container build + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/container_build.md + - name: Docker + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/docker_spr.md + - name: License + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu/.docs/license.md + name: README_SPR.md + text_replace: + : ssd-resnet34 + : inference + : pytorch-spr-ssd-resnet34-inference.tar.gz + : pytorch-spr-ssd-resnet34-inference + : model-zoo:pytorch-spr-ssd-resnet34-inference + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/inference/cpu diff --git a/tools/docker/specs/centos/pytorch/spr-ssd-resnet34-training_spec.yml b/tools/docker/specs/centos/pytorch/spr-ssd-resnet34-training_spec.yml new file mode 100644 index 000000000..06f65e235 --- /dev/null +++ b/tools/docker/specs/centos/pytorch/spr-ssd-resnet34-training_spec.yml @@ -0,0 +1,68 @@ +releases: + versioned: + tag_specs: + - '{pytorch-multistage}{spr-ssd-resnet34-training}' +slice_sets: + spr-ssd-resnet34-training: + - add_to_name: -spr-ssd-resnet34-training + dockerfile_subdirectory: pytorch + args: + - PYTORCH_IMAGE=model-zoo + - PYTORCH_TAG=pytorch-ipex-spr + - PACKAGE_NAME=pytorch-spr-ssd-resnet34-training + - SSD_RESNET34_TRAIN_DIR=/workspace/pytorch-spr-ssd-resnet34-training/models/ssd-training + partials: + - pytorch/spr-model-installs + - model_package + - pytorch/models/ssd-resnet34-train-dependencies + - pytorch/spr-release-conda-env + - pytorch/spr-dnnl-max-var + - pytorch/spr-entrypoint + files: + - source: tools/docker/models/cpu-models/ssd-training-ww32 + destination: models/ssd-training + - source: quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/train_performance.sh + destination: quickstart/train_performance.sh + - source: quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/train_accuracy.sh + destination: quickstart/train_accuracy.sh + wrapper_package_files: + - source: output/pytorch-spr-ssd-resnet34-training.tar.gz + destination: model_packages/pytorch-spr-ssd-resnet34-training.tar.gz + - source: quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/build.sh + destination: build.sh + - source: quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/run.sh + destination: run.sh + - source: dockerfiles/pytorch/pytorch-spr-ssd-resnet34-training.Dockerfile + destination: pytorch-spr-ssd-resnet34-training.Dockerfile + - source: LICENSE + destination: licenses/LICENSE + - source: third_party + destination: licenses/third_party + - source: quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/README_SPR.md + destination: README.md + documentation: + - docs: + - name: Title + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/title.md + - name: Description + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/description.md + - name: Model Package + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/wrapper_package.md + - name: Quickstart + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/quickstart.md + - name: Datasets + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/datasets.md + - name: Container build + uri: 
models/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/container_build.md + - name: Docker + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/docker_spr.md + - name: License + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu/.docs/license.md + name: README_SPR.md + text_replace: + : ssd-resnet34 + : training + : pytorch-spr-ssd-resnet34-training.tar.gz + : pytorch-spr-ssd-resnet34-training + : model-zoo:pytorch-spr-ssd-resnet34-training + uri: models/quickstart/object_detection/pytorch/ssd-resnet34/training/cpu