-
Notifications
You must be signed in to change notification settings - Fork 0
/
benchmark_score.py
executable file
·134 lines (123 loc) · 5.7 KB
/
benchmark_score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Benchmark the scoring performance on various CNNs
"""
from common import find_mxnet
from common.util import get_gpus
import mxnet as mx
import mxnet.gluon.model_zoo.vision as models
from importlib import import_module
import logging
import argparse
import time
import numpy as np
# Emit DEBUG-and-above records from the root logger so every benchmark
# result logged below is visible.
logging.basicConfig(level=logging.DEBUG)

# Command-line interface. Arguments are parsed at import time; the resulting
# `opt` namespace is read by the __main__ section of this script.
parser = argparse.ArgumentParser(description='SymbolAPI-based CNN inference performance benchmark')
parser.add_argument('--network', type=str, default='all',
                    choices=['all', 'alexnet', 'vgg-16', 'resnetv1-50', 'resnet-50',
                             'resnet-152', 'inception-bn', 'inception-v3',
                             'inception-v4', 'inception-resnet-v2', 'mobilenet',
                             'densenet121', 'squeezenet1.1'],
                    help='Network to benchmark. By default, runs benchmark for all networks')
# The default of 0 is a sentinel meaning "sweep the built-in batch sizes".
# Adjacent string literals are concatenated verbatim, so the first one must
# end with a space to keep the two sentences apart in the --help output.
parser.add_argument('--batch-size', type=int, default=0,
                    help='Batch size to use for benchmarking. Example: 32, 64, 128. '
                         'By default, runs benchmark for batch sizes - 1, 32, 64, 128, 256')
opt = parser.parse_args()
def get_symbol(network, batch_size, dtype):
    """Build the inference symbol for ``network`` and describe its input.

    Parameters
    ----------
    network : str
        Benchmark network name, e.g. 'resnet-50', 'vgg-16', 'inception-v3'
        or 'densenet121'.
    batch_size : int
        Leading dimension of the returned data shape.
    dtype : str
        Passed through to the ``symbols.<network>`` factory.

    Returns
    -------
    tuple
        ``(sym, [('data', (batch_size, 3, H, W))])`` where H = W = 299 for
        'inception-v3' / 'inception-v4' and 224 for every other network.
    """
    side = 299 if network in ('inception-v3', 'inception-v4') else 224
    image_shape = (3, side, side)

    # Names such as 'resnet-50' or 'vgg-16' carry the depth after the dash and
    # the module name before it. 'inception-resnet-v2' contains 'resnet' but
    # is a module name of its own, so it must bypass the resnet parsing.
    num_layers = 0
    if network != 'inception-resnet-v2' and 'resnet' in network:
        pieces = network.split('-')
        num_layers = int(pieces[1])
        network = pieces[0]
    if 'vgg' in network:
        num_layers = int(network.split('-')[1])
        network = 'vgg'

    if network in ('densenet121', 'squeezenet1.1'):
        # These two come from the Gluon model zoo: the block is applied to a
        # symbolic 'data' variable and topped with a softmax output.
        # NOTE(review): `dtype` is not applied on this path, so a 'float16'
        # request appears to build the same symbol as 'float32' - confirm.
        block = models.get_model(network)
        block.hybridize()
        data = mx.sym.var('data')
        sym = mx.sym.SoftmaxOutput(block(data), name='softmax')
    else:
        net = import_module('symbols.' + network)
        sym = net.get_symbol(num_classes=1000,
                             image_shape=','.join(str(dim) for dim in image_shape),
                             num_layers=num_layers,
                             dtype=dtype)
    return (sym, [('data', (batch_size,) + image_shape)])
def score(network, dev, batch_size, num_batches, dtype):
    """Measure inference throughput of ``network`` on device ``dev``.

    Parameters
    ----------
    network : str
        Network name understood by ``get_symbol``.
    dev : mx.Context
        Device the module is bound to and the input data is created on.
    batch_size : int
        Number of images per forward pass.
    num_batches : int
        Number of timed forward passes; must be positive.
    dtype : str
        Forwarded to ``get_symbol``.

    Returns
    -------
    float
        Images processed per second over the timed passes.

    Raises
    ------
    ValueError
        If ``num_batches`` is not positive.
    """
    if num_batches <= 0:
        # With no timed iteration there is no interval to measure. Fail here,
        # before the model is built, instead of at the final division.
        raise ValueError('num_batches must be a positive integer, got %r' % (num_batches,))

    # get mod
    sym, data_shape = get_symbol(network, batch_size, dtype)
    mod = mx.mod.Module(symbol=sym, context=dev)
    mod.bind(for_training=False,
             inputs_need_grad=False,
             data_shapes=data_shape)
    mod.init_params(initializer=mx.init.Xavier(magnitude=2.))

    # get data: one uniform random array in [-1, 1) per bound input
    data = [mx.random.uniform(-1.0, 1.0, shape=shape, ctx=dev) for _, shape in mod.data_shapes]
    batch = mx.io.DataBatch(data, [])  # empty label

    def run_forward(iterations):
        # wait_to_read() is called on every output after each pass so the
        # clock is not read while results are still pending.
        for _ in range(iterations):
            mod.forward(batch, is_train=False)
            for output in mod.get_outputs():
                output.wait_to_read()

    # run
    dry_run = 5  # use 5 iterations to warm up; they are not timed
    run_forward(dry_run)
    tic = time.time()
    run_forward(num_batches)

    # return num images per second
    return num_batches * batch_size / (time.time() - tic)
# Script entry point: benchmark every selected network on every available
# device, for each batch size and dtype, and log the images/sec figure.
if __name__ == '__main__':
    if opt.network == 'all':
        networks = ['alexnet', 'vgg-16', 'resnetv1-50', 'resnet-50',
                    'resnet-152', 'inception-bn', 'inception-v3',
                    'inception-v4', 'inception-resnet-v2',
                    'mobilenet', 'densenet121', 'squeezenet1.1']
        logging.info('It may take some time to run all models, '
                     'set --network to run a specific one')
    else:
        networks = [opt.network]

    # The first GPU (when one is reported) is benchmarked before the CPU.
    devs = [mx.gpu(0)] if len(get_gpus()) > 0 else []
    # Enable USE_MKLDNN for better CPU performance
    devs.append(mx.cpu())

    if opt.batch_size == 0:
        batch_sizes = [1, 32, 64, 128, 256]
        logging.info('run batchsize [1, 32, 64, 128, 256] by default, '
                     'set --batch-size to run a specific one')
    elif opt.batch_size < 0:
        # Reject here with a usage message rather than passing a negative
        # dimension on to model binding.
        parser.error('--batch-size must be a positive integer')
    else:
        batch_sizes = [opt.batch_size]

    # Loop invariants, built once instead of on every inner iteration.
    gluon_networks = ('densenet121', 'squeezenet1.1')
    no_fp16_networks = ('inception-bn', 'alexnet')
    cpu = mx.cpu()

    for net in networks:
        logging.info('network: %s', net)
        if net in gluon_networks:
            logging.info('network: %s is converted from gluon modelzoo', net)
            logging.info('you can run benchmark/python/gluon/benchmark_gluon.py for more models')
        for d in devs:
            logging.info('device: %s', d)
            # The "no float16" notice is emitted once per (network, device).
            logged_fp16_warning = False
            for b in batch_sizes:
                for dtype in ('float32', 'float16'):
                    if dtype == 'float16':
                        if d == cpu:
                            # float16 is not supported on CPU
                            continue
                        if net in no_fp16_networks:
                            if not logged_fp16_warning:
                                logging.info('Model definition for %s does not support float16', net)
                                logged_fp16_warning = True
                            continue
                    speed = score(network=net, dev=d, batch_size=b, num_batches=10, dtype=dtype)
                    logging.info('batch size %2d, dtype %s, images/sec: %f', b, dtype, speed)