# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""Benchmark forward time of FP32 cuDNN convolution against int8 quantized
convolution on a single GPU."""

import mxnet as mx
from mxnet.test_utils import check_speed


def quantize_int8_helper(data):
    """Quantize an FP32 NDArray to int8 over its full value range.

    Returns a 3-tuple of (quantized int8 array, min_range, max_range).
    """
    min_data = mx.nd.min(data)
    max_data = mx.nd.max(data)
    return mx.nd.contrib.quantize(data, min_data, max_data, out_type='int8')


def benchmark_convolution(data_shape, kernel, num_filter, pad, stride, no_bias=True,
                          layout='NCHW', repeats=20):
    ctx_gpu = mx.gpu(0)
    data = mx.sym.Variable(name="data", shape=data_shape, dtype='float32')

    # Baseline: FP32 convolution through cuDNN.
    conv_cudnn = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter,
                                    pad=pad, stride=stride, no_bias=no_bias, layout=layout,
                                    cudnn_off=False, name="conv_cudnn")
    arg_shapes, _, _ = conv_cudnn.infer_shape(data=data_shape)
    input_data = mx.nd.random.normal(0, 0.2, shape=data_shape, ctx=ctx_gpu)
    conv_weight_name = conv_cudnn.list_arguments()[1]
    args = {data.name: input_data,
            conv_weight_name: mx.nd.random.normal(0, 1, shape=arg_shapes[1], ctx=ctx_gpu)}
    conv_cudnn_time = check_speed(sym=conv_cudnn, location=args, ctx=ctx_gpu, N=repeats,
                                  grad_req='null', typ='forward') * 1000

    # Candidate: int8 quantized convolution. The operator takes the quantized
    # data/weight tensors plus their min/max calibration ranges as inputs.
    qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
    weight = mx.sym.Variable(name='weight', shape=arg_shapes[1], dtype='int8')
    min_data = mx.sym.Variable(name='min_data', shape=(1,), dtype='float32')
    max_data = mx.sym.Variable(name='max_data', shape=(1,), dtype='float32')
    min_weight = mx.sym.Variable(name='min_weight', shape=(1,), dtype='float32')
    max_weight = mx.sym.Variable(name='max_weight', shape=(1,), dtype='float32')
    quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=weight,
                                                     min_data=min_data, max_data=max_data,
                                                     min_weight=min_weight, max_weight=max_weight,
                                                     kernel=kernel, num_filter=num_filter,
                                                     pad=pad, stride=stride, no_bias=no_bias,
                                                     layout=layout, cudnn_off=False,
                                                     name='quantized_conv2d')
    # Quantize each tensor once and unpack the 3-tuple, rather than calling
    # the helper three times per tensor.
    qinput, min_input, max_input = quantize_int8_helper(input_data)
    qweight, min_qweight, max_qweight = quantize_int8_helper(args[conv_weight_name])
    qargs = {qdata.name: qinput,
             min_data.name: min_input,
             max_data.name: max_input,
             weight.name: qweight,
             min_weight.name: min_qweight,
             max_weight.name: max_qweight}
    qconv_time = check_speed(sym=quantized_conv2d, location=qargs, ctx=ctx_gpu, N=repeats,
                             grad_req='null', typ='forward') * 1000

    print('==================================================================================================')
    print('data=%s, kernel=%s, num_filter=%s, pad=%s, stride=%s, no_bias=%s, layout=%s, repeats=%s'
          % (data_shape, kernel, num_filter, pad, stride, no_bias, layout, repeats))
    print('%s, ctx=%s, time=%.2f ms' % (conv_cudnn.name + '-FP32', ctx_gpu, conv_cudnn_time))
    print('%s, ctx=%s, time=%.2f ms' % (quantized_conv2d.name, ctx_gpu, qconv_time))
    print('quantization speedup: %.1fX' % (conv_cudnn_time / qconv_time))
    print('\n')
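# NOTE (assumption, not stated in the original script): the GPU int8
# convolution path goes through cuDNN, whose int8 kernels require a device
# with DP4A support (CUDA compute capability >= 6.1, e.g. Tesla P4/P40);
# on older GPUs quantized_conv is expected to fail at runtime.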
if __name__ == '__main__':
    # The convolution shapes below resemble ResNet-50 bottleneck layers.
    for batch_size in [32, 64, 128]:
        benchmark_convolution(data_shape=(batch_size, 64, 56, 56), kernel=(1, 1), num_filter=256,
                              pad=(0, 0), stride=(1, 1), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 256, 56, 56), kernel=(1, 1), num_filter=64,
                              pad=(0, 0), stride=(1, 1), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 256, 56, 56), kernel=(1, 1), num_filter=128,
                              pad=(0, 0), stride=(2, 2), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 128, 28, 28), kernel=(3, 3), num_filter=128,
                              pad=(1, 1), stride=(1, 1), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 1024, 14, 14), kernel=(1, 1), num_filter=256,
                              pad=(0, 0), stride=(1, 1), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 2048, 7, 7), kernel=(1, 1), num_filter=512,
                              pad=(0, 0), stride=(1, 1), layout='NCHW', repeats=20)