# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import time
import mxnet as mx
from mxnet . test_utils import check_speed
def quantize_int8_helper(data):
    """Quantize `data` to int8 using its own observed value range.

    Returns the (quantized_tensor, min, max) triple produced by
    mx.nd.contrib.quantize, calibrated with the tensor's min/max.
    """
    return mx.nd.contrib.quantize(data, mx.nd.min(data), mx.nd.max(data), out_type='int8')
def benchmark_convolution(data_shape, kernel, num_filter, pad, stride, no_bias=True, layout='NCHW', repeats=20):
    """Benchmark an FP32 cuDNN convolution against its int8 quantized
    counterpart on GPU 0, printing both timings and the speedup factor.

    Parameters
    ----------
    data_shape : tuple of int
        Input shape, e.g. (batch, channel, height, width) for 'NCHW'.
    kernel, num_filter, pad, stride
        Convolution hyper-parameters, forwarded verbatim to both
        mx.sym.Convolution and mx.sym.contrib.quantized_conv.
    no_bias : bool
        Whether the convolution omits its bias term.
    layout : str
        Data layout accepted by the convolution operators.
    repeats : int
        Number of timed forward passes per measurement (N for check_speed).
    """
    ctx_gpu = mx.gpu(0)
    data = mx.sym.Variable(name="data", shape=data_shape, dtype='float32')
    # FP32 baseline: plain cuDNN convolution, forward pass only.
    conv_cudnn = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
                                    no_bias=no_bias, layout=layout, cudnn_off=False, name="conv_cudnn")
    arg_shapes, _, _ = conv_cudnn.infer_shape(data=data_shape)
    input_data = mx.nd.random.normal(0, 0.2, shape=data_shape, ctx=ctx_gpu)
    # list_arguments() yields [data, weight, ...]; index 1 is the weight.
    conv_weight_name = conv_cudnn.list_arguments()[1]
    args = {data.name: input_data,
            conv_weight_name: mx.random.normal(0, 1, shape=arg_shapes[1], ctx=ctx_gpu)}
    conv_cudnn_time = check_speed(sym=conv_cudnn, location=args, ctx=ctx_gpu, N=repeats,
                                  grad_req='null', typ='forward') * 1000

    # int8 path: quantized_conv takes explicit min/max calibration scalars
    # for both the data and the weight alongside the int8 tensors.
    qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
    weight = mx.sym.Variable(name='weight', shape=arg_shapes[1], dtype='int8')
    min_data = mx.sym.Variable(name='min_data', shape=(1,), dtype='float32')
    max_data = mx.sym.Variable(name='max_data', shape=(1,), dtype='float32')
    min_weight = mx.sym.Variable(name='min_weight', shape=(1,), dtype='float32')
    max_weight = mx.sym.Variable(name='max_weight', shape=(1,), dtype='float32')
    quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=weight,
                                                     min_data=min_data, max_data=max_data,
                                                     min_weight=min_weight, max_weight=max_weight,
                                                     kernel=kernel, num_filter=num_filter,
                                                     pad=pad, stride=stride,
                                                     no_bias=no_bias, layout=layout, cudnn_off=False,
                                                     name='quantized_conv2d')
    # Quantize each tensor exactly once and reuse the (tensor, min, max)
    # triple; the original re-ran the quantization three times per tensor.
    qinput, qinput_min, qinput_max = quantize_int8_helper(input_data)
    qweight, qweight_min, qweight_max = quantize_int8_helper(args[conv_weight_name])
    qargs = {qdata.name: qinput,
             min_data.name: qinput_min,
             max_data.name: qinput_max,
             weight.name: qweight,
             min_weight.name: qweight_min,
             max_weight.name: qweight_max}
    qconv_time = check_speed(sym=quantized_conv2d, location=qargs, ctx=ctx_gpu, N=repeats,
                             grad_req='null', typ='forward') * 1000

    print('==================================================================================================')
    print(f'data={data_shape}, kernel={kernel}, num_filter={num_filter}, pad={pad}, stride={stride},'
          f' no_bias={no_bias}, layout={layout}, repeats={repeats}')
    print(f'{conv_cudnn.name}-FP32, ctx={ctx_gpu}, time={conv_cudnn_time:.2f} ms')
    print(f'{quantized_conv2d.name}, ctx={ctx_gpu}, time={qconv_time:.2f} ms')
    print(f'quantization speedup: {conv_cudnn_time / qconv_time:.1f}X')
    print('\n')
if __name__ == '__main__':
    # Per-layer configs: (shape without batch dim, kernel, num_filter, pad, stride).
    # These mirror representative ResNet convolution layers.
    layer_configs = [
        ((64, 56, 56), (1, 1), 256, (0, 0), (1, 1)),
        ((256, 56, 56), (1, 1), 64, (0, 0), (1, 1)),
        ((256, 56, 56), (1, 1), 128, (0, 0), (2, 2)),
        ((128, 28, 28), (3, 3), 128, (1, 1), (1, 1)),
        ((1024, 14, 14), (1, 1), 256, (0, 0), (1, 1)),
        ((2048, 7, 7), (1, 1), 512, (0, 0), (1, 1)),
    ]
    for batch_size in (32, 64, 128):
        for shape_tail, kernel, num_filter, pad, stride in layer_configs:
            benchmark_convolution(data_shape=(batch_size,) + shape_tail, kernel=kernel,
                                  num_filter=num_filter, pad=pad, stride=stride,
                                  layout='NCHW', repeats=20)