2017-08-08 16:36:23 -07:00
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
2018-01-30 10:45:25 -08:00
from __future__ import print_function
2017-04-03 15:18:41 -07:00
import sys
2016-03-19 23:45:52 -07:00
import os
2017-06-26 22:37:11 -07:00
import time
2018-01-30 10:45:25 -08:00
import multiprocessing as mp
2017-06-26 22:37:11 -07:00
import mxnet as mx
import numpy as np
2017-10-14 19:44:32 -07:00
import unittest
2018-04-03 10:33:56 -07:00
from nose . tools import assert_raises
2020-09-02 18:47:01 -07:00
import scipy . sparse as sps
import mxnet . ndarray . sparse as mxsps
[v1.x] Backport Unittest tolerance handling improvements (#18694). Also test seeding (#18762). (#19148)
* Add sm arch 80 to Makefile
* Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweaks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Remove pytest decorators introduced in error
* Fix test_forward.py:test_consistency
* Fix test_numpy_op.py tests
* Improve test seeding in test_numpy_interoperablity.py (#18762)
* Fix test_numpy_op.py:test_np_random_{beta,chisquare}
* Reduce problem sizes with test_optimizer.py:test_multilamb
* Skip test_gluon_gpu.py:test_fused_{lstm,gpu}_layer, fix test_rnn_cells, for fp16 contexts
* Trigger CI
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-09-17 15:47:32 -07:00
import itertools
2019-10-15 15:56:43 -07:00
from mxnet . test_utils import check_consistency , set_default_context , assert_almost_equal , assert_allclose
2020-09-17 21:57:59 -07:00
from mxnet . test_utils import check_symbolic_forward , check_symbolic_backward , discard_stderr
from mxnet . test_utils import default_context , rand_shape_2d , rand_ndarray , same , environment
2018-04-03 10:33:56 -07:00
from mxnet . base import MXNetError
2018-04-09 14:43:53 -07:00
from mxnet import autograd
2017-06-26 22:37:11 -07:00
2016-03-19 23:45:52 -07:00
# Put the sibling ``unittest`` directory at the front of the import path
# (presumably where ``common`` and the ``test_*`` modules imported below
# live -- confirm against the repository layout).
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../unittest'))
2019-12-14 22:47:42 +01:00
from common import setup_module , with_seed , teardown , assert_raises_cudnn_not_satisfied , assert_raises_cuda_not_satisfied
2019-03-06 21:58:52 -08:00
from common import run_in_spawned_process
2015-10-24 15:57:42 -07:00
from test_operator import *
Numpy-compatible Infra (#15581)
* [Do not review] [Do not merge] New numpy-compatible sum (#14739)
* Add numpy namespace and initial impl of np.sum (not complete)
* Clean up
* Fix import error
* numpy sum
* add test and backward data type support
* add license to test_numpy_op.py
* improve test to reduce flakiness
* fix sanity build
* extra numeric test and imperative test
* add error message for initial argument
* [numpy] Infra for supporting numpy ops in imperative mode and Gluon APIs (#14758)
* Infra of new ndarray and symbol types for numpy operators
* Rename
* Fix import problem
* Refactor
* Remove redundant code
* Add docstring
* More on numpy ndarray and symbol
* Override unimplemented methods for ndarray and _NumpySymbol
* Fix built-in methods of ndarray and _NumpySymbol
* Fix test and sanity check
* Fix pylint
* Address cr comments
* Add unit tests for ndarray and _NumpySymbol
* Add _true_divide
* Fix gpu build
* Add future import division
* More correct way of checking if an output is from a np compat op
* Fix gpu build
* Fix output ndarray/symbol types with at least one new ndarray/symbol
* Modify true_divide doc
* Fix flaky copying zero-size arrays via gpus
* Fix zero size in gluon hybridize and zeros/ones symbol not creating new symbol type
* Fix doc
* Enable np op compat check with name prefix (#14897)
* [numpy] Numpy dot (#14831)
* Numpy Dot case 1-4 + case 3.5 forward and 0.5 backward
* Backward computation and test coverage
* numpy-compatible mean (#14859)
* [numpy] Some np ops for d2l (#14924)
* Add np transpose
More ops and namespaces for submodules
Add relu and sigmoid
Add reshape
Fix symbolic name mismatch
Add maximum and minimum
* Add convenience fluent method
* Add ndarray.item()
* Fix CI
* Fix lint
* Fix lint
* Fix reshape gpu
* Add example
* Remove python notebook outputs
* Remove notebook output
* Add one more example
* [numpy] Refactor np modules (#14989)
* Refactor
* Initial refactoring
* Fix notebook
* Move numpy op check from backend to frontend
* Add homogeneous ndarray check
* Fix grouping inhomogeneous types of symbols
* Improve error handling of different types of symbols as outputs
* Fix test
* Fix numpy test
* Fix ci
* Try to fix gpu ci failure
* [numpy] Refactor np module (example runs through) (#15055)
* Refactor notebook
* notebook working with hybrid block
* More refactoring
* Remove unnecessary use_np_compat
* Use class decorator to initialize numpy ndarrays in parameter.py
* Clear notebook outputs
* Improve np decorator
* Remove npe op from optimizer
* Fix CI
* Fix functools.wraps issue in Python2
* Fix ci
* Change np_compat to np_shape
* Temporarily disable test_amp
* Numpy-compatible stack (#15027)
* numpy stack
* migrate to use_np_shape
* Numpy Unary Ops (#15010)
* Unary Ops
* new version of unit tests
* [numpy] Fix np branch after rebase (#15086)
* Add np_array semantics for Gluon
Fix notebook
Fix sanity
Fix gluon deferred infer shape
Add np.random.uniform
Add random normal
Add boolean comparison ops
Add np.ndarray indexing
Reformat test ndarray indexing
Fix unit tests
Add one more test of indexing
Fix sanity
Enable amp test
Add np.arange
Revert cython unit test to ctypes
Delete unnecessary use_np_shape decorator from test
Rebase with numpy branch
support range as index
Fix python2 range type check
Add argmax
Disable clojure test
* Fix ci
* Add np.linalg.norm for ord='fro'
* Fix pylint
* numpy concatenate (#15104)
* [WIP][numpy] Fix for D2L Chapters 2/3/4 (#15139)
* Fix
* Fix linear regression gluon
* More fix
* Fix pylint
* Fix for chapter 4
* Add np.add mul div mod pow sub and shuffle
* Fix model selection, underfitting, overfitting
* Fix weight decay
* Fix dropout
* Fix
* Fix chapter 4
* [numpy] Fix d2l performance regression (#15173)
* Add np array adapter decorator for layers
* Fix performance regression caused by too many conversions between nd.NDArray and np.ndarray
* Fix pylint
* Fix test backward compatibility issue
* Fix test_lambda
* Fix (#15188)
* fix for chapter6 conv nn (#15224)
* [numpy] Fix d2l chapter8 (#15237)
* Add np op doc
* Fix several issues
* Add a N-D dot b 2D support
* Simplify array creation api
* Add swapaxes
* Fix rnn gluon
* More fix
* Fix pylint
* Delete
* Fix mp windows
* fix for ch11 (#15244)
* Numpy-compatible split (#15049)
* numpy split
* numpy split
* unit test
* unit test
* [numpy] [DO NOT MERGE] Fix d2l chapters 9 and 13 (#15246)
* Add npx batch_dot and topk
* Text embedding uses numpy
* Fix SoftmaxCrossEntropyLoss with np
* Fix sentiment cnn
* Fix pylint
* Fix dot attention
* Fix seq2seq attention
* Add np.tile
* Fix transformer
* Fix ci
* Fix ci and rebase
* [numpy] Fix d2l chapter 5 (#15264)
* Fix parameter initializer
* Add np.save and np.load
* Fix read-write
* Fix lint
* Numpy compatible max (#15161)
* numpy amax
* weird cu file diff
* fix the unit test error
* fix gpu bug
* minor fix
* fix lint
* remove scalar value check
* fix the bug on unit test
* fix the case () that breaks the kernel launch
* add zero dimension unit test
* revert the tuple change
* use mshadow maximum
* remove test zero
* change the macro for now
* change the cuda to use mashadow op
* fix the broadcast_reduce_op_value.cu wrong kernel
* add more logic in shape to detect the invalid situation
* change back to type switch
* change to as_nd_ndarray
* add missing @npx.use_np_shape
* retrigger CI
* address the comment
* undo algorithm import
* remove the numeric gradient check
* Numpy compatible multinomial (#15219)
* draft of multinomial
* rename to more concise name
* finish shape
* complete the forward function
* complete forward without handle 0 dimension & scalar
* handle 0 dimension
* add new line
* fix lint
* fix the build error
* fix lint
* finish unit test
* change the registration
* make multinomial support pvals as mx.ndarray
* delete newline
* fix lint error
* support input as list, mx.ndarray, np.ndarray & unit test
* fix lint
* fix the include error
* fix lint
* refactor & pass the tensor instead of tuple to kernel
* fix lint
* update the doc
* address the comment
* Numpy compatible linspace (#15256)
* draft
* finish linspace implementation
* finish linspace
* delete newline
* fix pylint
* add more unit test
* address comment
* add more test case
* disable too-many-arguments
* resolve confliction
* add ctx
* numpy-compatible cumsum (#15309)
* [numpy] Misc fix for other chapters (#15332)
* Add np.prod
* Fix ndarray.reshape accepting positional integers as arguments
* Rebase
* Fix rebase error
* Add np.ndarray.flatten
* Fix
* Add broadcast_to
* Add meshgrid and broadcast_arrays
* Fix sin, cos, sinh, cosh not supporting scalars
* Add more unary ops supporting python scalars
* Fix
* Fix
* Fix ci
* Fix sanity
* [numpy] Change d2l chapters cv and gan to use numpy (#15368)
* Change op name style to lower case underscore
* Add ops under image to npx
* Add image submodule to npx
* Fix split_and_load use np
* Fix fine tuning
* Fix bbox and anchor
* Fix odd
* Fix ssd and rcnn
* Remove restriction on binary element-wise scalar
* Fix gan
* Fix sanity
* Try to fix website build failure
* Add npx.random.seed
* Fix doc
* add doc for multinomial, dot, cumsum, clip, abs, exp, arctan (#15386)
* [numpy] Fix several places in numpy (#15398)
* Fix
* More fix
* [numpy] fix cython (#15418)
* add cython support for numpy
* stay with original API for backward compatibility
* fix after rebase
* get rid of coverage in clang60 mkldnn
* fix lint issues
* fix flaky test and get rid of extra print
* remove numpy examples
* revert #15309 #15256 #15219 #15161
* remove numpy docs
* remove changes to contrib/text/embedding.py
* remove numpy changes to gluon peripherals
* Revert "remove numpy docs"
This reverts commit c104695b28a26738b8700d80c70814e0f583ac55.
* get rid of most operators
* Revert "get rid of coverage in clang60 mkldnn"
This reverts commit 77dc90520b6a2282716ba41987a1f37522daf078.
* remove np-compatible from mxnet.image mxnet.initializer
* address comments
2019-08-07 19:54:02 -07:00
from test_numpy_ndarray import *
2019-08-08 20:30:50 -07:00
from test_numpy_op import *
2019-09-04 16:36:50 -07:00
from test_numpy_interoperability import *
2017-02-07 13:37:43 +08:00
from test_optimizer import *
2017-04-27 12:14:37 -07:00
from test_random import *
2018-02-13 11:13:04 -08:00
from test_exc_handling import *
2017-06-26 22:37:11 -07:00
#from test_rnn import *
2018-05-15 09:55:52 -07:00
from test_sparse_ndarray import *
2017-08-30 23:12:06 -07:00
from test_sparse_operator import *
from test_ndarray import *
2018-08-30 19:13:33 -07:00
from test_subgraph_op import *
2019-10-15 15:56:43 -07:00
from test_gluon_gpu import _test_bulking
2018-10-18 23:17:52 -07:00
from test_contrib_operator import test_multibox_target_op
2019-07-21 20:58:28 -07:00
from test_tvm_op import *
2019-10-19 16:51:23 -07:00
from test_contrib_optimizer import test_adamw
2016-03-19 23:45:52 -07:00
2016-10-19 00:06:32 -07:00
# Make GPU 0 the default context for this module (presumably so that the tests
# pulled in by the wildcard imports above execute on the GPU -- confirm).
set_default_context ( mx . gpu ( 0 ) )
2018-07-20 05:02:22 +02:00
# Remove these two names from this module's namespace. They are not defined in
# this file (presumably they arrive via a wildcard test import and are dropped
# so they are not run from this module -- confirm).
del test_support_vector_machine_l1_svm # noqa
del test_support_vector_machine_l2_svm # noqa
2019-04-22 18:36:21 -05:00
# Remove this name from the module namespace as well (presumably a wildcard-
# imported test that must not be run from this module -- confirm).
del test_custom_op_fork #noqa
2016-10-19 00:06:32 -07:00
2017-04-03 15:18:41 -07:00
def check_countsketch(in_dim, out_dim, n):
    """Check ``mx.sym.contrib.count_sketch`` against a NumPy reference on GPU 0.

    Random data ``x``, a bucket hash ``h`` and a sign hash ``s`` (values -1/+1)
    are generated; the forward output and the gradient returned by the
    operator are compared with values computed directly in NumPy.

    Parameters
    ----------
    in_dim : int
        Number of columns of the input data and of both hash rows.
    out_dim : int
        Number of sketch buckets; forwarded to the operator as ``out_dim``.
    n : int
        Number of rows (samples) of the input data.
    """
    data = mx.sym.Variable("data")
    h = mx.sym.Variable("h")
    s = mx.sym.Variable("s")
    sym = mx.sym.contrib.count_sketch(data=data, h=h, s=s, name='countsketch', out_dim=out_dim)

    # Shapes of the input x, the hash h and the hash s.
    shape = [(n, in_dim), (1, in_dim), (1, in_dim)]
    # The draw order (x, h, s, then out_grad below) is kept fixed so seeded
    # runs reproduce the same data.
    x = np.random.uniform(-10, 10, shape[0])
    h = np.random.randint(0, out_dim, shape[1])
    s = np.random.randint(0, 2, shape[2]) * 2 - np.ones(shape[2])
    locations = {"data": x, "h": h, "s": s}

    # Forward reference: every signed input column is accumulated into the
    # bucket selected by h. Several columns may share a bucket, hence the
    # explicit accumulation loop.
    buckets = h[0]
    signed = np.multiply(x, s)
    expected_out = np.zeros((n, out_dim))
    for row in range(n):
        for col, bucket in enumerate(buckets):
            expected_out[row, bucket] += signed[row, col]
    check_symbolic_forward(sym, locations, [expected_out], rtol=1e-3, atol=1e-5, ctx=mx.gpu(0))

    out_grad = mx.nd.empty((n, out_dim))
    out_grad[:] = np.random.normal(-3, 3, (n, out_dim))
    # Copy the head gradient to the host once instead of once per element.
    out_grad_np = out_grad.asnumpy()
    # Backward reference: each input column receives the gradient of its
    # bucket multiplied by its sign.
    expected_grad = out_grad_np[:, buckets] * s[0]
    check_symbolic_backward(sym, locations, [out_grad], [expected_grad], rtol=1e-3, atol=1e-5, ctx=mx.gpu(0))
2017-04-27 12:14:37 -07:00
2018-06-28 06:03:13 +02:00
2018-07-17 22:59:43 -07:00
@with_seed()
def test_countsketch():
    """Run ``check_countsketch`` once with randomly drawn problem sizes."""
    # (low, high) pairs handed to np.random.randint; sizes are drawn in the
    # order in_dim, out_dim, n.
    in_dim_bounds = (40, 100)
    out_dim_bounds = (5, 30)
    n_bounds = (1, 200)

    in_dim = np.random.randint(*in_dim_bounds)
    out_dim = np.random.randint(*out_dim_bounds)
    n = np.random.randint(*n_bounds)
    check_countsketch(in_dim, out_dim, n)
2017-04-03 15:18:41 -07:00
2018-02-18 03:11:58 -08:00
2017-04-03 15:18:41 -07:00
def check_ifft(shape):
    """Check ``mx.sym.contrib.ifft`` forward and backward against ``np.fft``.

    For a 2-D or 4-D ``shape`` the last dimension is the number of complex
    values per row (doubled first if it is odd). The operator input has twice
    that many real columns, holding real and imaginary parts interleaved
    along the last axis. For any other rank the operator is only bound and
    run forward; nothing is compared.

    Parameters
    ----------
    shape : tuple of int
        Requested complex-domain shape.
    """
    checked_rank = len(shape) in (2, 4)
    shape_old = shape
    if checked_rank:
        dims = list(shape)
        if dims[-1] % 2 != 0:
            dims[-1] = dims[-1] * 2
        # shape_old: complex-domain shape; shape: interleaved real input shape.
        shape_old = tuple(dims)
        shape = tuple(dims[:-1]) + (dims[-1] * 2,)

    sym = mx.sym.contrib.ifft(name='ifft', compute_size=128)
    init = [np.random.normal(size=shape, scale=1.0)]
    arr_grad = [mx.nd.empty(shape)]
    ctx_list = [{'ctx': mx.gpu(0), 'ifft_data': shape, 'type_dict': {'ifft_data': np.float32}}]
    exe_list = [sym.simple_bind(args_grad=arr_grad, **ctx) for ctx in ctx_list]

    for exe in exe_list:
        for arr, iarr in zip(exe.arg_arrays, init):
            arr[:] = iarr.astype(arr.dtype)
    # forward
    for exe in exe_list:
        exe.forward(is_train=True)
    out1 = [exe.outputs[0].asnumpy() for exe in exe_list]

    if not checked_rank:
        return

    n_complex = shape_old[-1]
    # De-interleave the real input into a complex array: even columns are the
    # real parts, odd columns the imaginary parts.
    init_complex = np.zeros(shape_old, dtype=np.complex64)
    init_complex.real[...] = init[0][..., 0::2]
    init_complex.imag[...] = init[0][..., 1::2]
    a = np.fft.ifft(init_complex, n=None, axis=-1, norm=None)
    # The operator output is divided by the transform length before it is
    # compared with NumPy's ifft.
    assert_almost_equal(a.real, out1[0] / n_complex, rtol=1e-3, atol=1e-5)

    # backward
    out_grad = mx.nd.empty(shape_old)
    out_grad[:] = np.random.normal(-3, 3, shape_old)
    for exe in exe_list:
        exe.backward([out_grad])
        # One host copy per executor; only the even (real-part) columns of the
        # input gradient are compared.
        temp = exe.grad_arrays[0].asnumpy()[..., 0::2].astype(np.float64)

    a = np.fft.fft(out_grad.asnumpy(), n=None, axis=-1, norm=None)
    assert_almost_equal(a.real, temp, rtol=1e-3, atol=1e-5)
2017-04-27 12:14:37 -07:00
2018-08-07 10:29:47 -07:00
@with_seed()
def test_ifft():
    """Run ``check_ifft`` on random 2-D and 4-D shapes, two rounds each."""
    rounds = 2
    dim_upper = 10  # exclusive upper bound passed to np.random.randint
    for _ in range(rounds):
        for rank in (2, 4):
            random_shape = tuple(np.random.randint(1, dim_upper, size=rank))
            check_ifft(random_shape)
2018-02-18 03:11:58 -08:00
2017-04-03 15:18:41 -07:00
def check_fft(shape):
    """Check ``mx.sym.contrib.fft`` forward and backward against ``np.fft``.

    The operator is fed real data of ``shape`` (last dimension doubled first
    if it is odd). Its output is compared with ``np.fft.fft`` of the same
    data, stored with real and imaginary parts interleaved along the last
    axis, so the output has twice as many columns as the input.

    Parameters
    ----------
    shape : tuple of int
        2-D or 4-D shape of the real input.

    Raises
    ------
    ValueError
        If ``shape`` is neither 2-D nor 4-D. No reference value is defined
        for other ranks.
    """
    if len(shape) not in (2, 4):
        raise ValueError('check_fft expects a 2-D or 4-D shape, got %r' % (shape,))

    sym = mx.sym.contrib.fft(name='fft', compute_size=128)
    if shape[-1] % 2 != 0:
        dims = list(shape)
        dims[-1] = dims[-1] * 2
        shape = tuple(dims)
    init = [np.random.normal(size=shape, scale=1.0)]
    arr_grad = [mx.nd.empty(shape)]
    ctx_list = [{'ctx': mx.gpu(0), 'fft_data': shape, 'type_dict': {'fft_data': np.float32}}]
    exe_list = [sym.simple_bind(args_grad=arr_grad, **ctx) for ctx in ctx_list]

    for exe in exe_list:
        for arr, iarr in zip(exe.arg_arrays, init):
            arr[:] = iarr.astype(arr.dtype)
    # forward
    for exe in exe_list:
        exe.forward(is_train=True)
    out1 = [exe.outputs[0].asnumpy() for exe in exe_list]

    # Reference: interleave real and imaginary parts of NumPy's FFT along the
    # last axis (even columns real, odd columns imaginary).
    out = np.fft.fft(init[0], n=None, axis=-1, norm=None)
    a = np.zeros(out1[0].shape)
    a[..., 0::2] = out.real
    a[..., 1::2] = out.imag

    assert_almost_equal(a, out1[0], rtol=1e-3, atol=1e-5)

    # backward
    out_grad = mx.nd.empty(out1[0].shape)
    out_grad[:] = np.random.normal(-3, 3, out1[0].shape)
    # out_grad_to_complex: copy to the host once, then de-interleave.
    out_grad_np = out_grad.asnumpy()
    out_grad_complex = np.zeros(shape, dtype=np.complex64)
    out_grad_complex.real[...] = out_grad_np[..., 0::2]
    out_grad_complex.imag[...] = out_grad_np[..., 1::2]
    a = np.fft.ifft(out_grad_complex, n=None, axis=-1, norm=None)
    for exe in exe_list:
        exe.backward([out_grad])
        # The input gradient is divided by the transform length before it is
        # compared with NumPy's ifft of the head gradient.
        assert_almost_equal(a.real, exe.grad_arrays[0] / shape[-1], rtol=1e-3, atol=1e-5)
2017-04-03 15:18:41 -07:00
2018-08-07 10:29:47 -07:00
@with_seed()
def test_fft():
    """Run ``check_fft`` on random 2-D and 4-D shapes, two rounds each."""
    rounds = 2
    dim_upper = 10  # exclusive upper bound passed to np.random.randint
    for _ in range(rounds):
        for rank in (2, 4):
            random_shape = tuple(np.random.randint(1, dim_upper, size=rank))
            check_fft(random_shape)
2019-09-30 17:14:58 -04:00
def _make_ndarrays(input_list, ctx=mx.gpu(0)):
    """Return ``mx.nd`` copies of the arrays in ``input_list`` placed on ``ctx``.

    Each result is created with the dtype of its source array.
    """
    ndarrays = []
    for source in input_list:
        ndarrays.append(mx.nd.array(source, dtype=source.dtype, ctx=ctx))
    return ndarrays
2019-12-14 08:32:50 -08:00
def check_multi_sum_sq(dtype, shapes, ctx, tol1, tol2):
    """Check ``mx.nd.multi_sum_sq`` for determinism and against NumPy.

    Parameters
    ----------
    dtype : str or numpy dtype
        Element type of the randomly generated input arrays.
    shapes : sequence
        One shape per input array; each is unpacked into ``np.random.rand``.
    ctx : mx.Context
        Context on which the input arrays are created.
    tol1 : float
        Used as both ``atol`` and ``rtol`` when comparing with the reference.
    tol2 : float
        Not used by this check.
    """
    values_arr = [np.random.rand(*shape).astype(dtype) * 10. for shape in shapes]
    mx_vals = _make_ndarrays(values_arr, ctx=ctx)
    sum_sq = mx.nd.multi_sum_sq(*mx_vals, num_arrays=len(shapes))
    sum_sq2 = mx.nd.multi_sum_sq(*mx_vals, num_arrays=len(shapes))
    sum_sq_np = sum_sq.asnumpy()
    # checks that operator is deterministic
    assert np.array_equal(sum_sq_np, sum_sq2.asnumpy())
    # Reference sums are accumulated in float32 on the host; there is no need
    # to move them to the device and back before comparing.
    ref_sum_sq = np.array([(v.astype('float32') ** 2).sum() for v in values_arr],
                          dtype='float32')
    assert_almost_equal(ref_sum_sq, sum_sq_np, atol=tol1, rtol=tol1)
@with_seed()
def test_multi_sum_sq():
    """Run ``check_multi_sum_sq`` on GPU 0 for float16, float32 and float64.

    For each dtype a random number of 1-D arrays with random lengths is used.
    """
    min_nparam = 100
    max_nparam = 120
    min_dim = 50000
    max_dim = 100000
    max_ndim = 1
    dtypes = ['float16', 'float32', 'float64']
    for ctx in [mx.gpu(0)]:
        for dtype in dtypes:
            nparam = np.random.randint(min_nparam + 1, max_nparam + 1)
            shapes = [np.random.randint(min_dim, max_dim + 1, size=max_ndim) for _ in range(nparam)]
            # NOTE(review): only mx.gpu(0) is iterated above, so this relaxed
            # CPU/float16 tolerance looks unreachable -- confirm whether a CPU
            # context was meant to be covered as well.
            low_tol = ctx == mx.cpu(0) and dtype == 'float16'
            tol1 = 1e-3 if low_tol else 1e-5
            tol2 = 1e-6 if low_tol else 1e-7
            check_multi_sum_sq(dtype, shapes, ctx, tol1, tol2)
2019-09-30 17:14:58 -04:00
def check_fast_lars(w_dtype, g_dtype, shapes, ctx, tol1, tol2):
    """Check ``mx.nd.multi_sum_sq`` and ``mx.nd.multi_lars`` against a reference.

    Random weights and gradients are generated, their per-array sums of
    squares are compared with float32 reference sums, and the learning rates
    returned by ``multi_lars`` are compared with rates computed element by
    element from the reference norms.

    Parameters
    ----------
    w_dtype, g_dtype : str or numpy dtype
        Element types of the generated weight and gradient arrays.
    shapes : sequence
        One shape per parameter; each is unpacked into ``np.random.rand``.
    ctx : mx.Context
        Context on which all NDArrays are created.
    tol1 : float
        ``atol``/``rtol`` for the sum-of-squares comparisons.
    tol2 : float
        ``atol``/``rtol`` for the learning-rate comparison.
    """
    weights_arr = [np.random.rand(*shape).astype(w_dtype) * 10. for shape in shapes]
    grads_arr = [np.random.rand(*shape).astype(g_dtype) for shape in shapes]

    lrs = (np.random.rand(len(shapes)).astype('float32') + 0.1) / 100.
    wds = (np.random.rand(len(shapes)).astype('float32') + 0.1) / 1000.
    eta = (np.random.rand() + 0.1)
    eps = (np.random.rand() + 0.1) / 10000.

    mx_w = _make_ndarrays(weights_arr, ctx=ctx)
    mx_g = _make_ndarrays(grads_arr, ctx=ctx)
    mx_lrs = mx.nd.array(lrs, dtype='float32', ctx=ctx)
    mx_wds = mx.nd.array(wds, dtype='float32', ctx=ctx)

    w_sum_sq = mx.nd.multi_sum_sq(*mx_w, num_arrays=len(shapes))
    g_sum_sq = mx.nd.multi_sum_sq(*mx_g, num_arrays=len(shapes))

    # Reference sums of squares, accumulated in float32.
    ref_w_sum_sq = mx.nd.array([(w.astype('float32') ** 2).sum() for w in weights_arr],
                               dtype='float32', ctx=ctx)
    ref_g_sum_sq = mx.nd.array([(g.astype('float32') ** 2).sum() for g in grads_arr],
                               dtype='float32', ctx=ctx)
    assert_almost_equal(ref_w_sum_sq.asnumpy(), w_sum_sq.asnumpy(), atol=tol1, rtol=tol1)
    assert_almost_equal(ref_g_sum_sq.asnumpy(), g_sum_sq.asnumpy(), atol=tol1, rtol=tol1)

    rescale_grad = (np.random.rand() + 0.5) * 100.
    mx_new_lrs = mx.nd.multi_lars(mx_lrs, w_sum_sq, g_sum_sq, mx_wds, eta=eta, eps=eps,
                                  rescale_grad=rescale_grad)

    # Reference learning rates. The arithmetic is intentionally left in this
    # exact form and order, since tol2 is applied to its result.
    ref_w_l2norm = mx.nd.sqrt(ref_w_sum_sq)
    ref_g_l2norm = mx.nd.sqrt(ref_g_sum_sq * rescale_grad * rescale_grad)
    ref_new_lrs = mx.nd.zeros(ref_w_l2norm.shape, dtype='float32', ctx=ctx)
    for i in range(ref_w_l2norm.size):
        _w = ref_w_l2norm[i]
        _g = ref_g_l2norm[i]
        if _w > 0.0 and _g > 0.0:
            ref_new_lrs[i] = lrs[i] * eta * _w / (_g + wds[i] * _w + eps)
        else:
            # A zero norm leaves the learning rate unchanged.
            ref_new_lrs[i] = lrs[i]
    assert_almost_equal(ref_new_lrs.asnumpy(), mx_new_lrs.asnumpy(), atol=tol2, rtol=tol2)
@with_seed()
def test_fast_lars():
    """Run ``check_fast_lars`` on CPU 0 and GPU 0 for every weight/gradient dtype pair.

    Looser tolerances are used on the CPU context when either dtype is
    float16.
    """
    min_nparam = 50
    max_nparam = 60
    maxdim = 10000
    maxndim = 1

    dtypes = ['float16', 'float32', 'float64']
    for ctx in [mx.cpu(0), mx.gpu(0)]:
        for w_dtype in dtypes:
            for g_dtype in dtypes:
                nparam = np.random.randint(min_nparam + 1, max_nparam + 1)
                shapes = [np.random.randint(1, maxdim + 1, size=maxndim) for _ in range(nparam)]
                low_tol = ctx == mx.cpu(0) and 'float16' in (w_dtype, g_dtype)
                tol1 = 1e-3 if low_tol else 1e-5
                tol2 = 1e-6 if low_tol else 1e-7
                check_fast_lars(w_dtype, g_dtype, shapes, ctx, tol1, tol2)
def check_preloaded_multi_sgd(dtype, shapes, momentum, use_master_weights):
    """Check the ``preloaded_multi_*`` SGD operators against the ``multi_*`` ones.

    Both operator families are applied to separate copies of the same randomly
    generated weights and gradients (plus momenta and float32 master weights
    when requested), and the resulting arrays are asserted to be almost equal.
    The regular operators receive learning rates and weight decays through the
    ``lrs``/``wds`` keyword arguments; the preloaded operators receive them as
    two trailing NDArray inputs created on ``mx.gpu(0)``.

    Parameters
    ----------
    dtype : str
        Data type of the weights and gradients; 'float16' selects looser
        comparison tolerances.
    shapes : list
        One shape per parameter; ``len(shapes)`` is passed as ``num_weights``.
    momentum : float or None
        Momentum for the ``*_mom_update`` operators. ``None`` selects the
        plain (momentum-free) operators.
    use_master_weights : bool
        When True the multi-precision (``mp``) operators are used together
        with float32 copies of the weights.
    """
    def _flatten_list(nested_list):
        return [item for sublist in nested_list for item in sublist]

    def _assert_all_almost_equal(lhs_list, rhs_list, rtol, atol):
        for lhs, rhs in zip(lhs_list, rhs_list):
            assert_almost_equal(lhs.asnumpy(), rhs.asnumpy(), rtol=rtol, atol=atol)

    num_weights = len(shapes)

    weights_arr = [np.random.rand(*shape).astype(dtype) * 100. for shape in shapes]
    grads_arr = [np.random.rand(*shape).astype(dtype) * 100. for shape in shapes]
    rescale_grad = (np.random.random() + 1.0)

    # Independent copies: mx_* feed the regular operators, mx_p_* the preloaded ones.
    mx_w = _make_ndarrays(weights_arr)
    mx_g = _make_ndarrays(grads_arr)
    mx_p_w = _make_ndarrays(weights_arr)
    mx_p_g = _make_ndarrays(grads_arr)

    lrs = list((np.random.random(size=num_weights).astype('float32') + 0.1) / 100.)
    mx_lrs = mx.nd.array(lrs, dtype='float32', ctx=mx.gpu(0))
    wds = list((np.random.random(size=num_weights).astype('float32') + 0.1) / 1000.)
    mx_wds = mx.nd.array(wds, dtype='float32', ctx=mx.gpu(0))

    if use_master_weights:
        weights32_arr = [arr.astype('float32') for arr in weights_arr]
        mx_w32 = _make_ndarrays(weights32_arr)
        mx_p_w32 = _make_ndarrays(weights32_arr)

    if momentum is None:
        if use_master_weights:
            mx.nd.multi_mp_sgd_update(
                *_flatten_list(zip(mx_w, mx_g, mx_w32)),
                num_weights=num_weights, lrs=lrs, wds=wds,
                rescale_grad=rescale_grad, out=mx_w)
            mx.nd.preloaded_multi_mp_sgd_update(
                *(_flatten_list(zip(mx_p_w, mx_p_g, mx_p_w32)) +
                  [mx_lrs, mx_wds]), num_weights=num_weights,
                rescale_grad=rescale_grad, out=mx_p_w)
        else:
            mx.nd.multi_sgd_update(
                *_flatten_list(zip(mx_w, mx_g)),
                num_weights=num_weights, lrs=lrs, wds=wds,
                rescale_grad=rescale_grad, out=mx_w)
            mx.nd.preloaded_multi_sgd_update(
                *(_flatten_list(zip(mx_p_w, mx_p_g)) +
                  [mx_lrs, mx_wds]), num_weights=num_weights,
                rescale_grad=rescale_grad, out=mx_p_w)
    else:
        # NOTE: the momentum variants use a fixed rescale_grad of 0.95 for both
        # operator families instead of the random value drawn above.
        if use_master_weights:
            momentums_arr = [np.random.rand(*shape).astype("float32") for shape in shapes]
            mx_m = _make_ndarrays(momentums_arr)
            mx_p_m = _make_ndarrays(momentums_arr)
            mx.nd.multi_mp_sgd_mom_update(
                *_flatten_list(zip(mx_w, mx_g, mx_m, mx_w32)),
                num_weights=num_weights, lrs=lrs, wds=wds,
                rescale_grad=0.95, momentum=momentum, out=mx_w)
            mx.nd.preloaded_multi_mp_sgd_mom_update(
                *(_flatten_list(zip(mx_p_w, mx_p_g, mx_p_m, mx_p_w32)) +
                  [mx_lrs, mx_wds]), num_weights=num_weights,
                rescale_grad=0.95, momentum=momentum, out=mx_p_w)
        else:
            momentums_arr = [np.random.rand(*shape).astype(dtype) for shape in shapes]
            mx_m = _make_ndarrays(momentums_arr)
            mx_p_m = _make_ndarrays(momentums_arr)
            mx.nd.multi_sgd_mom_update(
                *_flatten_list(zip(mx_w, mx_g, mx_m)),
                num_weights=num_weights, lrs=lrs, wds=wds,
                rescale_grad=0.95, momentum=momentum, out=mx_w)
            mx.nd.preloaded_multi_sgd_mom_update(
                *(_flatten_list(zip(mx_p_w, mx_p_g, mx_p_m)) +
                  [mx_lrs, mx_wds]), num_weights=num_weights,
                rescale_grad=0.95, momentum=momentum, out=mx_p_w)

    # float16 results are compared with looser tolerances.
    if dtype == 'float16':
        rtol = 1e-3
        atol = 1e-2
    else:
        rtol = 1e-5
        atol = 1e-6

    _assert_all_almost_equal(mx_p_w, mx_w, rtol, atol)
    if momentum is not None:
        _assert_all_almost_equal(mx_p_m, mx_m, rtol, atol)
    if use_master_weights:
        # Master weights are float32, so they always use the tight tolerances.
        _assert_all_almost_equal(mx_p_w32, mx_w32, 1e-5, 1e-6)
@with_seed()
def test_preloaded_multi_sgd():
    """Run ``check_preloaded_multi_sgd`` over dtype, master-weight and momentum settings.

    float32 is exercised without master weights only; float16 is exercised
    both with and without them. Every combination is run with no momentum and
    with a momentum of 0.9, each time on a freshly drawn random list of
    parameter shapes.
    """
    dtypes = ['float16', 'float32']
    momentums = [None, 0.9]
    min_nparam = 5
    max_nparam = 10
    maxdim = 6
    maxndim = 4
    for dtype in dtypes:
        use_master_weights_list = [False] if dtype == 'float32' else [True, False]
        for use_master_weights in use_master_weights_list:
            for momentum in momentums:
                # randint excludes its upper bound, so nparam lies in
                # [min_nparam + 1, max_nparam].
                nparam = np.random.randint(min_nparam + 1, max_nparam + 1)
                # Each shape has `maxndim` dimensions, each in [1, maxdim].
                shapes = [np.random.randint(1, maxdim + 1, size=maxndim)
                          for _ in range(nparam)]
                check_preloaded_multi_sgd(dtype, shapes, momentum, use_master_weights)
2018-02-18 03:11:58 -08:00
2019-12-09 16:52:02 -08:00
2018-02-18 03:11:58 -08:00
@with_seed ( )
2016-07-05 11:29:40 -07:00
def test_batchnorm_with_type ( ) :
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
ctx_list_v1_2D = [
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 10 , 2 , 10 , 10 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 10 , 2 , 10 , 10 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
]
ctx_list_v2_2D = [
2018-06-26 20:18:10 +00:00
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
]
ctx_list_v2_1D = [
2018-06-26 20:18:10 +00:00
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
]
ctx_list_v2_3D = [
2018-09-05 12:31:30 -07:00
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float64 } }
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
]
# V1, 2D
sym = mx . sym . BatchNorm_v1 ( name = ' norm ' , fix_gamma = False )
check_consistency ( sym , ctx_list_v1_2D )
sym = mx . sym . BatchNorm_v1 ( name = ' norm ' , fix_gamma = True )
check_consistency ( sym , ctx_list_v1_2D )
# V2, 2D
[v1.x] Backport Unittest tolerance handling improvements (#18694). Also test seeding (#18762). (#19148)
* Add sm arch 80 to Makefile
* Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpu.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweaks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocal,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_softmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Remove pytest decorators introduced in error
* Fix test_forward.py:test_consistency
* Fix test_numpy_op.py tests
* Improve test seeding in test_numpy_interoperability.py (#18762)
* Fix test_numpy_op.py:test_np_random_{beta,chisquare}
* Reduce problem sizes with test_optimizer.py:test_multilamb
* Skip test_gluon_gpu.py:test_fused_{lstm,gpu}_layer, fix test_rnn_cells, for fp16 contexts
* Trigger CI
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-09-17 15:47:32 -07:00
bools = [ False , True ]
for fix_gamma , cudnn_off in itertools . product ( bools , bools ) :
sym = mx . sym . BatchNorm ( name = ' norm ' , fix_gamma = fix_gamma , cudnn_off = cudnn_off )
check_consistency ( sym , ctx_list_v2_2D )
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
# V2, 1D
[v1.x] Backport Unittest tolerance handling improvements (#18694). Also test seeding (#18762). (#19148)
* Add sm arch 80 to Makefile
* Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpu.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweaks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocal,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_softmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Remove pytest decorators introduced in error
* Fix test_forward.py:test_consistency
* Fix test_numpy_op.py tests
* Improve test seeding in test_numpy_interoperability.py (#18762)
* Fix test_numpy_op.py:test_np_random_{beta,chisquare}
* Reduce problem sizes with test_optimizer.py:test_multilamb
* Skip test_gluon_gpu.py:test_fused_{lstm,gpu}_layer, fix test_rnn_cells, for fp16 contexts
* Trigger CI
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-09-17 15:47:32 -07:00
for fix_gamma , cudnn_off in itertools . product ( bools , bools ) :
sym = mx . sym . BatchNorm ( name = ' norm ' , fix_gamma = fix_gamma , cudnn_off = cudnn_off )
check_consistency ( sym , ctx_list_v2_1D )
# V2, 3D
for fix_gamma , cudnn_off in itertools . product ( bools , [ True , ] ) :
sym = mx . sym . BatchNorm ( name = ' norm ' , fix_gamma = fix_gamma , cudnn_off = cudnn_off )
check_consistency ( sym , ctx_list_v2_3D )
2018-10-10 21:00:17 -07:00
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
2018-02-18 03:11:58 -08:00
@with_seed ( )
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
def test_batchnorm_versions ( ) :
2018-10-10 21:00:17 -07:00
def test_batchnorm_versions_helper ( batchnorm_op_list , data , fix_gamma , use_global_stats ) :
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
# Body of test_batchnorm_versions_helper.
# Builds two parallel lists -- context specs (ctx_list) and symbols (sym_list) --
# with one pair for each variant name found in batchnorm_op_list, then passes
# both lists to check_consistency.
# Pairs are appended in the fixed order of the checks below, not in the order
# the names appear in batchnorm_op_list.
# Every context spec stores `data` under its batchnorm_data entry and declares
# np.float32 for it in type_dict; the callers visible in this file pass a tuple.
# NOTE(review): the string literals carry padding spaces inside the quotes
# (e.g. ' ctx '); this looks like an extraction artifact -- confirm against the
# upstream file before relying on the exact keys/names.
ctx_list = [ ]
sym_list = [ ]
# BatchNorm_v1 operator on cpu(0).
if ' batchnorm_v1_cpu ' in batchnorm_op_list :
ctx_list . append ( { ' ctx ' : mx . cpu ( 0 ) , ' batchnorm_data ' : data , ' type_dict ' : { ' batchnorm_data ' : np . float32 } } )
sym_list . append ( mx . sym . BatchNorm_v1 ( fix_gamma = fix_gamma ,
use_global_stats = use_global_stats ,
name = ' batchnorm ' ) )
# BatchNorm_v1 operator on gpu(0) ("organic" in the original comment);
# no cudnn_off argument is passed for this variant.
if ' batchnorm_v1_gpu ' in batchnorm_op_list :
ctx_list . append ( { ' ctx ' : mx . gpu ( 0 ) , ' batchnorm_data ' : data , ' type_dict ' : { ' batchnorm_data ' : np . float32 } } )
sym_list . append ( mx . sym . BatchNorm_v1 ( fix_gamma = fix_gamma ,
use_global_stats = use_global_stats ,
name = ' batchnorm ' ) )
# BatchNorm operator on cpu(0).
if ' batchnorm_cpu ' in batchnorm_op_list :
ctx_list . append ( { ' ctx ' : mx . cpu ( 0 ) , ' batchnorm_data ' : data , ' type_dict ' : { ' batchnorm_data ' : np . float32 } } )
sym_list . append ( mx . sym . BatchNorm ( fix_gamma = fix_gamma ,
use_global_stats = use_global_stats ,
name = ' batchnorm ' ) )
# BatchNorm operator on gpu(0) with cudnn_off=True ("organic" in the original
# comment).
if ' batchnorm_gpu ' in batchnorm_op_list :
ctx_list . append ( { ' ctx ' : mx . gpu ( 0 ) , ' batchnorm_data ' : data , ' type_dict ' : { ' batchnorm_data ' : np . float32 } } )
sym_list . append ( mx . sym . BatchNorm ( fix_gamma = fix_gamma ,
use_global_stats = use_global_stats ,
name = ' batchnorm ' , cudnn_off = True ) )
# BatchNorm operator on gpu(0) with cudnn_off=False (original comment: used
# "if cudnn is enabled").
if ' batchnorm_cudnn ' in batchnorm_op_list :
ctx_list . append ( { ' ctx ' : mx . gpu ( 0 ) , ' batchnorm_data ' : data , ' type_dict ' : { ' batchnorm_data ' : np . float32 } } )
sym_list . append ( mx . sym . BatchNorm ( fix_gamma = fix_gamma ,
use_global_stats = use_global_stats ,
name = ' batchnorm ' , cudnn_off = False ) )
# NOTE(review): check_consistency is defined outside this chunk; presumably it
# runs each symbol on its paired context and compares the results -- confirm.
check_consistency ( sym_list , ctx_list )
2018-10-10 21:00:17 -07:00
def test_1d_batchnorm ( fix_gamma , use_global_stats ) :
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
# Body of test_1d_batchnorm.
# Calls test_batchnorm_versions_helper with the 3-element tuple (2, 3, 20) as
# `data`, requesting only the BatchNorm cpu, gpu and cudnn variants (the
# BatchNorm_v1 variants are not requested here), and forwards fix_gamma and
# use_global_stats unchanged.
# NOTE(review): presumably the tuple is an input shape with one spatial
# dimension, as the function name suggests -- confirm against the helper.
data = ( 2 , 3 , 20 )
test_batchnorm_versions_helper ( batchnorm_op_list = [ ' batchnorm_cpu ' ,
' batchnorm_gpu ' , ' batchnorm_cudnn ' ] ,
data = data ,
2018-10-10 21:00:17 -07:00
fix_gamma = fix_gamma , use_global_stats = use_global_stats )
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
2018-10-10 21:00:17 -07:00
def test_2d_batchnorm ( fix_gamma , use_global_stats ) :
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
data = ( 2 , 3 , 10 , 10 )
2018-10-10 21:00:17 -07:00
test_batchnorm_versions_helper ( batchnorm_op_list = [ ' batchnorm_v1_cpu ' , ' batchnorm_v1_gpu ' ,
' batchnorm_cpu ' ,
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
' batchnorm_gpu ' , ' batchnorm_cudnn ' ] ,
data = data ,
2018-10-10 21:00:17 -07:00
fix_gamma = fix_gamma , use_global_stats = use_global_stats )
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
2018-10-10 21:00:17 -07:00
def test_3d_batchnorm ( fix_gamma , use_global_stats ) :
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
data = ( 2 , 3 , 3 , 5 , 5 )
test_batchnorm_versions_helper ( batchnorm_op_list = [ ' batchnorm_cpu ' ,
' batchnorm_gpu ' ] ,
data = data ,
2018-10-10 21:00:17 -07:00
fix_gamma = fix_gamma , use_global_stats = use_global_stats )
test_1d_batchnorm ( True , False )
test_1d_batchnorm ( False , False )
test_1d_batchnorm ( False , True )
test_1d_batchnorm ( True , True )
test_2d_batchnorm ( True , False )
test_2d_batchnorm ( False , False )
test_2d_batchnorm ( False , True )
test_2d_batchnorm ( True , True )
test_3d_batchnorm ( True , False )
test_3d_batchnorm ( False , False )
test_3d_batchnorm ( False , True )
test_3d_batchnorm ( True , True )
2016-07-05 11:29:40 -07:00
2018-02-18 03:11:58 -08:00
@with_seed ( 1234 )
2018-11-01 21:11:44 -07:00
@assert_raises_cudnn_not_satisfied ( min_version = ' 5.1.10 ' )
2016-03-19 23:45:52 -07:00
def test_convolution_with_type ( ) :
2016-12-23 23:55:49 -08:00
sym1 = mx . sym . Convolution ( num_filter = 3 , kernel = ( 3 , 3 ) , name = ' conv ' )
data = mx . sym . Variable ( ' conv_data ' )
w = mx . sym . Variable ( ' conv_weight ' )
b = mx . sym . Variable ( ' conv_bias ' )
w = mx . sym . transpose ( w , axes = ( 0 , 2 , 3 , 1 ) )
sym2 = mx . sym . transpose ( data , axes = ( 0 , 2 , 3 , 1 ) )
sym2 = mx . sym . Convolution ( sym2 , w , b , layout = ' NHWC ' , num_filter = 3 , kernel = ( 3 , 3 ) )
sym2 = mx . sym . transpose ( sym2 , axes = ( 0 , 3 , 1 , 2 ) , name = ' conv ' )
sym = [ sym1 , sym1 , sym1 , sym1 , sym1 , sym2 , sym2 ]
2016-03-19 23:45:52 -07:00
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' conv_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' conv_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' conv_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' conv_data ' : np . float64 } } ,
2016-12-23 23:55:49 -08:00
{ ' ctx ' : mx . cpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' conv_data ' : np . float32 } } ,
# NHWC
{ ' ctx ' : mx . gpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 10 , 10 ) , ' conv_weight ' : ( 3 , 2 , 3 , 3 ) ,
' type_dict ' : { ' conv_data ' : np . float32 , ' conv_weight ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 10 , 10 ) , ' conv_weight ' : ( 3 , 2 , 3 , 3 ) ,
' type_dict ' : { ' conv_data ' : np . float16 , ' conv_weight ' : np . float16 } }
]
2017-04-18 22:00:04 -07:00
# wider tolerance needed for true-fp16 NCHW test above
tol = { np . dtype ( np . float16 ) : 0.5 ,
np . dtype ( np . float32 ) : 1e-3 ,
np . dtype ( np . float64 ) : 1e-5 ,
np . dtype ( np . uint8 ) : 0 ,
np . dtype ( np . int32 ) : 0 }
[v1.x] Backport Unittest tolerance handling improvements (#18694). Also test seeding (#18762). (#19148)
* Add sm arch 80 to Makefile
* Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Remove pytest decorators introduced in error
* Fix test_forward.py:test_consistency
* Fix test_numpy_op.py tests
* Improve test seeding in test_numpy_interoperablity.py (#18762)
* Fix test_numpy_op.py:test_np_random_{beta,chisquare}
* Reduce problem sizes with test_optimizer.py:test_multilamb
* Skip test_gluon_gpu.py:test_fused_{lstm,gpu}_layer, fix test_rnn_cells, for fp16 contexts
* Trigger CI
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-09-17 15:47:32 -07:00
check_consistency ( sym , ctx_list , rtol = tol , atol = tol )
2017-04-21 21:15:00 -07:00
# test ability to turn off training on bias
[v1.x] Backport Unittest tolerance handling improvements (#18694). Also test seeding (#18762). (#19148)
* Add sm arch 80 to Makefile
* Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Remove pytest decorators introduced in error
* Fix test_forward.py:test_consistency
* Fix test_numpy_op.py tests
* Improve test seeding in test_numpy_interoperablity.py (#18762)
* Fix test_numpy_op.py:test_np_random_{beta,chisquare}
* Reduce problem sizes with test_optimizer.py:test_multilamb
* Skip test_gluon_gpu.py:test_fused_{lstm,gpu}_layer, fix test_rnn_cells, for fp16 contexts
* Trigger CI
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-09-17 15:47:32 -07:00
check_consistency ( sym , ctx_list , grad_req = { ' conv_data ' : ' write ' , ' conv_weight ' : ' write ' , ' conv_bias ' : ' null ' } , rtol = tol , atol = tol )
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
2017-04-18 22:00:04 -07:00
# Apply N symbols against each of M contexts, checking that all NxM combinations match.
def check_consistency_NxM ( sym_list , ctx_list ) :
# e.g. if sym_list=[sym1, sym2] and ctx_list=[ctx1, ctx2, ctx3], then resulting lists are:
# sym_list=[sym1, sym1, sym1, sym2, sym2, sym2] and ctx_list=[ctx1, ctx2, ctx3, ctx1, ctx2, ctx3]
2018-07-02 13:53:32 -07:00
check_consistency ( np . repeat ( sym_list , len ( ctx_list ) ) , ctx_list * len ( sym_list ) , scale = 0.5 )
2017-04-18 22:00:04 -07:00
2018-06-28 06:03:13 +02:00
2022-11-21 09:02:56 -08:00
@unittest.skip("test fails intermittently. temporarily disabled till it gets fixed. tracked at https://github.com/apache/mxnet/issues/10141")
@with_seed()
def test_convolution_options():
    """Compare Convolution with and without cudnn_off=True across contexts.

    For 1D, 2D and 3D inputs, each option set (pad > 0, stride > 1,
    dilate > 1 where present, and a 1x1 kernel) builds two symbols named
    'conv': one with default settings and one with cudnn_off=True. Both are
    checked against every context/dtype entry via check_consistency_NxM().
    """

    def make_ctx_list(data_shape, ctx_dtype_pairs):
        # One check_consistency() context dict per (context, dtype) pair. The
        # 'conv_data' key matches the data argument of the symbol named 'conv'.
        return [{'ctx': ctx, 'conv_data': data_shape, 'type_dict': {'conv_data': dtype}}
                for ctx, dtype in ctx_dtype_pairs]

    def check_option(ctx_list, default_only_kwargs=None, **conv_kwargs):
        # `default_only_kwargs` are applied to the first symbol only; the
        # cudnn_off=True symbol is built from `conv_kwargs` alone.
        default_kwargs = dict(conv_kwargs, **(default_only_kwargs or {}))
        sym = mx.sym.Convolution(num_filter=3, name='conv', **default_kwargs)
        sym_no_cudnn = mx.sym.Convolution(num_filter=3, cudnn_off=True, name='conv', **conv_kwargs)
        check_consistency_NxM([sym, sym_no_cudnn], ctx_list)

    # 1D convolution
    ctx_list = make_ctx_list((2, 2, 7), [(mx.gpu(0), np.float64),
                                         (mx.gpu(0), np.float32),
                                         (mx.gpu(0), np.float16),
                                         (mx.cpu(0), np.float64),
                                         (mx.cpu(0), np.float32)])
    # Only the first symbol of each 1D pair is given an explicit layout.
    ncw_layout = {'layout': 'NCW'}
    # Pad > 0
    check_option(ctx_list, ncw_layout, kernel=(3,), pad=(1,))
    # Stride > 1
    check_option(ctx_list, ncw_layout, kernel=(3,), stride=(2,))
    # Dilate > 1
    check_option(ctx_list, ncw_layout, kernel=(3,), dilate=(2,))
    # 1x1 convolution
    check_option(ctx_list, ncw_layout, kernel=(1,), pad=(0,))

    # 2D convolution
    ctx_list = make_ctx_list((2, 2, 7, 7), [(mx.gpu(0), np.float64),
                                            (mx.gpu(0), np.float32),
                                            (mx.gpu(0), np.float16),
                                            (mx.cpu(0), np.float64),
                                            (mx.cpu(0), np.float32)])
    # Pad > 0
    check_option(ctx_list, kernel=(3, 3), pad=(1, 1))
    # Stride > 1
    check_option(ctx_list, kernel=(3, 3), stride=(2, 2))
    # Dilate > 1
    check_option(ctx_list, kernel=(3, 3), dilate=(2, 2))
    # 1x1 convolution
    check_option(ctx_list, kernel=(1, 1), pad=(0, 0))

    # 3D convolution
    # NOTE(review): the cpu/float64 entry used to be listed twice; the second
    # copy is now cpu/float32 so that this list mirrors the cpu coverage of the
    # 1D and 2D cases. Confirm that cpu/float32 was the intended entry.
    ctx_list = make_ctx_list((2, 2, 5, 7, 7), [(mx.cpu(0), np.float64),
                                               (mx.cpu(0), np.float32),
                                               (mx.gpu(0), np.float64),
                                               (mx.gpu(0), np.float32)])
    # Pad > 0
    check_option(ctx_list, kernel=(2, 3, 3), pad=(1, 1, 1))
    # Stride > 1
    check_option(ctx_list, kernel=(2, 3, 3), stride=(2, 2, 2))
    # 1x1 convolution
    check_option(ctx_list, kernel=(1, 1, 1), pad=(0, 0, 0))
2017-03-17 12:42:11 -07:00
2019-02-23 18:56:30 -08:00
2019-03-13 12:17:05 -07:00
@with_seed()
def test_conv_deconv_guards():
    """Compare default and cudnn_off=True (de)convolutions on strided 6x6 / 7x7 kernels.

    Test cases for convolution and deconvolution via strided fft. Ensure that the framework
    guards against problematic CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING in cuDNN [7.3.1,7.5)
    see https://docs.nvidia.com/deeplearning/sdk/cudnn-release-notes/rel_750.html#rel_750

    On any failure the offending operator name and arguments are printed and
    the original exception is re-raised.
    """
    for (op, opname) in [(mx.sym.Convolution, 'conv'), (mx.sym.Deconvolution, 'deconv')]:
        # The context dict is keyed by '<opname>_data', built from the same
        # `opname` that is passed as the symbol's name below.
        dataname = opname + '_data'
        ctx = {'ctx': mx.gpu(0), dataname: (32, 32, 64, 64), 'type_dict': {dataname: np.float32}}
        test_cases = [
            {'num_filter': 32, 'kernel': (6, 6), 'pad': (0, 0), 'stride': (2, 2), 'name': opname},
            {'num_filter': 32, 'kernel': (6, 6), 'pad': (1, 1), 'stride': (2, 2), 'name': opname},
            {'num_filter': 32, 'kernel': (6, 7), 'pad': (0, 1), 'stride': (2, 2), 'name': opname},
            {'num_filter': 32, 'kernel': (7, 6), 'pad': (1, 0), 'stride': (2, 2), 'name': opname},
            {'num_filter': 32, 'kernel': (7, 7), 'pad': (0, 0), 'stride': (2, 2), 'name': opname},
            {'num_filter': 32, 'kernel': (7, 7), 'pad': (1, 1), 'stride': (2, 2), 'name': opname}]
        for test_case_args in test_cases:
            try:
                sym = op(**test_case_args)
                sym_no_cudnn = op(cudnn_off=True, **test_case_args)
                # Both symbols run on the same GPU context; only cudnn_off differs.
                check_consistency([sym, sym_no_cudnn], [ctx, ctx], scale=0.1)
            except Exception:
                # Report which operator/argument set failed, then propagate the failure.
                print('Test failure of mx.sym.{} with args: {}'.format(op.__name__, test_case_args))
                raise
2019-02-23 18:56:30 -08:00
def _conv_with_num_streams(seed):
    """Check cuDNN Convolution against the non-cuDNN Convolution on random sizes.

    Runs 20 trials. Each trial draws a square spatial size in [32, 128),
    builds a flip -> Convolution graph twice (once with ``cudnn_off=True``)
    and hands both graphs to ``check_consistency`` on ``mx.gpu(0)``.

    Parameters
    ----------
    seed : passed unchanged to ``random_seed`` so the drawn sizes are
        reproducible for a given seed.

    Raises
    ------
    Whatever ``check_consistency`` raises; the failing size is printed first.
    """
    with random_seed(seed):
        # Try to expose timing-dependent improper workspace sharing by parallel dgrad and wgrad
        num_trials = 20
        for _ in range(num_trials):
            size = np.random.randint(32, 128)
            # The cudnn conv operator runs dgrad and wgrad in separate streams if enabled, with possible
            # kernel overlap.  The non-cudnn conv op doesn't do this so is used as the 'golden copy'.
            ctx = {'ctx': mx.gpu(0), 'conv_data': (2, 2, size, size),
                   'type_dict': {'conv_data': np.float32}}
            # Adding 'flip' here isolates the model from the input node (which can't use inplace store)
            flipped = mx.sym.flip(axis=0, name='conv')
            sym = mx.sym.Convolution(data=flipped, num_filter=3, kernel=(3, 3), pad=(1, 1), name='conv')
            flipped_no_cudnn = mx.sym.flip(axis=0, name='conv')
            sym_no_cudnn = mx.sym.Convolution(data=flipped_no_cudnn, num_filter=3, kernel=(3, 3), pad=(1, 1),
                                              cudnn_off=True, name='conv')
            try:
                # tol can be pretty high- we're looking for a large diff due to garbaged workspace
                check_consistency([sym, sym_no_cudnn], [ctx, ctx], rtol=1e-2, atol=1e-2)
            except Exception:
                # Report the size that triggered the mismatch, then let the error propagate.
                # Exception (not a bare except) keeps an interrupt from being reported as a
                # failing conv size.
                print('Failing conv size = {}'.format(size))
                raise
2020-02-06 22:10:30 -08:00
# Runs _conv_with_num_streams in a spawned process once per (stream count, engine type)
# pair.  The settings are handed to run_in_spawned_process as a dict of environment
# variable names to string values, so the values must be the exact strings MXNet
# expects (no surrounding whitespace).
@unittest.skip("skipping for now due to severe flakiness")
@with_seed()
def test_convolution_multiple_streams():
    for num_streams in ['1', '2']:
        for engine in ['NaiveEngine', 'ThreadedEngine', 'ThreadedEnginePerDevice']:
            # Progress markers go to stderr, bracketing each spawned run.
            print('Starting engine {} with {} streams.'.format(engine, num_streams), file=sys.stderr)
            run_in_spawned_process(_conv_with_num_streams,
                                   {'MXNET_GPU_WORKER_NSTREAMS': num_streams, 'MXNET_ENGINE_TYPE': engine})
            print('Finished engine {} with {} streams.'.format(engine, num_streams), file=sys.stderr)
2019-02-23 18:56:30 -08:00
2018-07-30 13:34:34 -07:00
# This test is designed to expose an issue with cudnn v7.1.4 algo find() when invoked with large c.
# Algos returned by find() can fail to run with grad_req='add' (wgrad kernel beta parameter == 1.0f).
@with_seed()
def test_convolution_large_c():
    problematic_c = 64 * 1024
    # The convolution accumulates many values, so scale the input magnitude.
    scale = 0.1

    def ctx_list_for(data_shape):
        # One float32 and one float64 configuration on gpu(0) for the given 'conv_data'
        # shape; both entries are handed to check_consistency() together.
        return [{'ctx': mx.gpu(0), 'conv_data': data_shape, 'type_dict': {'conv_data': dtype}}
                for dtype in (np.float32, np.float64)]

    def test_1D_with_width(width, grad_req):
        # 1-D case: data is (batch=1, problematic_c channels, width).
        ctx_list = ctx_list_for((1, problematic_c, width))
        sym = mx.sym.Convolution(layout='NCW', num_filter=8, kernel=(2,), name='conv')
        check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale)

    def test_2D_with_width(width, grad_req):
        # 2-D case: data is (batch=1, problematic_c channels, height=2, width).
        ctx_list = ctx_list_for((1, problematic_c, 2, width))
        sym = mx.sym.Convolution(layout='NCHW', num_filter=4, kernel=(2, 2), name='conv')
        check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale)

    # Run with different data tensor shapes to run cudnnFind() multiple times.
    # First, populate algo and op caches with models that always use cudnnFind() (req == 'write').
    # Then run models that must avoid cached cudnnFind() results in some cases (req == 'add').
    widths = [4, 16, 64]
    for req in ['write', 'add']:
        for width in widths:
            test_1D_with_width(width, req)
            test_2D_with_width(width, req)
# This test is designed to expose an issue with cudnn v7.1.4 algo find() when invoked with large c.
# Algos returned by find() can fail to run with grad_req='add' (wgrad kernel beta parameter == 1.0f).
@with_seed ( )
def test_deconvolution_large_c ( ) :
problematic_c = 64 * 1024
[v1.x] Backport Unittest tolerance handling improvements (#18694). Also test seeding (#18762). (#19148)
* Add sm arch 80 to Makefile
* Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Remove pytest decorators introduced in error
* Fix test_forward.py:test_consistency
* Fix test_numpy_op.py tests
* Improve test seeding in test_numpy_interoperablity.py (#18762)
* Fix test_numpy_op.py:test_np_random_{beta,chisquare}
* Reduce problem sizes with test_optimizer.py:test_multilamb
* Skip test_gluon_gpu.py:test_fused_{lstm,gpu}_layer, fix test_rnn_cells, for fp16 contexts
* Trigger CI
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-09-17 15:47:32 -07:00
# The deconvolution accumulates many values, so scale the input magnitude.
scale = 0.1
2018-07-30 13:34:34 -07:00
def test_1D_with_width ( width , grad_req ) :
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 1 , 8 , width ) , ' type_dict ' : { ' deconv_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 1 , 8 , width ) , ' type_dict ' : { ' deconv_data ' : np . float64 } } ]
sym = mx . sym . Deconvolution ( layout = ' NCW ' , num_filter = problematic_c , kernel = ( 2 , ) , name = ' deconv ' )
[v1.x] Backport Unittest tolerance handling improvements (#18694). Also test seeding (#18762). (#19148)
* Add sm arch 80 to Makefile
* Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Remove pytest decorators introduced in error
* Fix test_forward.py:test_consistency
* Fix test_numpy_op.py tests
* Improve test seeding in test_numpy_interoperablity.py (#18762)
* Fix test_numpy_op.py:test_np_random_{beta,chisquare}
* Reduce problem sizes with test_optimizer.py:test_multilamb
* Skip test_gluon_gpu.py:test_fused_{lstm,gpu}_layer, fix test_rnn_cells, for fp16 contexts
* Trigger CI
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-09-17 15:47:32 -07:00
check_consistency ( [ sym , sym ] , ctx_list , grad_req = grad_req , scale = scale )
2018-07-30 13:34:34 -07:00
def test_2D_with_width(width, grad_req):
    """Check a 2D NCHW deconvolution for float32/float64 consistency on GPU 0.

    Parameters
    ----------
    width : int
        Size of the last axis of the (1, 8, 2, width) input tensor.
    grad_req : str
        Forwarded unchanged to check_consistency().

    Notes
    -----
    `problematic_c` and `scale` are free names; they are not defined in this
    function and must be supplied by the surrounding scope.
    """
    ctx_list = [{'ctx': mx.gpu(0), 'deconv_data': (1, 8, 2, width),
                 'type_dict': {'deconv_data': dtype}}
                for dtype in (np.float32, np.float64)]
    sym = mx.sym.Deconvolution(layout='NCHW', num_filter=problematic_c, kernel=(2, 2), name='deconv')
    # One symbol per ctx_list entry; both entries use the same graph.
    check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale)
2018-07-30 13:34:34 -07:00
# Run with different data tensor shapes to run cudnnFind() multiple times.
# First, populate algo and op caches with models that always use cudnnFind() (req == 'write').
# Then run models that must avoid cached cudnnFind() results in some cases (req == 'add').
widths = [ 4 , 16 , 64 ]
for req in [ ' write ' , ' add ' ] :
for width in widths :
test_1D_with_width ( width , req )
test_2D_with_width ( width , req )
2018-02-18 03:11:58 -08:00
@with_seed ( )
2017-03-17 12:42:11 -07:00
def test_convolution_versions():
    """Cross-check Convolution_v1 and Convolution variants on CPU and GPU.

    Each symbol is paired positionally with one ctx_list entry; the variants
    differ in operator version, device and whether `cudnn_off=True` is set.
    """
    def make_ctx_list(shape, contexts):
        # One float32 'conv_data' entry per context, in the order given.
        return [{'ctx': ctx, 'conv_data': shape, 'type_dict': {'conv_data': np.float32}}
                for ctx in contexts]

    # 2D convolution NCHW
    ctx_list = make_ctx_list((2, 2, 7, 7),
                             [mx.cpu(0), mx.gpu(0), mx.gpu(0), mx.cpu(0), mx.gpu(0)])
    conv_2d = dict(num_filter=3, kernel=(3, 3), pad=(1, 1), name='conv')
    conv_v1_cpu = mx.sym.Convolution_v1(**conv_2d)
    conv_v1_gpu = mx.sym.Convolution_v1(cudnn_off=True, **conv_2d)
    conv_cudnn = mx.sym.Convolution(**conv_2d)
    conv_cpu = mx.sym.Convolution(**conv_2d)
    conv_gpu = mx.sym.Convolution(cudnn_off=True, **conv_2d)
    syms = [conv_v1_cpu, conv_v1_gpu, conv_cudnn, conv_cpu, conv_gpu]
    check_consistency(syms, ctx_list)

    # 3D convolution NCDHW
    ctx_list = make_ctx_list((2, 2, 5, 7, 7), [mx.gpu(0), mx.cpu(0), mx.gpu(0)])
    conv_3d = dict(num_filter=3, kernel=(2, 3, 3), pad=(1, 1, 1), name='conv')
    conv_cudnn = mx.sym.Convolution(**conv_3d)
    conv_cpu = mx.sym.Convolution(**conv_3d)
    conv_gpu = mx.sym.Convolution(cudnn_off=True, **conv_3d)
    syms = [conv_cudnn, conv_cpu, conv_gpu]
    check_consistency(syms, ctx_list)
2018-02-18 03:11:58 -08:00
2019-02-16 15:17:33 -08:00
# More max-pooling strides and pads to test cudnn pooling implementation code paths
2018-02-18 03:11:58 -08:00
@with_seed ( )
2019-02-16 15:17:33 -08:00
def test_pooling_nhwc_with_convention():
    """Compare NCHW max-pooling against an NHWC equivalent on GPU 0.

    Sweeps input shapes, kernels, strides and data types, and for each
    combination checks the 'valid' and 'full' pooling conventions plus global
    pooling.
    """
    def make_pooling_syms(**kwargs):
        """Return [NCHW pooling symbol, NHWC pooling wrapped in transposes]."""
        # Conventional NCHW layout pooling
        sym = mx.sym.Pooling(**kwargs)
        # NHWC pooling: transpose in, pool with layout='NHWC', transpose back.
        data = mx.sym.Variable('pool_data')
        sym_nhwc = mx.sym.transpose(data, axes=(0, 2, 3, 1))
        sym_nhwc = mx.sym.Pooling(sym_nhwc, layout='NHWC', **kwargs)
        sym_nhwc = mx.sym.transpose(sym_nhwc, axes=(0, 3, 1, 2), name='pool')
        return [sym, sym_nhwc]

    # While the float32 and float64 output is reliably consistent, float16 departs occasionally.
    # We compare nhwc and nchw results only within a given precision.
    for in_shape in [(3, 4, 8, 8), (2, 2, 20, 20)]:
        for kernel in [(2, 2), (3, 3), (4, 4)]:
            for stride in [(1, 1), (1, 2), (2, 1), (2, 2)]:
                for data_type in [np.float64, np.float32, np.float16]:
                    ctx_list = [{'ctx': mx.gpu(0), 'pool_data': in_shape,
                                 'type_dict': {'pool_data': data_type}}]
                    for convention in ('valid', 'full'):
                        symlist = make_pooling_syms(kernel=kernel, pool_type='max', stride=stride,
                                                    pooling_convention=convention, name='pool')
                        check_consistency_NxM(symlist, ctx_list)

                    symlist = make_pooling_syms(kernel=(300, 300), pool_type='max',
                                                global_pool=True, name='pool')
                    check_consistency_NxM(symlist, ctx_list)
2016-12-23 23:55:49 -08:00
@with_seed()
def test_pooling_with_type():
    """Check max-pooling consistency across GPU/CPU contexts and float types.

    Covers the 'valid' and 'full' pooling conventions and global pooling; every
    check passes `rand_type=np.float16` to check_consistency().
    """
    device_dtypes = [(mx.gpu(0), np.float64),
                     (mx.gpu(0), np.float32),
                     (mx.gpu(0), np.float16),
                     (mx.cpu(0), np.float64),
                     (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': ctx, 'pool_data': (2, 2, 10, 10), 'type_dict': {'pool_data': dtype}}
                for ctx, dtype in device_dtypes]

    pooling_variants = [dict(kernel=(3, 3), pooling_convention='valid'),
                        dict(kernel=(3, 3), pooling_convention='full'),
                        dict(kernel=(300, 300), global_pool=True)]
    for variant in pooling_variants:
        sym = mx.sym.Pooling(pool_type='max', name='pool', **variant)
        check_consistency(sym, ctx_list, rand_type=np.float16)
2016-03-19 23:45:52 -07:00
2018-02-18 03:11:58 -08:00
@with_seed ( )
2016-06-09 01:32:07 +09:00
def test_deconvolution_with_type():
    """Check basic 1D and 2D deconvolution across contexts and float types.

    Test basic deconvolution without exercising stride, pad or dilation. Each
    case is checked with the default grad_req and again with grad_req="add".
    """
    # wider tolerance needed for true-fp16 test
    tol = {np.dtype(np.float16): 0.3,
           np.dtype(np.float32): 1e-3,
           np.dtype(np.float64): 1e-5,
           np.dtype(np.uint8): 0,
           np.dtype(np.int32): 0}
    device_dtypes = [(mx.gpu(0), np.float64),
                     (mx.gpu(0), np.float32),
                     (mx.gpu(0), np.float16),
                     (mx.cpu(0), np.float64),
                     (mx.cpu(0), np.float32)]
    cases = [
        # 1D deconvolution
        (dict(num_filter=3, kernel=(3,)), (2, 2, 7)),
        # 2D deconvolution
        (dict(num_filter=2, kernel=(3, 3)), (2, 2, 10, 10)),
    ]
    for sym_kwargs, data_shape in cases:
        sym = mx.sym.Deconvolution(name='deconv', **sym_kwargs)
        ctx_list = [{'ctx': ctx, 'deconv_data': data_shape, 'type_dict': {'deconv_data': dtype}}
                    for ctx, dtype in device_dtypes]
        check_consistency(sym, ctx_list, rtol=tol, atol=tol)
        check_consistency(sym, ctx_list, rtol=tol, atol=tol, grad_req="add")
2017-04-18 22:00:04 -07:00
2018-02-18 03:11:58 -08:00
@with_seed ( )
2017-04-18 22:00:04 -07:00
def test_deconvolution_options():
    """Check deconvolution with pad, stride and dilate options, 1D and 2D.

    For every option a default symbol and a `cudnn_off=True` symbol are
    compared through check_consistency_NxM() over GPU and CPU contexts.
    """
    device_dtypes = [(mx.gpu(0), np.float64),
                     (mx.gpu(0), np.float32),
                     (mx.gpu(0), np.float16),
                     (mx.cpu(0), np.float64),
                     (mx.cpu(0), np.float32)]

    def make_ctx_list(shape):
        return [{'ctx': ctx, 'deconv_data': shape, 'type_dict': {'deconv_data': dtype}}
                for ctx, dtype in device_dtypes]

    # 1D deconvolution
    ctx_list = make_ctx_list((2, 2, 7))
    # Pad > 0, Stride > 1, Dilate > 1
    for option in (dict(pad=(1,)), dict(stride=(2,)), dict(dilate=(2,))):
        sym = mx.sym.Deconvolution(layout='NCW', num_filter=3, kernel=(3,), name='deconv', **option)
        sym_no_cudnn = mx.sym.Deconvolution(num_filter=3, kernel=(3,), cudnn_off=True,
                                            name='deconv', **option)
        check_consistency_NxM([sym, sym_no_cudnn], ctx_list)

    # 2D deconvolution
    ctx_list = make_ctx_list((2, 8, 10, 10))
    # Pad > 0, Stride > 1, Dilate > 1
    for option in (dict(pad=(1, 1)), dict(stride=(2, 2)), dict(dilate=(2, 2))):
        sym = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), name='deconv', **option)
        sym_no_cudnn = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), cudnn_off=True,
                                            name='deconv', **option)
        check_consistency_NxM([sym, sym_no_cudnn], ctx_list)

    # # 3D deconvolution (not yet enabled)
    # NOTE: the disabled snippet below still uses Convolution and 'conv_data';
    # it needs porting to Deconvolution / 'deconv_data' before being enabled.
    # ctx_list = [{'ctx': mx.cpu(0), 'conv_data': (2, 2, 5, 7, 7), 'type_dict': {'conv_data': np.float64}},
    #             {'ctx': mx.cpu(0), 'conv_data': (2, 2, 5, 7, 7), 'type_dict': {'conv_data': np.float64}},
    #             {'ctx': mx.gpu(0), 'conv_data': (2, 2, 5, 7, 7), 'type_dict': {'conv_data': np.float64}},
    #             {'ctx': mx.gpu(0), 'conv_data': (2, 2, 5, 7, 7), 'type_dict': {'conv_data': np.float32}}]
    # # Pad > 0
    # sym = mx.sym.Convolution(num_filter=3, kernel=(2,3,3), pad=(1,1,1), name='conv')
    # sym_no_cudnn = mx.sym.Convolution(num_filter=3, kernel=(2,3,3), pad=(1,1,1), cudnn_off=True, name='conv')
    # check_consistency_NxM([sym, sym_no_cudnn], ctx_list)
    # # Stride > 1
    # sym = mx.sym.Convolution(num_filter=3, kernel=(2,3,3), stride=(2,2,2), name='conv')
    # sym_no_cudnn = mx.sym.Convolution(num_filter=3, kernel=(2,3,3), stride=(2,2,2), cudnn_off=True, name='conv')
    # check_consistency_NxM([sym, sym_no_cudnn], ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed ( 1234 )
2017-01-28 00:45:17 +08:00
def test_bilinear_sampler_with_type():
    """Check BilinearSampler consistency across GPU/CPU contexts and float types.

    Only the dtype of 'data' is set through type_dict. The check runs with the
    default grad_req and again with grad_req="add".
    """
    data = mx.sym.Variable('data')
    grid = mx.sym.Variable('grid')
    sym = mx.sym.BilinearSampler(data=data, grid=grid)
    device_dtypes = [(mx.gpu(0), np.float64),
                     (mx.gpu(0), np.float32),
                     (mx.gpu(0), np.float16),
                     (mx.cpu(0), np.float64),
                     (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': ctx, 'data': (1, 5, 10, 10), 'grid': (1, 2, 10, 10),
                 'type_dict': {'data': dtype}}
                for ctx, dtype in device_dtypes]
    check_consistency(sym, ctx_list)
    check_consistency(sym, ctx_list, grad_req="add")
2018-02-18 03:11:58 -08:00
@with_seed()
def test_grid_generator_with_type():
    """Run check_consistency on GridGenerator for both transform types.

    Each transform type ('affine', then 'warp') is checked on GPU and CPU
    with float32 data, once with the default gradient request and once with
    grad_req="add".  The affine case additionally passes scale=1 to
    check_consistency.
    """
    data = mx.sym.Variable('data')

    # Affine transform: input is a (batch, 6) matrix of affine parameters.
    sym = mx.sym.GridGenerator(data=data, transform_type='affine', target_shape=(20, 20))
    scale = 1
    ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}},
                {'ctx': mx.cpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}}]
    check_consistency(sym, ctx_list, scale=scale)
    check_consistency(sym, ctx_list, scale=scale, grad_req="add")

    # Warp transform: input shape matches the 20x20 target with 2 channels.
    sym = mx.sym.GridGenerator(data=data, transform_type='warp', target_shape=(20, 20))
    ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}},
                {'ctx': mx.cpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}}]
    check_consistency(sym, ctx_list)
    check_consistency(sym, ctx_list, grad_req="add")
2018-02-18 03:11:58 -08:00
2018-06-28 16:16:29 -07:00
@with_seed()
def test_spatial_transformer_with_type():
    """Run check_consistency on SpatialTransformer with cuDNN off and on.

    The localisation input is built from the data itself:
    Flatten -> FullyConnected(10) -> relu -> FullyConnected(6).  The resulting
    symbol is checked on GPU vs. CPU with float64 data, first with
    cudnn_off=True and then with cudnn_off=False, each time with the default
    gradient request and with grad_req="add".
    """
    data = mx.sym.Variable('data')
    loc = mx.sym.Flatten(data)
    loc = mx.sym.FullyConnected(data=loc, num_hidden=10)
    loc = mx.sym.Activation(data=loc, act_type='relu')
    loc = mx.sym.FullyConnected(data=loc, num_hidden=6)
    sym = mx.sym.SpatialTransformer(data=data, loc=loc, target_shape=(10, 10),
                                    transform_type="affine", sampler_type="bilinear",
                                    cudnn_off=True)
    ctx_list = [{'ctx': mx.gpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float64}},
                {'ctx': mx.cpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float64}}]
    check_consistency(sym, ctx_list)
    check_consistency(sym, ctx_list, grad_req="add")

    # Same graph and contexts, this time with cudnn_off=False.
    sym = mx.sym.SpatialTransformer(data=data, loc=loc, target_shape=(10, 10),
                                    transform_type="affine", sampler_type="bilinear",
                                    cudnn_off=False)
    check_consistency(sym, ctx_list)
    check_consistency(sym, ctx_list, grad_req="add")
2017-03-17 12:42:11 -07:00
2018-08-12 12:43:19 -07:00
@with_seed()
def test_pooling_with_type2():
    """Run check_consistency on Pooling, GPU vs. CPU, one dtype at a time.

    For each of float64, float32 and float16 a GPU/CPU context pair is built
    and four pooling configurations (max with stride, avg with pad, max with a
    5x5 kernel and pad, sum with pad) are checked.
    """
    # While the float32 and float64 output is reliably consistent, float16 departs occasionally.
    # We compare cpu and gpu results only within a given precision.
    for data_type in [np.float64, np.float32, np.float16]:
        ctx_list = [{'ctx': mx.gpu(0), 'pool_data': (10, 2, 10, 10),
                     'type_dict': {'pool_data': data_type}},
                    {'ctx': mx.cpu(0), 'pool_data': (10, 2, 10, 10),
                     'type_dict': {'pool_data': data_type}}]

        sym = mx.sym.Pooling(name='pool', kernel=(3, 3), stride=(2, 2), pool_type='max')
        check_consistency(sym, ctx_list)

        sym = mx.sym.Pooling(name='pool', kernel=(3, 3), pad=(1, 1), pool_type='avg')
        check_consistency(sym, ctx_list)

        sym = mx.sym.Pooling(name='pool', kernel=(5, 5), pad=(2, 2), pool_type='max')
        check_consistency(sym, ctx_list)

        sym = mx.sym.Pooling(name='pool', kernel=(3, 3), pad=(1, 1), pool_type='sum')
        check_consistency(sym, ctx_list)

@with_seed()
def test_pooling_nhwc_with_type():
    """Compare NCHW pooling with NHWC pooling (wrapped in transposes) on GPU.

    For each dtype, three pooling configurations are built in both layouts and
    handed to check_consistency_NxM with a single GPU context.
    """
    def make_pooling_syms(**kwargs):
        """Return [NCHW pooling symbol, equivalent NHWC pooling symbol]."""
        # Conventional NCHW layout pooling
        sym = mx.sym.Pooling(**kwargs)
        # NHWC pooling: transpose in, pool with layout='NHWC', transpose back out
        data = mx.sym.Variable('pool_data')
        sym_nhwc = mx.sym.transpose(data, axes=(0, 2, 3, 1))
        sym_nhwc = mx.sym.Pooling(sym_nhwc, layout='NHWC', **kwargs)
        sym_nhwc = mx.sym.transpose(sym_nhwc, axes=(0, 3, 1, 2), name='pool')
        return [sym, sym_nhwc]

    # While the float32 and float64 output is reliably consistent, float16 departs occasionally.
    # We compare nhwc and nchw results only within a given precision.
    for data_type in [np.float64, np.float32, np.float16]:
        # NHWC pooling only enabled on GPU with CUDNN
        ctx_list = [{'ctx': mx.gpu(0), 'pool_data': (10, 2, 10, 10),
                     'type_dict': {'pool_data': data_type}}]

        symlist = make_pooling_syms(name='pool', kernel=(3, 3), stride=(2, 2), pool_type='max')
        check_consistency_NxM(symlist, ctx_list)

        symlist = make_pooling_syms(name='pool', kernel=(3, 3), pad=(1, 1), pool_type='avg')
        check_consistency_NxM(symlist, ctx_list)

        symlist = make_pooling_syms(name='pool', kernel=(5, 5), pad=(2, 2), pool_type='max')
        check_consistency_NxM(symlist, ctx_list)
2016-11-17 22:19:38 -08:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_pooling_versions():
    """Cross-check the pooling operator implementations against each other.

    Implementations are named '<pool_op>_<ctx_type>' where pool_op is one of
    'pool', 'pool_transposed', 'pool_v1' and ctx_type is one of 'cpu', 'gpu',
    'cudnn'.  For each dtype, data dimensionality and pooling type, the
    applicable implementations are built and passed to check_consistency.
    """

    # Produce the name of the 'transposed' layout, given the dimension
    def transposed_layout(ndim):
        if ndim < 3 or ndim > 5:
            raise RuntimeError("Invalid data dim, expecting 3, 4 or 5")
        return ('NWC', 'NHWC', 'NDHWC')[ndim - 3]

    # default padding is all zeros
    def is_default_pad(pad):
        return pad == (0,) * len(pad)

    # default stride is all ones
    def is_default_stride(stride):
        return stride == (1,) * len(stride)

    # returns True/False randomly with equal probability
    def random_choice():
        return np.random.random(1)[0] < 0.5

    def test_pooling_versions_helper(pool_op_list, data, kernel, pool_type, pad, stride,
                                     pooling_convention='valid', global_pool=False, p_value=2,
                                     count_include_pad=True, tol=None, dtype=np.float32):
        """Build one symbol/context pair per entry of pool_op_list and compare them.

        pool_op_list entries are '<pool_op>_<ctx_type>' strings; `data` is the
        input shape tuple.  `pad` and `stride` are only consulted when
        global_pool is False.  `tol` is forwarded as both rtol and atol.
        Raises RuntimeError for an unknown ctx_type or pool_op.
        """
        ctx_list = []
        sym_list = []
        for pool_ctx in pool_op_list:
            # Split on the last underscore: 'pool_transposed_cudnn' -> ('pool_transposed', 'cudnn')
            (pool_op, ctx_type) = pool_ctx.rsplit('_', 1)
            expected_ctxs = ['cpu', 'gpu', 'cudnn']
            if ctx_type not in expected_ctxs:
                raise RuntimeError('Expected one of {}, saw {}.'.format(expected_ctxs, ctx_type))
            # Both 'gpu' and 'cudnn' run on the GPU; they differ only in cudnn_off below.
            ctx = mx.cpu(0) if ctx_type == 'cpu' else mx.gpu(0)
            ctx_list.append({'ctx': ctx, 'pool_data': data, 'type_dict': {'pool_data': dtype}})
            # start with pool args present in all cases
            pool_op_args = {'kernel': kernel, 'pool_type': pool_type,
                            'pooling_convention': pooling_convention, 'name': 'pool'}
            # add other args as needed
            if global_pool:
                pool_op_args['global_pool'] = True
            else:
                # Add pad and stride param if needed, plus randomly when it matches the default
                if not is_default_pad(pad) or random_choice():
                    pool_op_args.update({'pad': pad})
                if not is_default_stride(stride) or random_choice():
                    pool_op_args.update({'stride': stride})

            expected_pool_ops = ['pool', 'pool_transposed', 'pool_v1']
            if pool_op == 'pool_v1':
                sym = mx.sym.Pooling_v1(**pool_op_args)
            else:
                pool_op_args.update({'p_value': p_value, 'count_include_pad': count_include_pad})
                if ctx_type != 'cpu':
                    # 'gpu' means the non-cuDNN GPU path; 'cudnn' leaves cuDNN enabled.
                    pool_op_args['cudnn_off'] = ctx_type == 'gpu'
                if pool_op == 'pool':
                    # isolate pooling input from symbol input to test shared tensor optimizations
                    buffered_input = mx.sym.identity(name='pool')
                    sym = mx.sym.Pooling(buffered_input, **pool_op_args)
                elif pool_op == 'pool_transposed':
                    ndim = len(data)
                    # NCW->NWC axes=(0,2,1) NCHW->NHWC axes=(0,2,3,1) NCDHW->NDHWC axes=(0,2,3,4,1);
                    axes = (0,) + tuple(range(2, ndim)) + (1,)
                    transposed = mx.sym.transpose(axes=axes, name='pool')
                    pooled = mx.sym.Pooling(data=transposed, layout=transposed_layout(ndim),
                                            **pool_op_args)
                    # NWC->NCW axes=(0,2,1) NHWC->NCHW axes=(0,3,1,2) NDHWC->NCDHW axes=(0,4,1,2,3);
                    axes = (0, ndim - 1) + tuple(range(1, ndim - 1))
                    sym = mx.sym.transpose(data=pooled, axes=axes, name='pool')
                else:
                    raise RuntimeError('Expected one of {}, saw {}.'.format(expected_pool_ops,
                                                                            pool_op))
            sym_list.append(sym)

        check_consistency(sym_list, ctx_list, equal_nan=(not count_include_pad), rtol=tol, atol=tol)

    def test_pooling_dim(dim, pool_type, dtype, pool_op_list, p_value=2, count_include_pad=True,
                         tol=None):
        """Run the helper over the kernel/pad/stride table selected by `dim`.

        `dim` is one of '1D', '2D_no_padding', '2D', '3D'; anything else
        raises RuntimeError.  Every table row is run with both the 'valid' and
        'full' pooling conventions, followed by one global-pooling run.
        """
        if dim == '1D':
            data = (3, 3, 10)
            kernels = [(4,), (4,), (5,)]
            pads = [(0,), (2,), (2,)]
            strides = [(1,), (2,), (1,)]
        elif dim == '2D_no_padding':
            data = (3, 2, 20, 20)
            kernels = [(3, 3), (4, 5)]
            pads = [(0, 0), (0, 0)]
            strides = [(1, 1), (2, 1)]
        elif dim == '2D':
            data = (2, 2, 20, 20)
            kernels = [(3, 3), (3, 5), (4, 5), (4, 5)]
            pads = [(0, 0), (1, 2), (0, 0), (2, 3)]
            strides = [(1, 1), (1, 1), (2, 1), (1, 1)]
        elif dim == '3D':
            data = (2, 3, 20, 20, 20)
            kernels = [(4, 5, 3), (4, 5, 3), (3, 5, 7)]
            pads = [(0, 0, 0), (2, 3, 2), (1, 2, 3)]
            strides = [(1, 1, 1), (2, 3, 1), (1, 1, 1)]
        else:
            raise RuntimeError('Unexpected pooling test class: {}.'.format(dim))

        for kernel, pad, stride in zip(kernels, pads, strides):
            for pooling_convention in ['valid', 'full']:
                try:
                    test_pooling_versions_helper(pool_op_list=pool_op_list,
                                                 data=data, kernel=kernel, pad=pad, stride=stride,
                                                 pool_type=pool_type,
                                                 pooling_convention=pooling_convention,
                                                 global_pool=False, p_value=p_value,
                                                 count_include_pad=count_include_pad,
                                                 tol=tol, dtype=dtype)
                except:
                    # Report the failing configuration, then re-raise the failure unchanged.
                    print('pool_op_list = {}'.format(pool_op_list))
                    print('kernel={}, pad={}, stride={}'.format(kernel, pad, stride))
                    print('pool_type={}, pooling_convention={}, global_pool=False'.format(
                        pool_type, pooling_convention))
                    print('p_value={}, count_include_pad={}, dtype={}'.format(
                        p_value, count_include_pad, dtype))
                    print('environ = \n{}'.format(os.environ))
                    raise

        # Make sure kernel is ignored during global_pool by sometimes setting it to a crazy value
        kernel = kernels[0]
        if random_choice():
            kernel = (300,) * len(kernel)

        test_pooling_versions_helper(pool_op_list=pool_op_list,
                                     data=data, kernel=kernel, pad=None, stride=None,
                                     pool_type=pool_type, global_pool=True, p_value=p_value,
                                     count_include_pad=count_include_pad, tol=tol, dtype=dtype)

    # The various implementations of the standard pooling operator
    std_pool_op_list = ['pool_cpu', 'pool_transposed_cpu',
                        'pool_gpu', 'pool_transposed_gpu',
                        'pool_cudnn', 'pool_transposed_cudnn']
    # The implementations of the 'v1' pooling operator
    v1_pool_op_list = ['pool_v1_cpu', 'pool_v1_gpu']
    # For those cases when all implementations should match- the combined implementation list.
    combo_pool_op_list = std_pool_op_list + v1_pool_op_list

    for dtype in [np.float32, np.float64, np.float16]:
        # Testing of the standard (not 'v1') pooling operator is universal across all
        # data dimensions, implementations and layouts.
        for dim in ['1D', '2D', '3D']:
            test_pooling_dim(dim, 'max', dtype, std_pool_op_list)
            test_pooling_dim(dim, 'avg', dtype, std_pool_op_list, count_include_pad=True)
            test_pooling_dim(dim, 'avg', dtype, std_pool_op_list, count_include_pad=False)
            test_pooling_dim(dim, 'sum', dtype, std_pool_op_list)
            test_pooling_dim(dim, 'lp', dtype, std_pool_op_list, p_value=1)
            test_pooling_dim(dim, 'lp', dtype, std_pool_op_list, p_value=2)
            test_pooling_dim(dim, 'lp', dtype, std_pool_op_list, p_value=3)

        # Testing of the 'v1' pooling operator is over its restricted support domain of
        # 2D data only and not with the 'lp' pooling type.  The 'v1' cpu and gpu versions are
        # always tested against each other, and sometimes against the standard operator versions.
        # The slightly different 'v1' definition prevents this in the following cases:
        #
        #     1. In max pooling, when multiple input values are the maximum in the input window,
        #        the 'v1' implementation backprops the gradient to all maxima, whereas the standard
        #        pooling operator backprops the gradient to the lowest-indexed maximum only.
        #     2. In max pooling, the 'v1' operator pads with 0's and this value can become the
        #        maximum output value in the case of an all-negative input.  The standard pooling
        #        operator effectively considers the padding to be the largest negative value, so
        #        only input values should appear in the output.
        #     3. In avg pooling, the 'v1' operator divides the sum by the same window size factor,
        #        even at the edges, and so does not support count_include_pad = False.
        #     4. The float16 'v1' pooling operator performs forward sums and averages in
        #        float16, whereas the std operators perform those calculations in float32, so
        #        greater float16 tolerances are needed when comparing across implementations.

        # Double the float16 tol when comparing v1 and non-v1 implementations, per note 4 above.
        relaxed_tol = {np.dtype(np.float16): 2e-1,
                       np.dtype(np.float32): 1e-3,
                       np.dtype(np.float64): 1e-5,
                       np.dtype(np.uint8): 0,
                       np.dtype(np.int32): 0,
                       np.dtype(np.int64): 0}

        # Exclude std implementations due to points 1 and 2 above.
        test_pooling_dim('2D', 'max', dtype, v1_pool_op_list)
        # The standard and 'v1' implementations match for this case.
        test_pooling_dim('2D', 'avg', dtype, combo_pool_op_list, count_include_pad=True,
                         tol=relaxed_tol)
        # Exclude std implementations due to point 3 above.
        test_pooling_dim('2D', 'avg', dtype, v1_pool_op_list, count_include_pad=False)
        # The standard and 'v1' implementations match for this case.
        test_pooling_dim('2D', 'sum', dtype, combo_pool_op_list, tol=relaxed_tol)

    # We can compare the standard and 'v1' max pooling implementations if we eliminate padding
    # (see point 2 above) and use np.float64 data so that no two random input window values are
    # likely to be the same (see point 1 above).
    test_pooling_dim('2D_no_padding', 'max', np.float64, combo_pool_op_list)
2017-03-21 23:01:57 -07:00
2018-11-17 09:43:05 +08:00
@with_seed()
def test_pooling_full_2d():
    """Run check_consistency on 2D 'full'-convention pooling, CPU vs. GPU.

    The same float32 configuration (kernel (4, 5), pad (1, 2), stride (3, 4))
    is checked for the 'max', 'avg' and 'sum' pool types.
    """
    def test_pooling_full_2d_type(pool_type):
        """Build identical CPU and GPU Pooling symbols for `pool_type` and compare them."""
        data = (2, 2, 10, 10)
        kernel = (4, 5)
        pad = (1, 2)
        stride = (3, 4)

        convention = 'full'
        ctx_list = []
        sym_list = []

        # o_h = ceil((10 + 1 + 1 - 4) / 3) + 1 = 4
        # o_w = ceil((10 + 2 + 2 - 5) / 4) + 1 = 4
        ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data,
                         'type_dict': {'pool_data': np.float32}})
        sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
                                       pooling_convention=convention, global_pool=False,
                                       name='pool'))

        ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data,
                         'type_dict': {'pool_data': np.float32}})
        sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
                                       pooling_convention=convention, global_pool=False,
                                       name='pool'))

        check_consistency(sym_list, ctx_list)

    test_pooling_full_2d_type('max')
    test_pooling_full_2d_type('avg')
    test_pooling_full_2d_type('sum')
2019-07-08 10:07:37 +08:00
@with_seed()
def test_flatten_slice_after_conv():
    """Run check_consistency on a Convolution -> flatten -> slice graph.

    The graph is bound on gpu(0) and cpu(0) with a float32 input of shape
    (2, 16, 16, 16); check_consistency is called with scale=0.5.
    """
    data = mx.sym.Variable('conv_data')
    conv = mx.symbol.Convolution(data=data, name='conv', num_filter=16,
                                 kernel=(3, 3), stride=(1, 1))
    flatten = mx.symbol.flatten(data=conv)
    slice_sym = mx.symbol.slice(data=flatten, begin=0, end=1)

    ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 16, 16, 16),
                 'type_dict': {'conv_data': np.float32}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 16, 16, 16),
                 'type_dict': {'conv_data': np.float32}}]
    check_consistency(slice_sym, ctx_list, scale=0.5)
2019-07-08 10:07:37 +08:00
2019-07-22 07:25:30 +08:00
@with_seed()
def test_bilinear_resize_op():
    """Run check_consistency on contrib.BilinearResize2D for several configurations.

    Each configuration is bound on cpu(0) and gpu(0) with a float32 input of
    shape (2, 2, 20, 20). Explicit height/width and the 'odd_scale' and
    'to_even_up' modes are covered, each with align_corners True and False.
    """
    ctx_list = [{'ctx': mx.cpu(0), 'data': (2, 2, 20, 20), 'type_dict': {'data': np.float32}},
                {'ctx': mx.gpu(0), 'data': (2, 2, 20, 20), 'type_dict': {'data': np.float32}}]
    data = mx.sym.Variable('data')

    # (positional args, keyword args) for each BilinearResize2D symbol, in
    # the order they are checked. The scale-based modes pass None as the
    # second positional argument.
    resize_specs = [
        ((data,), dict(height=10, width=5, align_corners=True)),
        ((data,), dict(height=10, width=5, align_corners=False)),
        ((data, None), dict(scale_height=2, scale_width=0.5, mode='odd_scale',
                            align_corners=True)),
        ((data, None), dict(scale_height=2, scale_width=0.5, mode='odd_scale',
                            align_corners=False)),
        ((data, None), dict(scale_height=0.5, scale_width=2, mode='to_even_up',
                            align_corners=True)),
        ((data, None), dict(scale_height=0.5, scale_width=2, mode='to_even_up',
                            align_corners=False)),
    ]
    for args, kwargs in resize_specs:
        sym = mx.sym.contrib.BilinearResize2D(*args, **kwargs)
        check_consistency(sym, ctx_list)
2019-07-22 07:25:30 +08:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_global_pooling():
    """Run check_consistency over global-pooling symbols built with varying arguments.

    For each pool type, symbols are created with ``global_pool=True`` and
    three sets of window arguments (kernel/pad/stride, kernel only, none),
    on cpu(0) and on gpu(0) with ``cudnn_off`` False and True. The 2D case
    additionally includes ``Pooling_v1`` on cpu(0) for non-'lp' pool types.
    Presumably the window arguments are ignored when ``global_pool`` is set,
    so all variants should agree -- confirm against the Pooling operator docs.
    """
    pooling_convention = 'valid'

    def float32_ctx(ctx, data):
        """Return a context spec binding float32 'pool_data' of shape *data* on *ctx*."""
        return {'ctx': ctx, 'pool_data': data, 'type_dict': {'pool_data': np.float32}}

    def window_variants(kernel, pad, stride):
        """Return the three window-argument sets each backend is tested with."""
        return [dict(kernel=kernel, pad=pad, stride=stride),
                dict(kernel=kernel),
                dict()]

    def append_pooling_cases(ctx_list, sym_list, data, variants, pool_type, p_value):
        """Append one Pooling case per (backend, window variant) pair."""
        backends = [(mx.cpu(0), {}),
                    (mx.gpu(0), {'cudnn_off': False}),
                    (mx.gpu(0), {'cudnn_off': True})]
        for ctx, backend_args in backends:
            for window_args in variants:
                op_args = dict(window_args)
                op_args.update(backend_args)
                ctx_list.append(float32_ctx(ctx, data))
                sym_list.append(mx.sym.Pooling(pool_type=pool_type,
                                               pooling_convention=pooling_convention,
                                               global_pool=True, p_value=p_value,
                                               name='pool', **op_args))

    def test_1d_pooling(pool_type, p_value=2):
        data = (2, 3, 20)
        variants = window_variants(kernel=(4,), pad=(2,), stride=(2,))

        ctx_list = []
        sym_list = []
        append_pooling_cases(ctx_list, sym_list, data, variants, pool_type, p_value)
        check_consistency(sym_list, ctx_list)

    def test_2d_pooling(pool_type, p_value=2):
        data = (2, 3, 20, 20)
        variants = window_variants(kernel=(4, 4), pad=(2, 2), stride=(2, 2))

        ctx_list = []
        sym_list = []
        if pool_type != 'lp':
            # Pooling_v1 is only exercised for non-'lp' pool types and is
            # never given p_value.
            for window_args in variants:
                ctx_list.append(float32_ctx(mx.cpu(0), data))
                sym_list.append(mx.sym.Pooling_v1(pool_type=pool_type,
                                                  pooling_convention=pooling_convention,
                                                  global_pool=True, name='pool',
                                                  **window_args))
        append_pooling_cases(ctx_list, sym_list, data, variants, pool_type, p_value)
        check_consistency(sym_list, ctx_list)

    for run_case in (test_1d_pooling, test_2d_pooling):
        for pool_type in ('max', 'avg', 'sum'):
            run_case(pool_type)
        for p_value in (1, 2, 3):
            run_case('lp', p_value=p_value)
2018-02-09 09:53:16 +08:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_upsampling_with_type():
    """Run check_consistency on nearest-neighbour UpSampling across devices and dtypes.

    Covers gpu(0) with float64/float32/float16 and cpu(0) with
    float64/float32, all with an input of shape (2, 2, 2, 10).
    """
    sym = mx.sym.UpSampling(scale=2, num_filter=2, name='up',
                            sample_type='nearest', num_args=1)
    shape = (2, 2, 2, 10)
    dev_dtypes = [(mx.gpu(0), np.float64),
                  (mx.gpu(0), np.float32),
                  (mx.gpu(0), np.float16),
                  (mx.cpu(0), np.float64),
                  (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'up_arg0': shape, 'type_dict': {'up_arg0': dtype}}
                for dev, dtype in dev_dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_upsampling_bilinear_with_type():
    """Run check_consistency on bilinear UpSampling across devices and dtypes.

    Covers gpu(0) with float64/float32/float16 and cpu(0) with
    float64/float32, all with an 'up_data' input of shape (2, 2, 2, 10).
    """
    sym = mx.sym.UpSampling(scale=2, num_filter=2, name='up',
                            sample_type='bilinear', num_args=1)
    shape = (2, 2, 2, 10)
    dev_dtypes = [(mx.gpu(0), np.float64),
                  (mx.gpu(0), np.float32),
                  (mx.gpu(0), np.float16),
                  (mx.cpu(0), np.float64),
                  (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'up_data': shape, 'type_dict': {'up_data': dtype}}
                for dev, dtype in dev_dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_concat_with_type():
    """Run check_consistency on a two-input Concat across devices and dtypes.

    Both inputs have shape (2, 10) and share one dtype per case: gpu(0) with
    float64/float32/float16 and cpu(0) with float64/float32.
    """
    sym = mx.sym.Concat(name='concat', num_args=2)
    shape = (2, 10)
    dev_dtypes = [(mx.gpu(0), np.float64),
                  (mx.gpu(0), np.float32),
                  (mx.gpu(0), np.float16),
                  (mx.cpu(0), np.float64),
                  (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'concat_arg1': shape, 'concat_arg0': shape,
                 'type_dict': {'concat_arg0': dtype, 'concat_arg1': dtype}}
                for dev, dtype in dev_dtypes]
    check_consistency(sym, ctx_list)
2016-06-09 01:32:07 +09:00
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_elementwisesum_with_type():
    """Run check_consistency on ElementWiseSum for 1 to 5 operands.

    For each operand count, every operand has shape (2, 10) and all operands
    of a case share one dtype: gpu(0) with float64/float32/float16 and
    cpu(0) with float64/float32.
    """
    dev_types = [[mx.gpu(0), [np.float64, np.float32, np.float16]],
                 [mx.cpu(0), [np.float64, np.float32]]]
    for num_args in range(1, 6):
        arg_names = ['ews_arg' + str(i) for i in range(num_args)]
        sym = mx.sym.ElementWiseSum(name='ews', num_args=num_args)
        ctx_list = []
        for dev, types in dev_types:
            for dtype in types:
                ctx_elem = {'ctx': dev,
                            'type_dict': {name: dtype for name in arg_names}}
                ctx_elem.update({name: (2, 10) for name in arg_names})
                ctx_list.append(ctx_elem)
        # Check inside the loop so every operand count is verified, not only
        # the last symbol built.
        check_consistency(sym, ctx_list)
2018-02-18 03:11:58 -08:00
@with_seed()
def test_reshape_with_type():
    """Run check_consistency on Reshape(shape=(-1, 1, 1, 0)) across devices and dtypes.

    Covers gpu(0) with float64/float32/float16 and cpu(0) with
    float64/float32, all with an input of shape (2, 2, 2, 10).
    """
    sym = mx.sym.Reshape(name='reshape', shape=(-1, 1, 1, 0))
    shape = (2, 2, 2, 10)
    dev_dtypes = [(mx.gpu(0), np.float64),
                  (mx.gpu(0), np.float32),
                  (mx.gpu(0), np.float16),
                  (mx.cpu(0), np.float64),
                  (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'reshape_data': shape, 'type_dict': {'reshape_data': dtype}}
                for dev, dtype in dev_dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_blockgrad_with_type():
    """Run check_consistency on BlockGrad across devices and dtypes.

    Covers gpu(0) with float64/float32/float16 and cpu(0) with
    float64/float32, all with an input of shape (2, 2, 2, 10).
    """
    sym = mx.sym.BlockGrad(name='bg')
    shape = (2, 2, 2, 10)
    dev_dtypes = [(mx.gpu(0), np.float64),
                  (mx.gpu(0), np.float32),
                  (mx.gpu(0), np.float16),
                  (mx.cpu(0), np.float64),
                  (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'bg_data': shape, 'type_dict': {'bg_data': dtype}}
                for dev, dtype in dev_dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_swapaxis_with_type():
    """Run check_consistency on SwapAxis(dim1=1) across devices and dtypes.

    Covers gpu(0) with float64/float32/float16 and cpu(0) with
    float64/float32, all with an input of shape (2, 2, 2, 10).
    """
    sym = mx.sym.SwapAxis(name='swap', dim1=1)
    shape = (2, 2, 2, 10)
    dev_dtypes = [(mx.gpu(0), np.float64),
                  (mx.gpu(0), np.float32),
                  (mx.gpu(0), np.float16),
                  (mx.cpu(0), np.float64),
                  (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'swap_data': shape, 'type_dict': {'swap_data': dtype}}
                for dev, dtype in dev_dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_fullyconnected_with_type():
    """Run check_consistency on FullyConnected across devices and dtypes.

    First a num_hidden=3 layer on a (2, 10) input is checked on gpu(0) with
    float64/float32/float16 and cpu(0) with float64/float32. Then a
    num_hidden=8 layer on a (16, 24) input is checked between gpu(0) float16
    and cpu(0) float32.
    """
    sym = mx.sym.FullyConnected(num_hidden=3, name='inner')
    dev_dtypes = [(mx.gpu(0), np.float64),
                  (mx.gpu(0), np.float32),
                  (mx.gpu(0), np.float16),
                  (mx.cpu(0), np.float64),
                  (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'inner_data': (2, 10), 'type_dict': {'inner_data': dtype}}
                for dev, dtype in dev_dtypes]
    check_consistency(sym, ctx_list)

    # Sizes are divisible by 8 to test TensorCore on Volta GPU.
    sym = mx.sym.FullyConnected(num_hidden=8, name='inner')
    ctx_list = [{'ctx': mx.gpu(0), 'inner_data': (16, 24),
                 'type_dict': {'inner_data': np.float16}},
                {'ctx': mx.cpu(0), 'inner_data': (16, 24),
                 'type_dict': {'inner_data': np.float32}}]
    check_consistency(sym, ctx_list)
2016-03-19 23:45:52 -07:00
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_activation_with_type():
    """Run check_consistency on Activation for several activation types.

    Each of 'relu', 'sigmoid', 'tanh', 'softrelu' and 'softsign' is checked
    on gpu(0) and cpu(0), each with float64/float32/float16, using an input
    of shape (2, 2, 10, 10).
    """
    act_types = ['relu', 'sigmoid', 'tanh', 'softrelu', 'softsign']
    shape = (2, 2, 10, 10)
    dev_dtypes = [(mx.gpu(0), np.float64),
                  (mx.gpu(0), np.float32),
                  (mx.gpu(0), np.float16),
                  (mx.cpu(0), np.float64),
                  (mx.cpu(0), np.float32),
                  (mx.cpu(0), np.float16)]
    for act_type in act_types:
        sym = mx.sym.Activation(name='act', act_type=act_type)
        ctx_list = [{'ctx': dev, 'act_data': shape, 'type_dict': {'act_data': dtype}}
                    for dev, dtype in dev_dtypes]
        check_consistency(sym, ctx_list)
2015-10-24 15:57:42 -07:00
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_lrn():
    """Run check_consistency on LRN between gpu(0) and cpu(0) with float32 input.

    The operator uses alpha=0.0001, beta=0.75, knorm=2, nsize=5 on an input
    of shape (2, 6, 10, 10).
    """
    sym = mx.sym.LRN(alpha=0.0001, beta=0.75, knorm=2, nsize=5, name='lrn')
    shape = (2, 6, 10, 10)
    ctx_list = [{'ctx': dev, 'lrn_data': shape, 'type_dict': {'lrn_data': np.float32}}
                for dev in (mx.gpu(0), mx.cpu(0))]
    check_consistency(sym, ctx_list)
2018-02-18 03:11:58 -08:00
@with_seed()
def test_embedding_with_type():
    """Run check_consistency on Embedding for combinations of index and weight dtypes.

    Index data is drawn with np.random.randint from
    [-low_pad, input_dim + high_pad), so a non-zero pad produces indices
    outside [0, input_dim). A gradient is requested for the weight only.
    """
    def test_embedding_helper(data_types, weight_types, low_pad, high_pad):
        """Check every (data dtype, weight dtype) pair on gpu(0) and cpu(0)."""
        # Each entry is (N, V, D): number of indices, input_dim, output_dim.
        NVD = [[20, 10, 20], [200, 10, 300]]
        for N, V, D in NVD:
            sym = mx.sym.Embedding(name='embedding', input_dim=V, output_dim=D)
            ctx_list = []
            for data_type in data_types:
                for weight_type in weight_types:
                    for dev in (mx.gpu(0), mx.cpu(0)):
                        ctx_list.append({'ctx': dev, 'embedding_data': (N,),
                                         'type_dict': {'embedding_data': data_type,
                                                       'embedding_weight': weight_type}})
            arg_params = {'embedding_data': np.random.randint(low=-low_pad,
                                                              high=V + high_pad,
                                                              size=(N,))}
            check_consistency(sym, ctx_list,
                              grad_req={'embedding_data': 'null',
                                        'embedding_weight': 'write'},
                              arg_params=arg_params, scale=0.1)

    data_types = [np.float16, np.float32, np.float64, np.int32]
    weight_types = [np.float16, np.float32, np.float64]
    test_embedding_helper(data_types, weight_types, 5, 5)

    # low_pad is 0 for uint8 indices (presumably because negative indices
    # cannot be represented -- confirm).
    data_types = [np.uint8]
    weight_types = [np.float16, np.float32, np.float64]
    test_embedding_helper(data_types, weight_types, 0, 5)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_svmoutput_with_type():
    """Run check_consistency on a linear SVMOutput for GPU and CPU in fp64, fp32 and fp16."""
    sym = mx.sym.SVMOutput(name='svmoutput', use_linear=True)
    data_shape = (20, 10)
    # Ordering: every GPU entry (fp64, fp32, fp16) first, then the same dtypes on CPU.
    ctx_list = [{'ctx': device(0),
                 'svmoutput_data': data_shape,
                 'type_dict': {'svmoutput_data': dtype}}
                for device in (mx.gpu, mx.cpu)
                for dtype in (np.float64, np.float32, np.float16)]
    check_consistency(sym, ctx_list, use_uniform=True)
2017-03-01 01:58:32 +08:00
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_take_with_type():
    """Run check_consistency on `take` with random shapes on GPU and CPU.

    Data rank runs over 2..4 and index rank over 1..3. Each data dimension is
    drawn from [3, 6) and each index dimension from [3, 5). Indices are drawn
    from [0, data_shape[0]) and receive no gradient ('null'); the data input
    uses grad_req 'write'.
    """
    sym = mx.sym.take(name='take')
    devices = (mx.gpu, mx.cpu)
    dtypes = (np.float64, np.float32, np.float16)
    grad_req = {'take_indices': 'null',
                'take_a': 'write'}

    for data_ndim in range(2, 5):
        for idx_ndim in range(1, 4):
            # Draw all data dims first, then all index dims, so the sequence of
            # random draws under the test seed stays the same.
            data_shape = tuple(np.random.randint(low=3, high=6)
                               for _ in range(data_ndim))
            idx_shape = tuple(np.random.randint(low=3, high=5)
                              for _ in range(idx_ndim))

            # GPU entries (fp64, fp32, fp16) followed by the CPU entries.
            ctx_list = [{'ctx': device(0),
                         'take_indices': idx_shape,
                         'take_a': data_shape,
                         'type_dict': {'take_indices': dtype,
                                       'take_a': dtype}}
                        for device in devices
                        for dtype in dtypes]

            # Indices are sampled before the data values, as in the dict literal order.
            indices = np.random.randint(low=0,
                                        high=data_shape[0],
                                        size=idx_shape)
            values = np.random.normal(size=data_shape)
            arg_params = {'take_indices': indices,
                          'take_a': values}

            check_consistency(sym, ctx_list,
                              grad_req=grad_req,
                              arg_params=arg_params)
2017-03-17 12:42:11 -07:00
2017-02-16 11:15:26 -08:00
def check_rnn_consistency(cell1, cell2):
    """Assert that two RNN cells give matching forward outputs with shared weights.

    Both cells are unrolled for 5 steps and bound as modules on mx.gpu(0) with
    input shape (32, 5, 200). The first module's initialized parameters are
    unpacked with cell1, packed with cell2 and loaded into the second module.
    Both modules then run an inference-mode forward pass on the same uniform
    random batch, and the first outputs are compared with rtol=1e-2, atol=1e-4.

    Parameters
    ----------
    cell1 : RNN cell whose module supplies the initialized parameters.
    cell2 : RNN cell whose module receives the converted parameters.
    """
    dshape = (32, 5, 200)
    data = mx.sym.Variable('data')

    def _bound_module(cell):
        # Unroll the cell over 5 steps and bind an unlabeled module on GPU 0.
        unrolled, _ = cell.unroll(5, data, merge_outputs=True)
        module = mx.mod.Module(unrolled, label_names=None, context=mx.gpu(0))
        module.bind(data_shapes=[('data', dshape)], label_shapes=None)
        return module

    mod1 = _bound_module(cell1)
    mod2 = _bound_module(cell2)

    # Transfer mod1's parameters to mod2, converting between the cells' weight layouts.
    mod1.init_params()
    args, auxs = mod1.get_params()
    unpacked = cell1.unpack_weights(args)
    repacked = cell2.pack_weights(unpacked)
    mod2.set_params(repacked, auxs)

    batch = mx.io.DataBatch(data=[mx.random.uniform(shape=dshape)], label=[])
    for module in (mod1, mod2):
        module.forward(batch, is_train=False)

    out1 = mod1.get_outputs()[0]
    out2 = mod2.get_outputs()[0]
    mx.test_utils.assert_allclose(out1, out2, rtol=1e-2, atol=1e-4)
2017-02-16 11:15:26 -08:00
2018-02-18 03:11:58 -08:00
@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
def test_rnn():
    """Compare a 2-layer fused 'rnn_relu' cell with two stacked relu RNNCells, in both directions."""
    fused = mx.rnn.FusedRNNCell(100, num_layers=2, mode='rnn_relu', prefix='')

    stack = mx.rnn.SequentialRNNCell()
    for layer_prefix in ('l0_', 'l1_'):
        stack.add(mx.rnn.RNNCell(100, activation='relu', prefix=layer_prefix))

    # Check with each cell acting as the parameter source in turn.
    for source, target in ((fused, stack), (stack, fused)):
        check_rnn_consistency(source, target)
2018-02-18 03:11:58 -08:00
@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
def test_lstm_forget_bias():
    """Check that a fused LSTM's forget-gate bias is initialized to `forget_bias`.

    After init_params, the unpacked parameters are searched for the first name
    ending in 'f_bias'; that array must match a length-10 vector filled with
    the configured forget bias (2.0).
    """
    forget_bias = 2.0
    num_hidden = 10
    fused = mx.rnn.FusedRNNCell(num_hidden, forget_bias=forget_bias, num_layers=2,
                                mode='lstm', prefix='')

    dshape = (32, 1, 20)
    data = mx.sym.Variable('data')
    sym, _ = fused.unroll(1, data, merge_outputs=True)

    mod = mx.mod.Module(sym, label_names=None, context=mx.gpu(0))
    mod.bind(data_shapes=[('data', dshape)], label_shapes=None)
    mod.init_params()

    packed_args, _auxs = mod.get_params()
    args = fused.unpack_weights(packed_args)

    # Only the first parameter whose name ends with 'f_bias' is inspected.
    bias_name = next(name for name in args if name.endswith('f_bias'))
    expected_bias = forget_bias * np.ones(num_hidden)
    mx.test_utils.assert_allclose(args[bias_name], expected_bias)
2017-03-22 00:52:36 +01:00
2018-02-18 03:11:58 -08:00
@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
def test_gru():
    """Compare a 2-layer fused 'gru' cell with two stacked GRUCells, in both directions."""
    fused = mx.rnn.FusedRNNCell(100, num_layers=2, mode='gru', prefix='')

    stack = mx.rnn.SequentialRNNCell()
    for layer_prefix in ('l0_', 'l1_'):
        stack.add(mx.rnn.GRUCell(100, prefix=layer_prefix))

    # Check with each cell acting as the parameter source in turn.
    for source, target in ((fused, stack), (stack, fused)):
        check_rnn_consistency(source, target)
2018-02-18 03:11:58 -08:00
@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
def test_bidirectional():
    """Compare a 2-layer bidirectional fused GRU with a stack of BidirectionalCells of GRUCells."""
    fused = mx.rnn.FusedRNNCell(100, num_layers=2, mode='gru', prefix='',
                                bidirectional=True)

    # (left-cell prefix, right-cell prefix, output prefix) for each layer.
    layer_prefixes = (('l0_', 'r0_', 'bi_gru_0_'),
                      ('l1_', 'r1_', 'bi_gru_1_'))
    stack = mx.rnn.SequentialRNNCell()
    for left_prefix, right_prefix, out_prefix in layer_prefixes:
        left_cell = mx.rnn.GRUCell(100, prefix=left_prefix)
        right_cell = mx.rnn.GRUCell(100, prefix=right_prefix)
        stack.add(mx.rnn.BidirectionalCell(left_cell, right_cell,
                                           output_prefix=out_prefix))

    # Check with each cell acting as the parameter source in turn.
    for source, target in ((fused, stack), (stack, fused)):
        check_rnn_consistency(source, target)
2018-02-18 03:11:58 -08:00
@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
def test_unfuse():
    """For every fused mode, compare the fused cell with its own `unfuse()` result, both directions.

    Each fused cell is a 2-layer bidirectional cell with dropout=0.5 and a
    mode-specific prefix.
    """
    modes = ('rnn_tanh', 'rnn_relu', 'lstm', 'gru')
    for mode in modes:
        cell_prefix = 'test_%s' % mode
        fused = mx.rnn.FusedRNNCell(100,
                                    num_layers=2,
                                    mode=mode,
                                    prefix=cell_prefix,
                                    bidirectional=True,
                                    dropout=0.5)
        stack = fused.unfuse()
        for source, target in ((fused, stack), (stack, fused)):
            check_rnn_consistency(source, target)
2017-03-09 01:31:05 +01:00
2018-02-18 03:11:58 -08:00
2018-08-07 18:33:55 -07:00
@with_seed()
def test_psroipooling_with_type():
    """Run check_consistency on contrib.PSROIPooling on GPU in fp64, fp32 and fp16.

    The two ROIs are supplied through arg_params and get grad_req 'null';
    the data input uses grad_req 'write'.
    """
    rois = np.array([[0, 10, 22, 161, 173], [0, 20, 15, 154, 160]])
    arg_params = {'psroipool_rois': rois}

    # plain psroipooling
    sym = mx.sym.contrib.PSROIPooling(spatial_scale=0.0625, output_dim=2,
                                      pooled_size=3, name='psroipool')

    ctx_list = [{'ctx': mx.gpu(0),
                 'psroipool_data': (1, 18, 14, 14),
                 'psroipool_rois': (2, 5),
                 'type_dict': {'psroipool_data': dtype, 'psroipool_rois': dtype}}
                for dtype in (np.float64, np.float32, np.float16)]

    grad_req = {'psroipool_data': 'write',
                'psroipool_rois': 'null'}
    check_consistency(sym, ctx_list, grad_req=grad_req, arg_params=arg_params)
2018-02-18 03:11:58 -08:00
2018-08-12 12:26:16 -07:00
@with_seed()
def test_deformable_psroipooling_with_type():
    """Run check_consistency on contrib.DeformablePSROIPooling for GPU and CPU in fp64, fp32, fp16.

    Per-dtype tolerances are passed as both rtol and atol. The ROIs are fixed
    through arg_params and get grad_req 'null'; the data and trans inputs use
    grad_req 'write'.
    """
    tol = {np.dtype(np.float32): 1e-1,
           np.dtype(np.float64): 1e-3,
           np.dtype(np.float16): 1e-2}

    rois = np.array([[0, 10, 22, 161, 173], [0, 20, 15, 154, 160]])
    arg_params = {'deformable_psroipool_rois': rois}

    # deformable psroipooling
    sym = mx.sym.contrib.DeformablePSROIPooling(spatial_scale=0.0625, sample_per_part=4,
                                                group_size=3, pooled_size=3,
                                                output_dim=2, trans_std=0.1, no_trans=False,
                                                name='deformable_psroipool')

    # GPU entries (fp64, fp32, fp16) followed by the CPU entries in the same dtype order.
    ctx_list = [{'ctx': device(0),
                 'deformable_psroipool_data': (1, 18, 14, 14),
                 'deformable_psroipool_rois': (2, 5),
                 'deformable_psroipool_trans': (2, 4, 3, 3),
                 'type_dict': {'deformable_psroipool_data': dtype,
                               'deformable_psroipool_rois': dtype,
                               'deformable_psroipool_trans': dtype}}
                for device in (mx.gpu, mx.cpu)
                for dtype in (np.float64, np.float32, np.float16)]

    grad_req = {'deformable_psroipool_data': 'write',
                'deformable_psroipool_rois': 'null',
                'deformable_psroipool_trans': 'write'}
    check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol,
                      grad_req=grad_req, arg_params=arg_params)
2017-06-18 01:53:37 +08:00
2018-02-18 03:11:58 -08:00
2018-08-12 12:26:16 -07:00
@with_seed()
def test_deformable_convolution_with_type():
    """Run check_consistency on contrib.DeformableConvolution for GPU and CPU in fp64 and fp32.

    The check runs twice: once with default gradient requests, and once with
    the bias gradient switched off ('null') while data, offset and weight
    gradients are written. Per-dtype tolerances are passed as both rtol and atol.
    """
    tol = {np.dtype(np.float32): 1e-1,
           np.dtype(np.float64): 1e-3}

    sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3, 3), name='deformable_conv')

    # since atomicAdd does not support fp16 (which deformable conv uses in backward), we do not test fp16 here
    # GPU entries (fp64, fp32) followed by the CPU entries in the same dtype order.
    ctx_list = [{'ctx': device(0),
                 'deformable_conv_data': (2, 2, 10, 10),
                 'deformable_conv_offset': (2, 18, 8, 8),
                 'type_dict': {'deformable_conv_data': dtype,
                               'deformable_conv_offset': dtype}}
                for device in (mx.gpu, mx.cpu)
                for dtype in (np.float64, np.float32)]

    check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol)

    # test ability to turn off training on bias
    no_bias_grad = {'deformable_conv_data': 'write',
                    'deformable_conv_offset': 'write',
                    'deformable_conv_weight': 'write',
                    'deformable_conv_bias': 'null'}
    check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol,
                      grad_req=no_bias_grad)
2018-02-18 03:11:58 -08:00
@with_seed()
def test_deformable_convolution_options():
    """Run check_consistency on DeformableConvolution for several operator options.

    Each option (padding, stride, dilation, multiple deformable groups) is
    checked with float64 and float32 inputs on both ``mx.gpu(0)`` and
    ``mx.cpu(0)``.
    """
    # Per-dtype tolerance table, passed to check_consistency as both rtol and atol.
    tol = {np.dtype(np.float32): 1e-1,
           np.dtype(np.float64): 1e-3}
    data_shape = (2, 2, 7, 7)

    def build_ctx_list(offset_shape):
        # Entry order is gpu/float64, gpu/float32, cpu/float64, cpu/float32.
        return [{'ctx': make_ctx(0),
                 'deformable_conv_data': data_shape,
                 'deformable_conv_offset': offset_shape,
                 'type_dict': {'deformable_conv_data': dtype,
                               'deformable_conv_offset': dtype}}
                for make_ctx in (mx.gpu, mx.cpu)
                for dtype in (np.float64, np.float32)]

    # 2D convolution
    # since atomicAdd does not support fp16 (which deformable conv uses in backward), we do not test fp16 here
    cases = [
        # Pad > 0
        ((2, 18, 7, 7), dict(num_filter=3, kernel=(3, 3), pad=(1, 1))),
        # Stride > 1
        ((2, 18, 3, 3), dict(num_filter=3, kernel=(3, 3), stride=(2, 2))),
        # Dilate > 1
        ((2, 18, 3, 3), dict(num_filter=3, kernel=(3, 3), dilate=(2, 2))),
        # Deformable group > 1
        ((2, 36, 5, 5), dict(num_filter=4, kernel=(3, 3), num_deformable_group=2)),
    ]
    for offset_shape, op_kwargs in cases:
        ctx_list = build_ctx_list(offset_shape)
        sym = mx.sym.contrib.DeformableConvolution(name='deformable_conv', **op_kwargs)
        check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol)
2019-05-23 21:00:33 -05:00
2017-05-31 09:56:32 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
def test_residual_fused():
    """Unroll a ResidualCell wrapping a fused 3-layer LSTM and evaluate it on GPU 0.

    All RNN parameters are fed as zeros and every input element is 6; the final
    assertion expects the output to be exactly 6 everywhere.
    """
    gpu = mx.gpu(0)
    fused = mx.rnn.FusedRNNCell(50, num_layers=3, mode='lstm',
                                prefix='rnn_', dropout=0.5)
    cell = mx.rnn.ResidualCell(fused)

    step_inputs = [mx.sym.Variable('rnn_t%d_data' % step) for step in range(2)]
    unrolled, _ = cell.unroll(2, step_inputs, merge_outputs=None)
    # The fused cell is expected to expose one flat parameter blob.
    assert sorted(cell.params._params.keys()) == ['rnn_parameters']

    _, out_shapes, _ = unrolled.infer_shape(rnn_t0_data=(10, 50),
                                            rnn_t1_data=(10, 50))
    assert out_shapes == [(10, 2, 50)]

    feed = {
        'rnn_t0_data': mx.nd.ones((10, 50), ctx=gpu) + 5,
        'rnn_t1_data': mx.nd.ones((10, 50), ctx=gpu) + 5,
        'rnn_parameters': mx.nd.zeros((61200,), ctx=gpu),
    }
    results = unrolled.eval(ctx=gpu, **feed)
    expected = np.ones((10, 2, 50)) + 5
    assert np.array_equal(results[0].asnumpy(), expected)
2018-02-18 03:11:58 -08:00
2017-06-26 22:37:11 -07:00
def check_rnn_layer(layer):
    """Compare GPU and CPU forward results of ``layer`` on an all-ones input.

    Parameters
    ----------
    layer : object
        Called as ``layer(x, states)``; must also provide ``collect_params()``
        and ``begin_state(batch_size)``.
    """
    layer.collect_params().initialize(ctx=[mx.cpu(0), mx.gpu(0)])

    def forward_on(ctx):
        # Input and initial states are created inside the context scope,
        # then one forward pass is run there.
        with ctx:
            data = mx.nd.ones((10, 16, 30))
            init_states = layer.begin_state(16)
            return layer(data, init_states)

    gpu_out, gpu_states = forward_on(mx.gpu(0))
    cpu_out, cpu_states = forward_on(mx.cpu(0))

    # atol of 1e-6 required, as exposed by seed 2124685726
    assert_almost_equal(gpu_out, cpu_out, rtol=1e-2, atol=1e-6)
    for gpu_state, cpu_state in zip(gpu_states, cpu_states):
        assert_almost_equal(gpu_state, cpu_state, rtol=1e-2, atol=1e-6)
2017-06-05 10:07:12 -07:00
2018-03-09 21:46:34 -08:00
def check_rnn_layer_w_rand_inputs(layer):
    """Compare GPU and CPU forward results of ``layer`` on one uniform random input.

    Parameters
    ----------
    layer : object
        Called as ``layer(x, states)``; must also provide ``collect_params()``
        and ``begin_state(batch_size)``.
    """
    layer.collect_params().initialize(ctx=[mx.cpu(0), mx.gpu(0)])
    sample = mx.nd.uniform(shape=(10, 16, 30))

    with mx.gpu(0):
        gpu_input = sample.copyto(mx.gpu(0))
        gpu_init = layer.begin_state(16)
        gpu_out, gpu_states = layer(gpu_input, gpu_init)

    with mx.cpu(0):
        # The CPU input is copied back from the GPU copy, so both runs see
        # the same values.
        cpu_input = gpu_input.copyto(mx.cpu(0))
        cpu_init = layer.begin_state(16)
        cpu_out, cpu_states = layer(cpu_input, cpu_init)

    assert_almost_equal(gpu_out, cpu_out, rtol=1e-2, atol=1e-6)
    for gpu_state, cpu_state in zip(gpu_states, cpu_states):
        assert_almost_equal(gpu_state, cpu_state, rtol=1e-2, atol=1e-6)
2018-03-09 21:46:34 -08:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_sequence_reverse():
    """Run ``check_sequence_reverse`` with the ``mx.gpu(0)`` context."""
    gpu = mx.gpu(0)
    check_sequence_reverse(gpu)
2018-02-18 03:11:58 -08:00
@with_seed()
def test_autograd_save_memory():
    """Record 200 chained ``x + 1`` steps on a (128, 512, 512) GPU array, then backprop.

    The test has no explicit assertion; it passes if recording and the
    backward pass complete without raising.
    """
    acc = mx.nd.zeros((128, 512, 512), ctx=mx.gpu(0))
    acc.attach_grad()

    num_steps = 200
    with mx.autograd.record():
        for _ in range(num_steps):
            acc = acc + 1
            # Block until this step's result is computed before queueing the next.
            acc.wait_to_read()
    acc.backward()
2017-08-15 12:24:35 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_cuda_rtc():
    """Compile two CUDA kernels at runtime via ``mx.rtc`` and check their results.

    ``axpy`` computes ``y += alpha * x`` directly; ``saxpy`` does the same but
    stages ``x`` through dynamically sized shared memory. ``y`` accumulates
    across the three launches (3, then 7, then 12).
    """
    source = r'''
    extern "C" __global__ void axpy(const float *x, float *y, float alpha) {
        int i = threadIdx.x + blockIdx.x * blockDim.x;
        y[i] += alpha * x[i];
    }

    extern "C" __global__ void saxpy(const float *x, float *y, float alpha) {
        extern __shared__ float smem[];
        int i = threadIdx.x + blockIdx.x * blockDim.x;
        smem[threadIdx.x] = x[i];
        y[i] += alpha * smem[threadIdx.x];
    }
    '''
    # saxpy stores one float per thread in shared memory, and the shared-memory
    # launch argument is a size in bytes, so it must be threads * sizeof(float).
    # The previous values (10 and 5) under-allocated the buffer.
    float_bytes = np.dtype(np.float32).itemsize

    module = mx.rtc.CudaModule(source)
    axpy = module.get_kernel("axpy", "const float *x, float *y, float alpha")
    x = mx.nd.ones((10,), ctx=mx.gpu(0))
    y = mx.nd.zeros((10,), ctx=mx.gpu(0))

    # One block of 10 threads, no shared memory.
    axpy.launch([x, y, 3.0], mx.gpu(0), (1, 1, 1), (10, 1, 1))
    assert (y.asnumpy() == 3).all()

    saxpy = module.get_kernel("saxpy", "const float *x, float *y, float alpha")
    # One block of 10 threads.
    saxpy.launch([x, y, 4.0], mx.gpu(0), (1, 1, 1), (10, 1, 1), 10 * float_bytes)
    assert (y.asnumpy() == 7).all()

    # Two blocks of 5 threads each.
    saxpy.launch([x, y, 5.0], mx.gpu(0), (2, 1, 1), (5, 1, 1), 5 * float_bytes)
    assert (y.asnumpy() == 12).all()
2018-02-18 03:11:58 -08:00
@with_seed()
def test_cross_device_autograd():
    """Check gradients through tanh ops interleaved with CPU/GPU copies.

    The gradient of three chained ``tanh`` calls with device copies in between
    is compared against the gradient of the same three ``tanh`` calls computed
    without any copies.
    """
    x = mx.nd.random.uniform(shape=(10,))
    x.attach_grad()

    with mx.autograd.record():
        hopped = mx.nd.tanh(x)
        # tanh after each hop: first on the GPU, then back on the CPU.
        for target in (mx.gpu(0), mx.cpu(0)):
            hopped = mx.nd.tanh(hopped.copyto(target))
        # Two further copies to the GPU (the second is GPU to GPU).
        hopped = hopped.copyto(mx.gpu(0))
        hopped = hopped.copyto(mx.gpu(0))

        hopped.backward()

    cross_device_grad = x.grad.copy()
    # Clear the gradient buffer before the reference pass.
    x.grad[:] = 0

    with mx.autograd.record():
        reference = x
        for _ in range(3):
            reference = mx.nd.tanh(reference)
        reference.backward()

    assert_almost_equal(cross_device_grad, x.grad)
2017-10-02 11:48:51 -07:00
2018-03-21 06:56:55 +08:00
@with_seed ( )
def test_multi_proposal_op ( ) :
# parameters
feature_stride = 16
scales = ( 8 , 16 , 32 )
ratios = ( 0.5 , 1 , 2 )
rpn_pre_nms_top_n = 12000
rpn_post_nms_top_n = 2000
rpn_min_size = feature_stride
feat_len = ( 1000 + 15 ) / / 16
H , W = feat_len , feat_len
num_anchors = len ( scales ) * len ( ratios )
count_anchors = H * W * num_anchors
def get_new_data ( batch_size , ctx ) :
'''
cls_prob: (batch_size, 2 * num_anchors, H, W)
bbox_pred: (batch_size, 4 * num_anchors, H, W)
im_info: (batch_size, 3)
'''
dtype = np . float32
cls_prob = mx . nd . empty ( ( batch_size , 2 * num_anchors , H , W ) , dtype = dtype , ctx = ctx )
bbox_pred = mx . nd . empty ( ( batch_size , 4 * num_anchors , H , W ) , dtype = dtype , ctx = ctx )
im_info = mx . nd . empty ( ( batch_size , 3 ) , dtype = dtype , ctx = ctx )
cls = [ 1.0 * ( i + 1 ) / cls_prob . size for i in range ( cls_prob . size ) ]
np . random . shuffle ( cls )
cls_prob = mx . nd . reshape ( mx . nd . array ( cls , dtype = dtype , ctx = ctx ) , shape = cls_prob . shape )
bbox_pred = mx . nd . array ( np . random . randint ( - 2 , 3 , size = bbox_pred . shape ) , dtype = dtype , ctx = ctx )
for i in range ( batch_size ) :
im_size = np . random . randint ( 600 , feat_len * feature_stride , size = ( 2 , ) )
im_scale = np . random . randint ( 80 , 100 ) / 100.0
im_info [ i , : ] = [ im_size [ 0 ] , im_size [ 1 ] , im_scale ]
return cls_prob , bbox_pred , im_info
2018-07-02 17:28:52 -07:00
def check_proposal_consistency ( op , batch_size , with_nms = False ) :
2018-03-21 06:56:55 +08:00
'''
op is mx.nd.contrib.Proposal or mx.nd.contrib.MultiProposal
'''
cls_prob , bbox_pred , im_info = get_new_data ( batch_size , mx . cpu ( 0 ) )
rois_cpu , score_cpu = op (
2018-05-30 06:38:16 +08:00
cls_prob = cls_prob ,
2018-03-21 06:56:55 +08:00
bbox_pred = bbox_pred ,
im_info = im_info ,
feature_stride = feature_stride ,
scales = scales ,
ratios = ratios ,
rpn_pre_nms_top_n = rpn_pre_nms_top_n ,
rpn_post_nms_top_n = rpn_post_nms_top_n ,
2018-07-02 17:28:52 -07:00
threshold = 0.7 if with_nms else 1.0 ,
2018-03-21 06:56:55 +08:00
rpn_min_size = rpn_min_size , output_score = True )
gpu_ctx = mx . gpu ( 0 )
# copy data to gpu from cpu
cls_prob_gpu = cls_prob . as_in_context ( gpu_ctx )
bbox_pred_gpu = bbox_pred . as_in_context ( gpu_ctx )
im_info_gpu = im_info . as_in_context ( gpu_ctx )
rois_gpu , score_gpu = op (
2018-05-30 06:38:16 +08:00
cls_prob = cls_prob_gpu ,
2018-03-21 06:56:55 +08:00
bbox_pred = bbox_pred_gpu ,
im_info = im_info_gpu ,
feature_stride = feature_stride ,
scales = scales ,
ratios = ratios ,
rpn_pre_nms_top_n = rpn_pre_nms_top_n ,
rpn_post_nms_top_n = rpn_post_nms_top_n ,
2018-07-02 17:28:52 -07:00
threshold = 0.7 if with_nms else 1.0 ,
2018-03-21 06:56:55 +08:00
rpn_min_size = rpn_min_size , output_score = True )
rois_cpu_np = rois_cpu . asnumpy ( )
rois_gpu_np = rois_gpu . asnumpy ( )
score_cpu_np = score_cpu . asnumpy ( )
score_gpu_np = score_gpu . asnumpy ( )
2018-07-02 17:28:52 -07:00
if not with_nms :
assert_almost_equal ( score_cpu_np , score_gpu_np , atol = 1e-3 , rtol = 1e-3 )
assert_almost_equal ( rois_cpu_np , rois_gpu_np , atol = 1e-3 , rtol = 1e-3 )
else :
# no 100% gurantee with nms
assert ( np . sum ( np . abs ( score_cpu_np - score_gpu_np ) < 1e-3 ) > = 10 )
assert ( np . sum ( np . abs ( rois_cpu_np - rois_gpu_np ) < 1e-3 ) > = 40 )
2018-03-21 06:56:55 +08:00
check_proposal_consistency ( mx . nd . contrib . Proposal , 1 )
2018-07-02 17:28:52 -07:00
check_proposal_consistency ( mx . nd . contrib . MultiProposal , 5 )
check_proposal_consistency ( mx . nd . contrib . Proposal , 1 , with_nms = True )
check_proposal_consistency ( mx . nd . contrib . MultiProposal , 5 , with_nms = True )
2018-03-21 06:56:55 +08:00
2018-01-30 10:45:25 -08:00
# The following 2 functions launch 0-thread kernels, an error that should be caught and signaled.
def kernel_error_check_imperative():
    """Imperatively divide a 3-element GPU array by an empty GPU array.

    Runs with the engine-type setting applied through `environment` and with
    mx.np_shape active. The result is fetched with asnumpy().
    """
    with environment(' MXNET_ENGINE_TYPE ', ' NaiveEngine '), mx.np_shape(active=True):
        numerator = mx.nd.array([1, 2, 3], ctx=mx.gpu(0))
        empty_denominator = mx.nd.array([], ctx=mx.gpu(0))
        # Fetching the result is the point at which an error is expected to surface.
        (numerator / empty_denominator).asnumpy()
2018-01-30 10:45:25 -08:00
def kernel_error_check_symbolic():
    """Symbolic counterpart of the imperative probe: bind a / b with an empty b on GPU.

    Runs with the engine-type setting applied through `environment` and with
    mx.np_shape active. The first output is fetched with asnumpy().
    """
    with environment(' MXNET_ENGINE_TYPE ', ' NaiveEngine '), mx.np_shape(active=True):
        lhs = mx.sym.Variable(' a ')
        rhs = mx.sym.Variable(' b ')
        quotient = lhs / rhs
        bindings = {' a ': mx.nd.array([1, 2, 3], ctx=mx.gpu(0)),
                    ' b ': mx.nd.array([], ctx=mx.gpu(0))}
        executor = quotient.bind(mx.gpu(0), bindings)
        executor.forward()
        # Fetching the output is the point at which an error is expected to surface.
        executor.outputs[0].asnumpy()
2018-01-30 10:45:25 -08:00
2022-11-21 09:02:56 -08:00
@unittest.skip(' skippping temporarily, tracked by https://github.com/apache/mxnet/issues/20011 ')
def test_kernel_error_checking():
    """Run each kernel-error probe in its own process and require a non-zero exit code.

    If the multiprocessing context cannot be obtained, a SKIP note is written
    to stderr and nothing is checked.
    """
    # Running tests that may throw exceptions out of worker threads will stop CI testing
    # if not run in a separate process (with its own address space for CUDA compatibility).
    try:
        mpctx = mp.get_context(' spawn ')
    except (AttributeError, ValueError):
        # AttributeError: this multiprocessing module has no get_context().
        # ValueError: the requested start method is not available.
        # Anything else (including KeyboardInterrupt) is allowed to propagate.
        print(' SKIP: python %s . %s lacks the required process fork-exec support ... ' %
              sys.version_info[0:2], file=sys.stderr, end=' ')
    else:
        with discard_stderr():
            for f in [kernel_error_check_imperative, kernel_error_check_symbolic]:
                p = mpctx.Process(target=f)
                p.start()
                p.join()
                assert p.exitcode != 0, \
                    " Expected a synchronous kernel error from %s (), none seen. " % f.__name__
2018-04-03 10:33:56 -07:00
def test_incorrect_gpu():
    """Creating an array on a GPU with a very large device id must raise MXNetError."""
    # Try setting dev_id to a really big number
    huge_dev_id = 100001
    assert_raises(MXNetError, mx.nd.ones, (2, 2), ctx=mx.gpu(huge_dev_id))
2018-01-30 10:45:25 -08:00
2018-04-09 14:43:53 -07:00
@with_seed()
def test_batchnorm_backwards_notrain():
    """Run BatchNorm backward with train_mode=False on CPU and GPU, cudnn_off both ways.

    There are no value assertions; the test passes when nothing raises.
    """
    B, C, H, W = 4, 3, 2, 2

    def channel_vector(ctx):
        # One normally distributed value per channel, placed on `ctx`.
        return mx.nd.random.normal(shape=(C)).as_in_context(ctx)

    for ctx in [mx.cpu(0), mx.gpu(0)]:
        for cudnn_o in [False, True]:
            x = mx.nd.random.poisson(1, shape=(B, C, H, W)).as_in_context(ctx)
            gamma = channel_vector(ctx)
            beta = channel_vector(ctx)
            mean = channel_vector(ctx)
            std = channel_vector(ctx)
            x.attach_grad()

            with autograd.record(False):
                # The moving variance passed to the operator is std squared.
                variance = std.square()
                y = mx.ndarray.BatchNorm(x, gamma, beta, mean, variance,
                                         fix_gamma=False, cudnn_off=cudnn_o)
                loss = y.square().sum()
            loss.backward(train_mode=False)
2018-05-06 13:57:15 -07:00
@with_seed()
def test_create_sparse_ndarray_gpu_to_cpu():
    """Rebuild a random row-sparse array on mx.cpu() from its data and indices.

    Checks the storage type, the data and indices, and a copy made with
    mx.nd.array, for three densities.
    """
    dim0, dim1 = 10, 5
    for density in (0, 0.5, 1):
        shape = rand_shape_2d(dim0, dim1)
        source = rand_ndarray(shape, ' row_sparse ', density)
        data, indices = source.data, source.indices

        rsp_created = mx.nd.sparse.row_sparse_array((data, indices), shape=shape, ctx=mx.cpu())
        assert rsp_created.stype == ' row_sparse '
        assert same(rsp_created.data.asnumpy(), data.asnumpy())
        assert same(rsp_created.indices.asnumpy(), indices.asnumpy())

        rsp_copy = mx.nd.array(rsp_created)
        assert same(rsp_copy.asnumpy(), rsp_created.asnumpy())
2018-05-12 22:48:34 -07:00
@with_seed()
def test_softmax_activation():
    """SoftmaxActivation output and input gradient must agree between CPU and GPU."""
    values = [[3., 0.5, -0.5, 2., 7.],
              [2., -.4, 7., 3., 0.2]]
    gpu_a = mx.nd.array(values, ctx=mx.gpu(0))
    cpu_a = mx.nd.array(values, ctx=mx.cpu())

    cpu_a.attach_grad()
    gpu_a.attach_grad()

    tolerance = dict(atol=1e-3, rtol=1e-3)
    # NOTE(review): the dump has lost indentation; the checks are kept inside
    # the record scope here -- confirm against the upstream file.
    with mx.autograd.record():
        gpu_y = mx.nd.SoftmaxActivation(data=gpu_a)
        cpu_y = mx.nd.SoftmaxActivation(data=cpu_a)
        assert_almost_equal(cpu_y, gpu_y, **tolerance)

        gpu_y.backward()
        cpu_y.backward()
        assert_almost_equal(cpu_a.grad, gpu_a.grad, **tolerance)
2018-05-12 22:48:34 -07:00
2018-09-05 11:34:54 -07:00
@with_seed()
def test_bilinear_sampler_versions():
    """Cross-check BilinearSampler forward and backward across three executors.

    For each (data_shape, grid_shape) case the symbol is bound on mx.cpu(),
    on default_context() with cudnn_off=True, and on default_context() with
    default settings. Outputs and gradients are compared against the CPU
    executor for the write, add and mixed write/null gradient requests.
    """
    data = mx.sym.Variable(' data ')
    grid = mx.sym.Variable(' grid ')
    sym1 = mx.sym.BilinearSampler(data=data, grid=grid)
    sym2 = mx.sym.BilinearSampler(data=data, grid=grid, cudnn_off=True)
    # sym3 is built exactly like sym1; it is the one bound on default_context() below.
    sym3 = mx.sym.BilinearSampler(data=data, grid=grid)

    def bind_all(data_shape, grid_shape, grad_req):
        """Return [cpu, cudnn_off, default] executors bound with `grad_req`."""
        return [
            sym1.simple_bind(data=data_shape, grid=grid_shape, ctx=mx.cpu(), grad_req=grad_req),
            sym2.simple_bind(data=data_shape, grid=grid_shape, ctx=default_context(), grad_req=grad_req),
            sym3.simple_bind(data=data_shape, grid=grid_shape, ctx=default_context(), grad_req=grad_req),
        ]

    def load_inputs(exe, test_data, test_grid):
        """Copy the shared input arrays into the executor's argument buffers."""
        exe.arg_dict[' data '][:] = test_data
        exe.arg_dict[' grid '][:] = test_grid

    test_cases = [[(1, 3, 15, 16), (1, 2, 10, 10)],
                  [(1, 6, 7, 16), (1, 2, 10, 4)],
                  [(1, 7, 3, 16), (1, 2, 8, 11)],
                  [(1, 9, 50, 50), (1, 2, 50, 50)]]
    ref_idx = 0  # index of the CPU executor, used as the reference

    for data_shape, grid_shape in test_cases:
        # kWriteTo
        exe_list = bind_all(data_shape, grid_shape, ' write ')
        test_data = np.random.uniform(low=-0.1, high=0.1, size=data_shape).astype(np.float32)
        test_grid = np.random.uniform(low=-2, high=2, size=grid_shape).astype(np.float32)
        for exe in exe_list:
            load_inputs(exe, test_data, test_grid)
            exe.forward(is_train=True)
            mx.test_utils.assert_almost_equal(exe_list[ref_idx].outputs[0], exe.outputs[0], rtol=1e-3, atol=1e-5)

        out_grad = np.random.uniform(low=-0.01, high=0.01, size=data_shape[:2] + grid_shape[2:]).astype(np.float32)
        for exe in exe_list:
            exe.backward(mx.nd.array(out_grad))
            assert_almost_equal(exe.grad_dict[' data '], exe_list[ref_idx].grad_dict[' data '], rtol=1e-3, atol=1e-5)
            assert_almost_equal(exe.grad_dict[' grid '], exe_list[ref_idx].grad_dict[' grid '], rtol=1e-3, atol=1e-5)

        # Reference gradients from the write pass, used to verify accumulation below.
        data_grad = exe_list[ref_idx].grad_dict[' data '].asnumpy()
        grid_grad = exe_list[ref_idx].grad_dict[' grid '].asnumpy()

        # kAddTo
        exe_list = bind_all(data_shape, grid_shape, ' add ')
        data_initial_grad = np.random.normal(size=exe_list[ref_idx].grad_dict[' data '].shape).astype(np.float32)
        grid_initial_grad = np.random.normal(size=exe_list[ref_idx].grad_dict[' grid '].shape).astype(np.float32)
        for exe in exe_list:
            load_inputs(exe, test_data, test_grid)
            exe.grad_dict[' data '][:] = data_initial_grad
            exe.grad_dict[' grid '][:] = grid_initial_grad
            exe.forward(is_train=True)
            exe.backward(mx.nd.array(out_grad))
            assert_almost_equal(exe.grad_dict[' data '], exe_list[ref_idx].grad_dict[' data '], rtol=1e-3, atol=1e-5)
            assert_almost_equal(exe.grad_dict[' grid '], exe_list[ref_idx].grad_dict[' grid '], rtol=1e-3, atol=1e-5)
        # With the add request the result must be the initial gradient plus the write-pass gradient.
        assert_almost_equal(exe_list[ref_idx].grad_dict[' data '], data_grad + data_initial_grad, rtol=1e-3, atol=1e-5)
        assert_almost_equal(exe_list[ref_idx].grad_dict[' grid '], grid_grad + grid_initial_grad, rtol=1e-3, atol=1e-5)

        for req_dict in [{' data ': ' null ', ' grid ': ' write '}, {' data ': ' write ', ' grid ': ' null '}]:
            # Mixture of kWriteTo and kNullOp
            exe_list = bind_all(data_shape, grid_shape, req_dict)
            for exe in exe_list:
                load_inputs(exe, test_data, test_grid)
                exe.forward(is_train=True)
                exe.backward(mx.nd.array(out_grad))
                # Compare by value: identity (`is`) on str literals only works
                # by accident of interning and warns on newer Python versions.
                if req_dict[' data '] == ' write ':
                    assert_almost_equal(exe.grad_dict[' data '], exe_list[ref_idx].grad_dict[' data '], rtol=1e-3, atol=1e-5)
                if req_dict[' grid '] == ' write ':
                    assert_almost_equal(exe.grad_dict[' grid '], exe_list[ref_idx].grad_dict[' grid '], rtol=1e-3, atol=1e-5)
2018-09-11 10:20:41 -07:00
2018-09-05 11:34:54 -07:00
2019-03-12 13:25:12 -07:00
# isolated execution bulking test function to be invoked with different env var settings
def _test_bulking_in_process(seed, time_per_iteration):
    """Time forward+backward over a chain of 1000 flip ops and report seconds per iteration.

    `seed` is accepted but not read here. The measured average is stored in
    `time_per_iteration.value`.
    """
    data_shape = (10,)
    num_ops = 1000
    num_iterations = 20
    warmups = 1

    ctx = default_context()
    # build symbol: flip applied num_ops times in a row
    X = mx.sym.Variable(' X ')
    sym = X
    for _ in range(num_ops):
        sym = mx.sym.flip(sym, axis=0)

    x = mx.ndarray.zeros(data_shape)
    dx = mx.ndarray.zeros(data_shape)
    dy = mx.ndarray.ones(data_shape)
    exe = sym.bind(ctx=ctx, args=[x], args_grad={' X ': dx})

    def run_once():
        exe.forward(is_train=True)
        exe.backward(dy)
        dx.wait_to_read()

    # time a number of forward() and backward() executions after some warm-up iterations
    for _ in range(warmups):
        run_once()
    start = time.time()
    for _ in range(num_iterations):
        run_once()
    time_per_iteration.value = (time.time() - start) / num_iterations
2019-10-15 15:56:43 -07:00
2019-03-12 13:25:12 -07:00
@with_seed()
@unittest.skip(' skippping temporarily, tracked by https://github.com/apache/mxnet/issues/16517 ')
def test_bulking_operator_gpu():
    """Hand the in-process bulking timing function to the _test_bulking driver."""
    worker = _test_bulking_in_process
    _test_bulking(worker)
2022-11-21 09:02:56 -08:00
@unittest.skip(' skippping temporarily, tracked by https://github.com/apache/mxnet/issues/14970 ')
def test_bulking():
    """Time the bulking workload under several segment-size settings and compare them.

    Each setting runs in a spawned process with the three bulking environment
    variables set. Half-bulked runs must beat every non-bulked run, and the
    fully bulked run must beat both half-bulked runs.
    """
    # test case format: (max_fwd_segment_size, max_bwd_segment_size, enable_bulking_in_training)
    test_cases = [(0, 0, True), (1, 1, True), (15, 15, False), (15, 0, True), (0, 15, True), (15, 15, True)]
    times = {}
    times_str = ' '
    for seg_sizes in test_cases:
        fwd_size, bwd_size, bulk_enabled = seg_sizes
        # Create shared variable to return measured time from test process
        time_per_iteration = mp.Manager().Value(' d ', 0.0)
        env = {' MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_FWD ': str(fwd_size),
               ' MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_BWD ': str(bwd_size),
               ' MXNET_EXEC_BULK_EXEC_TRAIN ': str(bulk_enabled)}
        ran = run_in_spawned_process(_test_bulking_in_process, env, time_per_iteration)
        if not ran:
            # skip test since the python version can't run it properly.  Warning msg was logged.
            return
        elapsed = time_per_iteration.value
        times[seg_sizes] = elapsed
        times_str += \
            ' \n runtime of (fwd,bwd,enable) op seg setting ( {} , {} , {} ) = \t {:.1f} msec '.format(
                fwd_size, bwd_size, bulk_enabled, 1000.0 * elapsed)

    non_bulked = [times[(0, 0, True)], times[(1, 1, True)], times[(15, 15, False)]]
    half_bulked = [times[(0, 15, True)], times[(15, 0, True)]]
    fastest_non_bulked_time = min(non_bulked)
    slowest_half_bulked_time = max(half_bulked)
    fastest_half_bulked_time = min(half_bulked)
    fully_bulked_time = times[(15, 15, True)]

    print(times_str)
    # Non-bulked times[0,0,True], times[1,1,True] and times[15,15,False] should be about the same,
    # slower than both half-bulked times[0,15,True] and times[15,0,True]
    assert slowest_half_bulked_time < fastest_non_bulked_time, \
        ' A half-bulked exec time is slower than the non-bulked time by {} secs! {} ' \
        .format(slowest_half_bulked_time - fastest_non_bulked_time, times_str)
    # The fully bulked times[15,15,True] should be faster than both half-bulked runs
    assert fully_bulked_time < fastest_half_bulked_time, \
        ' The fully-bulked exec time is slower than a half-bulked time by {} secs! {} ' \
        .format(fully_bulked_time - fastest_half_bulked_time, times_str)
2019-10-15 15:56:43 -07:00
@with_seed()
def test_allclose_function_gpu():
    """Run the allclose_function check over a CPU context and GPU 0."""
    contexts = [mx.cpu(), mx.gpu(0)]
    allclose_function(contexts)
2018-05-15 20:39:02 +02:00
def test_context_num_gpus():
    """mx.context.num_gpus() must report at least one device."""
    # Test that num_gpus reports at least one GPU, as the test is run on a GPU host.
    gpu_count = mx.context.num_gpus()
    assert gpu_count > 0
2018-05-12 22:48:34 -07:00
2019-05-23 07:17:38 +08:00
def _check_unary_math_op(op, shape, dtype, check_value):
    """Apply the unary operator `op` to random data and optionally verify it on CPU.

    The input is drawn with np.random.rand (values in [0, 1)) and created with
    mx.nd.array without an explicit context. When `check_value` is true the
    same input is copied to mx.cpu(), `op` is applied again there, and the two
    results are compared with assert_almost_equal.
    """
    np_x = np.random.rand(*tuple(shape))
    x = mx.nd.array(np_x, dtype=dtype)
    y = op(data=x)
    if check_value:
        x_ = x.as_in_context(mx.cpu())
        y_ = op(data=x_)
        assert_almost_equal(y.asnumpy(), y_.asnumpy())


def math_log(shape, dtype, check_value):
    """Run mx.nd.log on random data of `shape`/`dtype`; see _check_unary_math_op."""
    _check_unary_math_op(mx.nd.log, shape, dtype, check_value)


def math_erf(shape, dtype, check_value):
    """Run mx.nd.erf on random data of `shape`/`dtype`; see _check_unary_math_op."""
    _check_unary_math_op(mx.nd.erf, shape, dtype, check_value)


def math_square(shape, dtype, check_value):
    """Run mx.nd.square on random data of `shape`/`dtype`; see _check_unary_math_op."""
    _check_unary_math_op(mx.nd.square, shape, dtype, check_value)
def run_math(op, shape, dtype=" float32 ", check_value=True):
    """Run the math check selected by `op` ten times; unrecognised names do nothing."""
    run_num = 10
    # Resolve the checker once instead of re-testing the name on every pass.
    if op == ' log ':
        checker = math_log
    elif op == ' erf ':
        checker = math_erf
    elif op == ' square ':
        checker = math_square
    else:
        return
    for _ in range(run_num):
        checker(shape=shape, dtype=dtype, check_value=check_value)
@with_seed()
def test_math():
    """Run the log, erf and square checks for every listed shape and dtype."""
    op_names = (' log ', ' erf ', ' square ')
    dtype_names = (" float32 ", " float64 ")
    shape_lst = [[1000], [100, 1000], [10, 100, 100], [10, 100, 100, 100]]
    check_value = True
    for shape in shape_lst:
        for dtype in dtype_names:
            for name in op_names:
                run_math(name, shape, dtype, check_value=check_value)
2019-08-26 07:37:39 +08:00
@with_seed()
def test_arange_like_dtype():
    """Every output of contrib.arange_like bound on GPU 0 must keep the input dtype."""
    for dtype in (np.float16, np.float32, np.float64):
        x = mx.sym.Variable(' x ', dtype=dtype)
        y = mx.sym.reshape(x, shape=(0, 0, -1))
        z = mx.sym.contrib.arange_like(y, axis=-1)

        executor = z.simple_bind(ctx=mx.gpu(0), x=(3, 4, 5, 6), grad_req=' null ')
        arg = executor.arg_arrays[0]
        arg[:] = np.random.normal(size=arg.shape).astype(dtype)
        for output in executor.forward(is_train=False):
            assert output.dtype == dtype
2019-09-23 10:13:42 +08:00
2020-09-02 18:47:01 -07:00
def test_fp16_spmm():
    """Compare mxnet.ndarray.sparse.dot with mx.nd.dot for a float16 CSR input.

    The CSR matrix holds a single non-zero (2.0 at row 150, column 100000) and
    is multiplied by a random 100001 x 151 float16 weight.
    """
    coo = sps.coo_matrix(([2.0], ([150], [100000])))
    inp = mxsps.csr_matrix(coo.tocsr()).astype(' float16 ', copy=False)
    weight = mx.nd.random.randn(100001, 151).astype(' float16 ', copy=False)

    out = mxsps.dot(inp, weight)
    out_np = mx.nd.dot(inp, weight)
    assert_almost_equal(out.asnumpy(), out_np, rtol=1e-3, atol=1e-5)
2017-05-31 09:56:32 -07:00
# Script entry point: hand this module's tests to the nose runner.
if __name__ == ' __main__ ':
    import nose

    nose.runmodule()
2020-09-02 18:47:01 -07:00