2017-08-08 16:36:23 -07:00
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
2018-01-30 10:45:25 -08:00
from __future__ import print_function
2017-04-03 15:18:41 -07:00
import sys
2016-03-19 23:45:52 -07:00
import os
2017-06-26 22:37:11 -07:00
import time
2018-01-30 10:45:25 -08:00
import multiprocessing as mp
2017-06-26 22:37:11 -07:00
import mxnet as mx
import numpy as np
2020-04-22 23:53:12 -07:00
import pytest
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweaks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
import itertools
2020-08-28 14:53:11 -07:00
import scipy . sparse as sps
import mxnet . ndarray . sparse as mxsps
2019-10-15 15:56:43 -07:00
from mxnet . test_utils import check_consistency , set_default_context , assert_almost_equal , assert_allclose
2020-06-20 14:49:58 -07:00
from mxnet . test_utils import check_symbolic_forward , check_symbolic_backward , discard_stderr
Improve environment variable handling in unittests (#18424)
This PR makes it easy to create unittests that require specific settings of environment variables, while avoiding the pitfalls (discussed in comments section). This PR can be considered a recasting and expansion of the great vision of @larroy in creating the EnvManager class in #13140.
In its base form, the facility is a drop-in replacement for EnvManager, and is called 'environment':
with environment('MXNET_MY_NEW_FEATURE', '1'):
<test with feature enabled>
with environment('MXNET_MY_NEW_FEATURE', '0'):
<test with feature disabled>
Like EnvManager, this facility takes care of the save/restore of the previous environment variable state, including when exceptions are raised. In addition though, this PR introduces the features:
A similarly-named unittest decorator: @with_environment(key, value)
The ability to pass in multiple env vars as a dict (as is needed for some tests) in both forms, so for example:
with environment({'MXNET_FEATURE_A': '1',
'MXNET_FEATURE_B': '1'}):
<test with both features enabled>
Works on Windows! This PR includes a wrapping of the backend's setenv() and getenv() functions, and uses this direct access to the backend environment to keep it in sync with the python environment. This works around the problem that the C Runtime on Windows gets a snapshot of the Python environment at startup that is immutable from Python.
with environment() has a simple implementation using the @contextmanager decorator
Tests are included that validate the facility works with all combinations of before_val/set_val, namely unset/unset, unset/set, set/unset, set/set.
There were 5 unittests previously using EnvManager, and this PR shifts those uses to with environment():, while converting over 20 other ad-hoc uses of os.environ[] within the unittests. This PR also enables those unittests that were bypassed on Windows (due to the inability to set environment variables) to run on all platforms.
Further Comments
Environment variables are a double-edged sword: they enable useful operating modes for testing, debugging or niche applications, but like all features they must be tested. The correct approach for testing with a particular env var setting is:
def set_env_var(key, value):
if value is None:
os.environ.pop(key, None)
else:
os.environ[key] = value
old_env_var_value = os.environ.get(env_var_name)
try:
set_env_var(env_var_name, test_env_var_value)
<perform test>
finally:
set_env_var(env_var_name, old_env_var_value )
The above code makes no assumption about whether the before-test and within-test state of the env var is set or unset, and restores the prior environment even if the test raises an exception. This represents a lot of boiler-plate code that could be potentially mishandled. The with environment() context makes it simple to handle all this properly. If an entire unittest wants a forced env var setting, then using the @with_environment() decorator avoids the code indent of the with environment() approach if used otherwise within the test.
2020-07-23 11:17:10 -07:00
from mxnet . test_utils import default_context , rand_shape_2d , rand_ndarray , same , environment
2018-04-03 10:33:56 -07:00
from mxnet . base import MXNetError
2018-04-09 14:43:53 -07:00
from mxnet import autograd
2017-06-26 22:37:11 -07:00
2016-03-19 23:45:52 -07:00
# Put the sibling ``unittest`` directory at the front of sys.path so the
# modules imported by bare name after this point can be resolved.
# The relative component must not carry surrounding spaces, otherwise the
# joined path names a directory that does not exist.
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../unittest'))
2020-04-22 23:53:12 -07:00
from common import setup_module , with_seed , teardown_module , assert_raises_cudnn_not_satisfied , assert_raises_cuda_not_satisfied
2019-03-06 21:58:52 -08:00
from common import run_in_spawned_process
2020-06-20 14:49:58 -07:00
from test_operator import check_sequence_reverse , allclose_function
2015-10-24 15:57:42 -07:00
from test_operator import *
Numpy-compatible Infra (#15581)
* [Do not review] [Do not merge] New numpy-compatible sum (#14739)
* Add numpy namespace and initial impl of np.sum (not complete)
* Clean up
* Fix import error
* numpy sum
* add test and backward data type support
* add license to test_numpy_op.py
* improve test to reduce flakiness
* fix sanity build
* extra numeric test and imperative test
* add error message for initial argument
* [numpy] Infra for supporting numpy ops in imperative mode and Gluon APIs (#14758)
* Infra of new ndarray and symbol types for numpy operators
* Rename
* Fix import problem
* Refactor
* Remove redundant code
* Add docstring
* More on numpy ndarray and symbol
* Override unimplemented methods for ndarray and _NumpySymbol
* Fix built-in methods of ndarray and _NumpySymbol
* Fix test and sanity check
* Fix pylint
* Address cr comments
* Add unit tests for ndarray and _NumpySymbol
* Add _true_divide
* Fix gpu build
* Add future import division
* More correct way of checking if an output is from a np compat op
* Fix gpu build
* Fix output ndarray/symbol types with at least one new ndarray/symbol
* Modify true_divide doc
* Fix flaky copying zero-size arrays via gpus
* Fix zero size in gluon hybridize and zeros/ones symbol not creating new symbol type
* Fix doc
* Enable np op compat check with name prefix (#14897)
* [numpy] Numpy dot (#14831)
* Numpy Dot case 1-4 + case 3.5 forward and 0.5 backward
* Backward computation and test coverage
* numpy-compatible mean (#14859)
* [numpy] Some np ops for d2l (#14924)
* Add np transpose
More ops and namespaces for submodules
Add relu and sigmoid
Add reshape
Fix symbolic name mismatch
Add maximum and minimum
* Add convenience fluent method
* Add ndarray.item()
* Fix CI
* Fix lint
* Fix lint
* Fix reshape gpu
* Add example
* Remove python notebook outputs
* Remove notebook output
* Add one more example
* [numpy] Refactor np modules (#14989)
* Refactor
* Initial refactoring
* Fix notebook
* Move numpy op check from backend to frontend
* Add homogeneous ndarray check
* Fix grouping inhomogeneous types of symbols
* Improve error handling of different types of symbols as outputs
* Fix test
* Fix numpy test
* Fix ci
* Try to fix gpu ci failure
* [numpy] Refactor np module (example runs through) (#15055)
* Refactor notebook
* notebook working with hybrid block
* More refactoring
* Remove unnecessary use_np_compat
* Use class decorator to initialize numpy ndarrays in parameter.py
* Clear notebook outputs
* Improve np decorator
* Remove npe op from optimizer
* Fix CI
* Fix functools.wraps issue in Python2
* Fix ci
* Change np_compat to np_shape
* Temporarily disable test_amp
* Numpy-compatible stack (#15027)
* numpy stack
* migrate to use_np_shape
* Numpy Unary Ops (#15010)
* Unary Ops
* new version of unit tests
* [numpy] Fix np branch after rebase (#15086)
* Add np_array semantics for Gluon
Fix notebook
Fix sanity
Fix gluon deferred infer shape
Add np.random.uniform
Add random normal
Add boolean comparison ops
Add np.ndarray indexing
Reformat test ndarray indexing
Fix unit tests
Add one more test of indexing
Fix sanity
Enable amp test
Add np.arange
Revert cython unit test to ctypes
Delete unnecessary use_np_shape decorator from test
Rebase with numpy branch
support range as index
Fix python2 range type check
Add argmax
Disable clojure test
* Fix ci
* Add np.linalg.norm for ord='fro'
* Fix pylint
* numpy concatenate (#15104)
* [WIP][numpy] Fix for D2L Chapters 2/3/4 (#15139)
* Fix
* Fix linear regression gluon
* More fix
* Fix pylint
* Fix for chapter 4
* Add np.add mul div mod pow sub and shuffle
* Fix model selection, underfitting, overfitting
* Fix weight decay
* Fix dropout
* Fix
* Fix chapter 4
* [numpy] Fix d2l performance regression (#15173)
* Add np array adapter decorator for layers
* Fix performance regression caused by too many conversions between nd.NDArray and np.ndarray
* Fix pylint
* Fix test backward compatibility issue
* Fix test_lambda
* Fix (#15188)
* fix for chapter6 conv nn (#15224)
* [numpy] Fix d2l chapter8 (#15237)
* Add np op doc
* Fix several issues
* Add a N-D dot b 2D support
* Simplify array creation api
* Add swapaxes
* Fix rnn gluon
* More fix
* Fix pylint
* Delete
* Fix mp windows
* fix for ch11 (#15244)
* Numpy-compatible split (#15049)
* numpy split
* numpy split
* unit test
* unit test
* [numpy] [DO NOT MERGE] Fix d2l chapters 9 and 13 (#15246)
* Add npx batch_dot and topk
* Text embedding uses numpy
* Fix SoftmaxCrossEntropyLoss with np
* Fix sentiment cnn
* Fix pylint
* Fix dot attention
* Fix seq2seq attention
* Add np.tile
* Fix transformer
* Fix ci
* Fix ci and rebase
* [numpy] Fix d2l chapter 5 (#15264)
* Fix parameter initializer
* Add np.save and np.load
* Fix read-write
* Fix lint
* Numpy compatible max (#15161)
* numpy amax
* weird cu file diff
* fix the unit test error
* fix gpu bug
* minor fix
* fix lint
* remove scalar value check
* fix the bug on unit test
* fix the case () that breaks the kernel launch
* add zero dimension unit test
* revert the tuple change
* use mshadow maximum
* remove test zero
* change the macro for now
* change the cuda to use mshadow op
* fix the broadcast_reduce_op_value.cu wrong kernel
* add more logic in shape to detect the invalid situation
* change back to type switch
* change to as_nd_ndarray
* add missing @npx.use_np_shape
* retrigger CI
* address the comment
* undo algorithm import
* remove the numeric gradient check
* Numpy compatible multinomial (#15219)
* draft of multinomial
* rename to more concise name
* finish shape
* complete the forward function
* complete forward without handle 0 dimension & scalar
* handle 0 dimension
* add new line
* fix lint
* fix the build error
* fix lint
* finish unit test
* change the registration
* make multinomial support pvals as mx.ndarray
* delete newline
* fix lint error
* support input as list, mx.ndarray, np.ndarray & unit test
* fix lint
* fix the include error
* fix lint
* refactor & pass the tensor instead of tuple to kernel
* fix lint
* update the doc
* address the comment
* Numpy compatible linspace (#15256)
* draft
* finish linspace implementation
* finish linspace
* delete newline
* fix pylint
* add more unit test
* address comment
* add more test case
* disable too-many-arguments
* resolve confliction
* add ctx
* numpy-compatible cumsum (#15309)
* [numpy] Misc fix for other chapters (#15332)
* Add np.prod
* Fix ndarray.reshape accepting positional integers as arguments
* Rebase
* Fix rebase error
* Add np.ndarray.flatten
* Fix
* Add broadcast_to
* Add meshgrid and broadcast_arrays
* Fix sin, cos, sinh, cosh not supporting scalars
* Add more unary ops supporting python scalars
* Fix
* Fix
* Fix ci
* Fix sanity
* [numpy] Change d2l chapters cv and gan to use numpy (#15368)
* Change op name style to lower case underscore
* Add ops under image to npx
* Add image submodule to npx
* Fix split_and_load use np
* Fix fine tuning
* Fix bbox and anchor
* Fix odd
* Fix ssd and rcnn
* Remove restriction on binary element-wise scalar
* Fix gan
* Fix sanity
* Try to fix website build failure
* Add npx.random.seed
* Fix doc
* add doc for multinomial, dot, cumsum, clip, abs, exp, arctan (#15386)
* [numpy] Fix several places in numpy (#15398)
* Fix
* More fix
* [numpy] fix cython (#15418)
* add cython support for numpy
* stay with original API for backward compatibility
* fix after rebase
* get rid of coverage in clang60 mkldnn
* fix lint issues
* fix flaky test and get rid of extra print
* remove numpy examples
* revert #15309 #15256 #15219 #15161
* remove numpy docs
* remove changes to contrib/text/embedding.py
* remove numpy changes to gluon peripherals
* Revert "remove numpy docs"
This reverts commit c104695b28a26738b8700d80c70814e0f583ac55.
* get rid of most operators
* Revert "get rid of coverage in clang60 mkldnn"
This reverts commit 77dc90520b6a2282716ba41987a1f37522daf078.
* remove np-compatible from mxnet.image mxnet.initializer
* address comments
2019-08-07 19:54:02 -07:00
from test_numpy_ndarray import *
2019-08-08 20:30:50 -07:00
from test_numpy_op import *
2019-09-04 16:36:50 -07:00
from test_numpy_interoperability import *
Gluon.probability (#18403)
* package created
* mvn WIP
* normal wip, to be tested
* update
* docstring added, normal mostly done
* add test file
* Bernoulli WIP
* bernoulli wip
* bernoulli doc done
* dense variational WIP
* add kl infra
* implement normal kl method
* refactor kl
* add not implemented handling, rename kl_storage
* add abstract method and Categorical class
* rewrite logit2prob prob2logit for multiclass support
* normal broadcast_to implemented
* categorical mostly done
* update distributions/utils.py
* add dot ahead of import
* fix normal F
* bernoulli, normal brief tests implemented
* add hybridize tests
* transformation infras done
* affine transformation, implemented tested
* add tests cases
* add sum_right_most
* fix get F bug
* compose transform implemented, tested
* fix
* add event_dim
* fetch mvn from upstream
* clean code, implement normal cdf and tests
* constraint in bernoulli done
* fix constraint
* finish half normal
* add cached_property
* add test on cached_property
* add more features to distribution and constraints
* change constraint
* fix bernoulli
* add independent
* add independent tests
* update naming of cached_property
* revert
* add constraints
* add Cat
* add Stack for imperative mode
* add Stack for imperative mode
* add bernoulli entropy
* categorical WIP
* categorical sampling implemented
* finish categorical log_prob, sampling
* enumerate_support finished
* polish StochasticBlock, add test
* add test for stochastic sequential
* clean loss list in __call__
* fix affine, implement sigmoid, softmax
* add gumbel, relaxed bernoulli
* relaxed one-hot sampling implemented
* gamma done
* gamma, dirichlet implemented
* beta done
* gumbel softmax log-likelihood implemented
* refactor tests, implement exponential, fix compose transform
* weibull implemented, transformed distribution cdf icdf added
* pareto implemented
* uniform wip
* uniform done
* rewrite lgamma, implement chi2
* fix chi2 scale
* F distribution done
* t implemented
* fix tiny problem
* cauchy done
* add half cauchy
* multinomial done, tests to be added
* add multinomial test
* MVN done, tests todo
* mvn polished
* fix a few precision issues
* add erf, erfinv unified api and learnable transform
* fix mvn attribute check
* MVN done
* poisson done
* hack poisson for size support
* geometric finished
* negative binomial done
* binomial done
* implement some kl
* add more kl
* refactor kl test
* add more kl
* binomial kl todo
* change constraint logical op implement
* implement gamma entropy
* finish beta dirichlet entropy
* finish all entropy
* kl finished
* add constraint test
* domain map done
* remove bayesian dense
* fix tiny problems
* add kl uniform normal
* add kl tests
* acquire patch from upstream
* add some doc
* finish doc
* refactor kl test(WIP)
* add more kl, fix float32 underflow issue
* make sampling more stable
* handle inconsistent mode
* replace boolean idx with np.where
* fix file name
* add more doc
* add constraint check
* add half_normal/cauchy pdf cdf support check
* fix import problem
* change nosetest to pytest
* remove buggy lines
* change alias register path
* attempt to fix ci
* fix lint, change a few tests
* fix lint
* modify hybrid sequential
* fix lint
* change import order
* add test gluon probability v2
* fix hybridize flag
* change implementation of stochastic block
* fix lint
* fix comments
* fix block
* modify domain map
* add raises for improper add_loss
* add raises for improper add_loss
* add extra cases
* change collectLoss decorator to mandatory
* skip stochastic block tests
* remove test cases
* put gpu tests back
* add test_gluon_stochastic_block back
* remove export test
* put a test back
* tiny refactor
* add memory leak flag
* small changes
Co-authored-by: Zheng <shzheng@a483e789dd93.ant.amazon.com>
2020-07-08 01:22:05 +08:00
from test_gluon_probability_v1 import *
from test_gluon_probability_v2 import *
2017-02-07 13:37:43 +08:00
from test_optimizer import *
2017-04-27 12:14:37 -07:00
from test_random import *
2018-02-13 11:13:04 -08:00
from test_exc_handling import *
2018-05-15 09:55:52 -07:00
from test_sparse_ndarray import *
2017-08-30 23:12:06 -07:00
from test_sparse_operator import *
from test_ndarray import *
2018-08-30 19:13:33 -07:00
from test_subgraph_op import *
2019-10-15 15:56:43 -07:00
from test_gluon_gpu import _test_bulking
2018-10-18 23:17:52 -07:00
from test_contrib_operator import test_multibox_target_op
2019-10-19 16:51:23 -07:00
from test_contrib_optimizer import test_adamw
2020-06-20 14:49:58 -07:00
# Remove the name test_custom_op_fork from this module's namespace. It is not
# imported explicitly in this file, so it presumably arrives through one of
# the star imports above; the reason for excluding it is not visible here
# (TODO confirm).
del test_custom_op_fork #noqa
2016-03-19 23:45:52 -07:00
2016-10-19 00:06:32 -07:00
# Set the mxnet.test_utils default context to GPU 0.
set_default_context ( mx . gpu ( 0 ) )
2017-04-03 15:18:41 -07:00
def check_countsketch(in_dim, out_dim, n):
    """Check ``contrib.count_sketch`` forward and backward on GPU 0 against NumPy.

    Parameters
    ----------
    in_dim : int
        Number of columns of the random input ``x``.
    out_dim : int
        Number of columns of the sketch; also the exclusive upper bound of
        the random hash indices.
    n : int
        Number of rows (samples) of ``x``.

    The comparison itself is delegated to ``check_symbolic_forward`` and
    ``check_symbolic_backward`` with ``rtol=1e-3`` and ``atol=1e-5``.
    """
    data = mx.sym.Variable("data")
    h = mx.sym.Variable("h")
    s = mx.sym.Variable("s")
    sym = mx.sym.contrib.count_sketch(data=data, h=h, s=s, name='countsketch', out_dim=out_dim)

    # From here on ``h`` and ``s`` are rebound to the NumPy inputs; the symbol
    # variables of the same name are already captured inside ``sym``.
    x = np.random.uniform(-10, 10, (n, in_dim))               # input x
    h = np.random.randint(0, out_dim, (1, in_dim))            # hash h: column indices
    s = np.random.randint(0, 2, (1, in_dim)) * 2 - np.ones((1, in_dim))  # hash s: -1 / +1
    locations = {"data": x, "h": h, "s": s}

    # Forward reference: every input column idx is multiplied by s[0, idx] and
    # accumulated into output column h[0, idx]. np.add.at is unbuffered, so
    # columns that share a hash index are summed rather than overwritten.
    expected_out = np.zeros((n, out_dim))
    np.add.at(expected_out, (slice(None), h[0]), np.multiply(x, s))
    check_symbolic_forward(sym, locations, [expected_out], rtol=1e-3, atol=1e-5, ctx=mx.gpu(0))

    out_grad = mx.nd.empty((n, out_dim))
    out_grad[:] = np.random.normal(-3, 3, (n, out_dim))

    # Backward reference: the gradient of input column i is the output
    # gradient at column h[0, i] times s[0, i]. Copy the gradient to the host
    # once instead of once per element.
    out_grad_np = out_grad.asnumpy()
    expected_grad = out_grad_np[:, h[0]] * s[0]
    check_symbolic_backward(sym, locations, [out_grad], [expected_grad],
                            rtol=1e-3, atol=1e-5, ctx=mx.gpu(0))
2017-04-27 12:14:37 -07:00
2018-06-28 06:03:13 +02:00
2018-07-17 22:59:43 -07:00
@with_seed()
@pytest.mark.serial
def test_countsketch():
    """Run check_countsketch once with randomly drawn problem sizes."""
    # (low, high) pairs handed to np.random.randint, i.e. high is exclusive.
    in_dim_bounds = (40, 100)
    out_dim_bounds = (5, 30)
    n_bounds = (1, 200)

    # Draw order (in_dim, out_dim, n) is kept so a fixed seed yields the
    # same sizes as before.
    in_dim = np.random.randint(*in_dim_bounds)
    out_dim = np.random.randint(*out_dim_bounds)
    n = np.random.randint(*n_bounds)
    check_countsketch(in_dim, out_dim, n)
2017-04-03 15:18:41 -07:00
2018-02-18 03:11:58 -08:00
2017-04-03 15:18:41 -07:00
def check_fft(shape):
    """Check ``contrib.fft`` forward and backward on GPU 0 against ``np.fft``.

    Parameters
    ----------
    shape : tuple of int
        Shape of the real input; must have 2 or 4 dimensions. When the last
        dimension is odd it is doubled before the check runs.

    Raises
    ------
    ValueError
        If ``shape`` is neither 2-D nor 4-D (these are the only ranks this
        check builds a reference for).

    The operator output is compared with the NumPy FFT along the last axis,
    with real and imaginary parts interleaved (real at even positions,
    imaginary at odd positions). The input gradient, divided by the FFT
    length, is compared with the real part of the NumPy inverse FFT of the
    output gradient.
    """
    if len(shape) not in (2, 4):
        raise ValueError("check_fft only supports 2-D or 4-D shapes, got %d-D" % len(shape))

    sym = mx.sym.contrib.fft(name='fft', compute_size=128)

    # Make the transformed (last) axis even by doubling it when it is odd.
    if shape[-1] % 2 != 0:
        shape = tuple(shape[:-1]) + (shape[-1] * 2,)

    init = [np.random.normal(size=shape, scale=1.0)]
    ctx_list = [{'ctx': mx.gpu(0), 'fft_data': shape, 'type_dict': {'fft_data': np.float32}}]
    exe_list = [sym._simple_bind(**ctx) for ctx in ctx_list]

    for exe in exe_list:
        for arr, iarr in zip(exe.arg_arrays, init):
            arr[:] = iarr.astype(arr.dtype)

    # forward
    for exe in exe_list:
        exe.forward(is_train=True)
    out1 = [exe.outputs[0].asnumpy() for exe in exe_list]

    # Reference: complex FFT along the last axis, laid out as
    # [re0, im0, re1, im1, ...] along that axis.
    spectrum = np.fft.fft(init[0], n=None, axis=-1, norm=None)
    expected = np.zeros(out1[0].shape)
    expected[..., 0::2] = spectrum.real
    expected[..., 1::2] = spectrum.imag
    assert_almost_equal(expected, out1[0], rtol=1e-3, atol=1e-5)

    # backward
    grad_shape = tuple(shape[:-1]) + (2 * shape[-1],)
    out_grad = mx.nd.empty(grad_shape)
    out_grad[:] = np.random.normal(-3, 3, grad_shape)

    # out_grad_to_complex: undo the interleaving. The gradient is copied to
    # the host once rather than twice per column.
    out_grad_np = out_grad.asnumpy()
    out_grad_complex = np.zeros(shape, dtype=np.complex64)
    out_grad_complex.real = out_grad_np[..., 0::2]
    out_grad_complex.imag = out_grad_np[..., 1::2]
    expected_grad = np.fft.ifft(out_grad_complex, n=None, axis=-1, norm=None).real

    for exe in exe_list:
        exe.backward([out_grad])
        # np.fft.ifft scales by 1/N, so the operator gradient is divided by
        # the FFT length before comparing.
        assert_almost_equal(expected_grad, exe.grad_arrays[0] / shape[-1], rtol=1e-3, atol=1e-5)
2017-04-03 15:18:41 -07:00
2018-08-07 10:29:47 -07:00
@with_seed()
def test_fft():
    """Run check_fft on random 2-D and 4-D shapes, two rounds of each."""
    rounds = 2
    dim_limit = 10  # exclusive upper bound for every dimension
    for _ in range(rounds):
        for rank in (2, 4):
            random_shape = tuple(np.random.randint(1, dim_limit, size=rank))
            check_fft(random_shape)
2019-09-30 17:14:58 -04:00
def _make_ndarrays(input_list, ctx=mx.gpu(0)):
    """Return one MXNet NDArray on ``ctx`` per array in ``input_list``.

    Each NDArray is created with the dtype of its source array.
    """
    converted = []
    for source in input_list:
        converted.append(mx.nd.array(source, dtype=source.dtype, ctx=ctx))
    return converted
2019-12-14 08:32:50 -08:00
def check_multi_sum_sq(dtype, shapes, ctx, tol1, tol2):
    """Check ``mx.nd.multi_sum_sq`` for determinism and against a NumPy reference.

    Parameters
    ----------
    dtype : str or numpy dtype
        Element type of the random input arrays.
    shapes : sequence
        One shape per input array; each is unpacked into ``np.random.rand``.
    ctx : mx.Context
        Context on which the inputs are created.
    tol1 : float
        Used as both ``atol`` and ``rtol`` for the reference comparison.
    tol2 : float
        Unused; kept so existing callers do not break.
    """
    values_arr = [np.random.rand(*shape).astype(dtype) * 10. for shape in shapes]
    mx_vals = _make_ndarrays(values_arr, ctx=ctx)

    sum_sq = mx.nd.multi_sum_sq(*mx_vals, num_arrays=len(shapes))
    sum_sq2 = mx.nd.multi_sum_sq(*mx_vals, num_arrays=len(shapes))
    sum_sq_np = sum_sq.asnumpy()
    # checks that operator is deterministic
    assert np.array_equal(sum_sq_np, sum_sq2.asnumpy())

    # Reference: per-array sum of squares accumulated in float32, computed
    # directly on the host.
    ref_sum_sq = np.array([(v.astype('float32') ** 2).sum() for v in values_arr],
                          dtype='float32')
    assert_almost_equal(ref_sum_sq, sum_sq_np, atol=tol1, rtol=tol1)
@with_seed()
@pytest.mark.serial
def test_multi_sum_sq():
    """Run check_multi_sum_sq on GPU 0 for float16, float32 and float64 inputs.

    For each dtype a random number of 1-D arrays with random lengths is
    generated and handed to ``check_multi_sum_sq``.
    """
    min_nparam = 100
    max_nparam = 120
    min_dim = 50000
    max_dim = 100000
    max_ndim = 1

    dtypes = ['float16', 'float32', 'float64']
    for ctx in [mx.gpu(0)]:
        for dtype in dtypes:
            nparam = np.random.randint(min_nparam + 1, max_nparam + 1)
            shapes = [np.random.randint(min_dim, max_dim + 1, size=max_ndim)
                      for _ in range(nparam)]
            # Looser tolerances apply only to float16 on CPU; with the
            # context list above this never triggers.
            low_tol = ctx == mx.cpu(0) and dtype == 'float16'
            tol1 = 1e-3 if low_tol else 1e-5
            tol2 = 1e-6 if low_tol else 1e-7
            check_multi_sum_sq(dtype, shapes, ctx, tol1, tol2)
2019-09-30 17:14:58 -04:00
def check_fast_lars(w_dtype, g_dtype, shapes, ctx, tol1, tol2):
    """Check ``mx.nd.multi_sum_sq`` and ``mx.nd.multi_lars`` against references.

    Parameters
    ----------
    w_dtype, g_dtype : str or numpy dtype
        Element types of the random weight and gradient arrays.
    shapes : sequence
        One shape per weight/gradient pair; each is unpacked into
        ``np.random.rand``.
    ctx : mx.Context
        Context on which all NDArrays are created.
    tol1 : float
        ``atol`` and ``rtol`` for the sum-of-squares comparisons.
    tol2 : float
        ``atol`` and ``rtol`` for the learning-rate comparison.
    """
    weights_arr = [np.random.rand(*shape).astype(w_dtype) * 10. for shape in shapes]
    grads_arr = [np.random.rand(*shape).astype(g_dtype) for shape in shapes]

    lrs = (np.random.rand(len(shapes)).astype('float32') + 0.1) / 100.
    wds = (np.random.rand(len(shapes)).astype('float32') + 0.1) / 1000.
    eta = (np.random.rand() + 0.1)
    eps = (np.random.rand() + 0.1) / 10000.

    mx_w = _make_ndarrays(weights_arr, ctx=ctx)
    mx_g = _make_ndarrays(grads_arr, ctx=ctx)
    mx_lrs = mx.nd.array(lrs, dtype='float32', ctx=ctx)
    mx_wds = mx.nd.array(wds, dtype='float32', ctx=ctx)

    w_sum_sq = mx.nd.multi_sum_sq(*mx_w, num_arrays=len(shapes))
    g_sum_sq = mx.nd.multi_sum_sq(*mx_g, num_arrays=len(shapes))

    # References: per-array sum of squares accumulated in float32.
    ref_w_sum_sq = mx.nd.array([(w.astype('float32') ** 2).sum() for w in weights_arr],
                               dtype='float32', ctx=ctx)
    ref_g_sum_sq = mx.nd.array([(g.astype('float32') ** 2).sum() for g in grads_arr],
                               dtype='float32', ctx=ctx)
    assert_almost_equal(ref_w_sum_sq.asnumpy(), w_sum_sq.asnumpy(), atol=tol1, rtol=tol1)
    assert_almost_equal(ref_g_sum_sq.asnumpy(), g_sum_sq.asnumpy(), atol=tol1, rtol=tol1)

    rescale_grad = (np.random.rand() + 0.5) * 100.
    mx_new_lrs = mx.nd.multi_lars(mx_lrs, w_sum_sq, g_sum_sq, mx_wds, eta=eta, eps=eps,
                                  rescale_grad=rescale_grad)

    # Reference learning rates: lr * eta * |w| / (|g| + wd * |w| + eps) when
    # both norms are positive, otherwise the learning rate is left unchanged.
    ref_w_l2norm = mx.nd.sqrt(ref_w_sum_sq)
    ref_g_l2norm = mx.nd.sqrt(ref_g_sum_sq * rescale_grad * rescale_grad)
    ref_new_lrs = mx.nd.zeros(ref_w_l2norm.shape, dtype='float32', ctx=ctx)
    for i in range(ref_w_l2norm.size):
        _w = ref_w_l2norm[i]
        _g = ref_g_l2norm[i]
        if _w > 0.0 and _g > 0.0:
            ref_new_lrs[i] = lrs[i] * eta * _w / (_g + wds[i] * _w + eps)
        else:
            ref_new_lrs[i] = lrs[i]
    assert_almost_equal(ref_new_lrs.asnumpy(), mx_new_lrs.asnumpy(), atol=tol2, rtol=tol2)
@with_seed()
@pytest.mark.serial
def test_fast_lars():
    """Run check_fast_lars over both contexts and every weight/grad dtype pair.

    For each (ctx, w_dtype, g_dtype) combination a fresh random problem is
    drawn: between 51 and 60 parameters, each a 1-D shape whose length is
    in [1, 10000].  Looser tolerances are passed when the context is
    mx.cpu(0) and either dtype is the float16 entry; all other combinations
    use the tighter pair.
    """
    param_count_lo = 50
    param_count_hi = 60
    max_len = 10000
    rank = 1
    all_dtypes = [' float16 ', ' float32 ', ' float64 ']
    for ctx in [mx.cpu(0), mx.gpu(0)]:
        # Context equality does not depend on the dtype loops, so test it once.
        on_cpu = ctx == mx.cpu(0)
        for w_dtype in all_dtypes:
            for g_dtype in all_dtypes:
                # Draw order matters for seeded reproducibility:
                # first the parameter count, then one shape per parameter.
                nparam = np.random.randint(param_count_lo + 1, param_count_hi + 1)
                shapes = []
                for _ in range(nparam):
                    shapes.append(np.random.randint(1, max_len + 1, size=rank))
                has_fp16 = w_dtype == ' float16 ' or g_dtype == ' float16 '
                if on_cpu and has_fp16:
                    tol1, tol2 = 1e-3, 1e-6
                else:
                    tol1, tol2 = 1e-5, 1e-7
                check_fast_lars(w_dtype, g_dtype, shapes, ctx, tol1, tol2)
def check_preloaded_multi_sgd(dtype, shapes, momentum, use_master_weights):
    """Compare the preloaded_multi_* SGD operators against the multi_* ones.

    Two identical sets of NDArrays are built from the same random numpy data.
    One set is updated by the multi_* operator, which receives the learning
    rates and weight decays as Python lists through the ``lrs``/``wds``
    keywords.  The other set is updated by the matching preloaded_multi_*
    operator, which receives them as two trailing NDArray inputs created on
    mx.gpu(0).  The resulting weights (plus momentum states and float32
    master weights, when present) must agree within tolerance.

    Parameters
    ----------
    dtype : dtype passed to ``astype`` for weights and gradients (and for the
        momentum states when master weights are not used).
    shapes : sequence of shapes, one per parameter.
    momentum : None to exercise the plain SGD operators, otherwise the value
        forwarded as the ``momentum`` keyword of the *_mom_update operators.
    use_master_weights : when true, the multi-precision (mp) operators are
        used and float32 copies of the weights are passed along.
    """
    num_weights = len(shapes)
    with_momentum = momentum is not None

    def _interleave(*columns):
        # [a0, b0, ..., a1, b1, ...]: the per-parameter grouping the ops expect.
        flat = []
        for row in zip(*columns):
            flat.extend(row)
        return flat

    # Random draws and NDArray construction are kept in a fixed order so that
    # a given seed always produces the same test case.
    weights_arr = [np.random.rand(*shp).astype(dtype) * 100. for shp in shapes]
    grads_arr = [np.random.rand(*shp).astype(dtype) * 100. for shp in shapes]
    rescale_grad = np.random.random() + 1.0
    mx_w = _make_ndarrays(weights_arr)
    mx_g = _make_ndarrays(grads_arr)
    mx_p_w = _make_ndarrays(weights_arr)
    mx_p_g = _make_ndarrays(grads_arr)
    lr_draw = np.random.random(size=num_weights).astype(' float32 ')
    lrs = list((lr_draw + 0.1) / 100.)
    mx_lrs = mx.nd.array(lrs, dtype=' float32 ', ctx=mx.gpu(0))
    wd_draw = np.random.random(size=num_weights).astype(' float32 ')
    wds = list((wd_draw + 0.1) / 1000.)
    mx_wds = mx.nd.array(wds, dtype=' float32 ', ctx=mx.gpu(0))

    ref_cols = [mx_w, mx_g]
    pre_cols = [mx_p_w, mx_p_g]

    if use_master_weights:
        weights32_arr = [w.astype(' float32 ') for w in weights_arr]
        mx_w32 = _make_ndarrays(weights32_arr)
        mx_p_w32 = _make_ndarrays(weights32_arr)

    if with_momentum:
        # Momentum states are float32 for the mp operators, `dtype` otherwise.
        mom_dtype = " float32 " if use_master_weights else dtype
        momentums_arr = [np.random.rand(*shp).astype(mom_dtype) for shp in shapes]
        mx_m = _make_ndarrays(momentums_arr)
        mx_p_m = _make_ndarrays(momentums_arr)
        ref_cols.append(mx_m)
        pre_cols.append(mx_p_m)

    # Master weights always come last within each per-parameter group.
    if use_master_weights:
        ref_cols.append(mx_w32)
        pre_cols.append(mx_p_w32)

    # Look up only the operator pair that is actually exercised.
    if with_momentum:
        if use_master_weights:
            ref_op = mx.nd.multi_mp_sgd_mom_update
            pre_op = mx.nd.preloaded_multi_mp_sgd_mom_update
        else:
            ref_op = mx.nd.multi_sgd_mom_update
            pre_op = mx.nd.preloaded_multi_sgd_mom_update
    else:
        if use_master_weights:
            ref_op = mx.nd.multi_mp_sgd_update
            pre_op = mx.nd.preloaded_multi_mp_sgd_update
        else:
            ref_op = mx.nd.multi_sgd_update
            pre_op = mx.nd.preloaded_multi_sgd_update

    # The momentum variants run with a fixed rescale_grad of 0.95; the random
    # draw above is only used by the plain variants.
    applied_rescale = 0.95 if with_momentum else rescale_grad
    ref_kwargs = dict(num_weights=num_weights, lrs=lrs, wds=wds,
                      rescale_grad=applied_rescale)
    pre_kwargs = dict(num_weights=num_weights, rescale_grad=applied_rescale)
    if with_momentum:
        ref_kwargs['momentum'] = momentum
        pre_kwargs['momentum'] = momentum

    ref_op(*_interleave(*ref_cols), out=mx_w, **ref_kwargs)
    pre_op(*(_interleave(*pre_cols) + [mx_lrs, mx_wds]), out=mx_p_w, **pre_kwargs)

    rtol, atol = (1e-3, 1e-2) if dtype == ' float16 ' else (1e-5, 1e-6)

    # (preloaded result, reference result, rtol, atol), checked in this order.
    comparisons = [(mx_p_w, mx_w, rtol, atol)]
    if with_momentum:
        comparisons.append((mx_p_m, mx_m, rtol, atol))
    if use_master_weights:
        # float32 master weights are always held to the tight tolerances.
        comparisons.append((mx_p_w32, mx_w32, 1e-5, 1e-6))
    for got_list, want_list, cmp_rtol, cmp_atol in comparisons:
        for got, want in zip(got_list, want_list):
            assert_almost_equal(got.asnumpy(), want.asnumpy(),
                                rtol=cmp_rtol, atol=cmp_atol)
@with_seed()
def test_preloaded_multi_sgd():
    """Drive check_preloaded_multi_sgd over dtype/master-weight/momentum combos.

    The float32 entry is only run without master weights; the float16 entry
    is run both with and without them.  Every combination is tried with no
    momentum and with momentum 0.9, each time on a fresh random set of 6 to
    10 parameters whose shapes have 4 dimensions of extent 1 to 6.
    """
    param_count_lo = 5
    param_count_hi = 10
    max_extent = 6
    rank = 4
    # (dtype, master-weight settings to try), in execution order.
    dtype_plan = [
        (' float16 ', [True, False]),
        (' float32 ', [False]),
    ]
    momentum_options = [None, 0.9]
    for dtype, master_weight_options in dtype_plan:
        for use_master_weights in master_weight_options:
            for momentum in momentum_options:
                # Parameter count is drawn first, then one shape per parameter.
                nparam = np.random.randint(param_count_lo + 1, param_count_hi + 1)
                shapes = []
                for _ in range(nparam):
                    shapes.append(np.random.randint(1, max_extent + 1, size=rank))
                check_preloaded_multi_sgd(dtype, shapes, momentum, use_master_weights)
2018-02-18 03:11:58 -08:00
2019-12-09 16:52:02 -08:00
2018-02-18 03:11:58 -08:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2016-07-05 11:29:40 -07:00
def test_batchnorm_with_type ( ) :
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
ctx_list_v2_2D = [
2018-06-26 20:18:10 +00:00
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
]
ctx_list_v2_1D = [
2018-06-26 20:18:10 +00:00
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 5 , 2 , 5 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
]
ctx_list_v2_3D = [
2018-09-05 12:31:30 -07:00
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float16 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' norm_data ' : ( 3 , 2 , 3 , 2 , 3 ) , ' type_dict ' : { ' norm_data ' : np . float64 } }
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
]
# V2, 2D
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpu.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweaks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocal,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_softmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
bools = [ False , True ]
for fix_gamma , cudnn_off in itertools . product ( bools , bools ) :
sym = mx . sym . BatchNorm ( name = ' norm ' , fix_gamma = fix_gamma , cudnn_off = cudnn_off )
check_consistency ( sym , ctx_list_v2_2D )
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
# V2, 1D
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpu.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweaks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocal,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_softmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
for fix_gamma , cudnn_off in itertools . product ( bools , bools ) :
sym = mx . sym . BatchNorm ( name = ' norm ' , fix_gamma = fix_gamma , cudnn_off = cudnn_off )
check_consistency ( sym , ctx_list_v2_1D )
# V2, 3D
for fix_gamma , cudnn_off in itertools . product ( bools , [ True , ] ) :
sym = mx . sym . BatchNorm ( name = ' norm ' , fix_gamma = fix_gamma , cudnn_off = cudnn_off )
check_consistency ( sym , ctx_list_v2_3D )
2018-10-10 21:00:17 -07:00
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
2018-02-18 03:11:58 -08:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
def test_batchnorm_versions ( ) :
2018-10-10 21:00:17 -07:00
def test_batchnorm_versions_helper ( batchnorm_op_list , data , fix_gamma , use_global_stats ) :
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
ctx_list = [ ]
sym_list = [ ]
# BatchNorm cpu
if ' batchnorm_cpu ' in batchnorm_op_list :
ctx_list . append ( { ' ctx ' : mx . cpu ( 0 ) , ' batchnorm_data ' : data , ' type_dict ' : { ' batchnorm_data ' : np . float32 } } )
sym_list . append ( mx . sym . BatchNorm ( fix_gamma = fix_gamma ,
use_global_stats = use_global_stats ,
name = ' batchnorm ' ) )
# BatchNorm gpu (organic)
if ' batchnorm_gpu ' in batchnorm_op_list :
ctx_list . append ( { ' ctx ' : mx . gpu ( 0 ) , ' batchnorm_data ' : data , ' type_dict ' : { ' batchnorm_data ' : np . float32 } } )
sym_list . append ( mx . sym . BatchNorm ( fix_gamma = fix_gamma ,
use_global_stats = use_global_stats ,
name = ' batchnorm ' , cudnn_off = True ) )
# BatchNorm gpu cudnn (if cudnn is enabled)
if ' batchnorm_cudnn ' in batchnorm_op_list :
ctx_list . append ( { ' ctx ' : mx . gpu ( 0 ) , ' batchnorm_data ' : data , ' type_dict ' : { ' batchnorm_data ' : np . float32 } } )
sym_list . append ( mx . sym . BatchNorm ( fix_gamma = fix_gamma ,
use_global_stats = use_global_stats ,
name = ' batchnorm ' , cudnn_off = False ) )
check_consistency ( sym_list , ctx_list )
2018-10-10 21:00:17 -07:00
def test_1d_batchnorm ( fix_gamma , use_global_stats ) :
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
data = ( 2 , 3 , 20 )
test_batchnorm_versions_helper ( batchnorm_op_list = [ ' batchnorm_cpu ' ,
' batchnorm_gpu ' , ' batchnorm_cudnn ' ] ,
data = data ,
2018-10-10 21:00:17 -07:00
fix_gamma = fix_gamma , use_global_stats = use_global_stats )
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
2018-10-10 21:00:17 -07:00
def test_2d_batchnorm ( fix_gamma , use_global_stats ) :
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only whem MKL is requested
* Warning only whem MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* FInish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
data = ( 2 , 3 , 10 , 10 )
2020-06-20 14:49:58 -07:00
test_batchnorm_versions_helper ( batchnorm_op_list = [ ' batchnorm_cpu ' ,
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formating in doc for Custom operator.
* Fix formating in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
' batchnorm_gpu ' , ' batchnorm_cudnn ' ] ,
data = data ,
2018-10-10 21:00:17 -07:00
fix_gamma = fix_gamma , use_global_stats = use_global_stats )
Batch Norm rewrite without mshadow, 1D, 2D, 3D, float16, float32, float64 as well as operator gtest framework (#5936)
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* DeviceTensor3 added, forEachFast not yet converted
* DeviceTensor3 version working
* DeviceTensor3 working
* .
* Fix for use_global_stats
* fixed bug with testing suite for double (Float64)
* python unit tests working for batchnorm
* python unit tests
* Update documentation for mxnet.initializer.Mixed (#5937)
* Update documentation for SVMOutput. (#5931)
* Update documentation for SVMOutput.
* Update doc for SVMOutput - fix formatting.
* Adding install instruction for Ubuntu-CPU-Python (#5885)
* edit ndarray API docs (#5806)
* edit docs in broadcast_reduce_op
* edit docs in broadcast_reduce_op
* minor change
* lint fix
* fix
* mx.nd.ones
* mx.nd.repeat
* mx.nd.reverse
* add example in repeat
* optimizer update
* fix nanprod
* fix optimizer_op api doc
* fix reduce_op api doc
* fix nd.ones api doc
* mx.nd.repeat doc change
* Update broadcast_reduce_op.h
* Symbol docs fixes (#5930)
* symbol docs minor formatting changes
* deepcopy, infer_shape, infer_shape_partial docs modified
* Few more small fixes
* arithmetic functions fixes
* some more modifications
* changes after review
* small change
* grad function note added
* More API Doc Edits (#5886)
* edit activation doc
* doc l2_normalization
* edit MakeLoss doc
* edit blockgrad doc
* blockgrad fileline fix
* edit MakeLoss doc cont.
* doc change 'tensor' to 'multidimensional array'
* l2normalization doc improve
* makeloss doc improve, blockgrad doc improve
* fix doc in activation, l2_normalization, make_loss
* fix minor grammar
* use .describe to avoid build failure.
* Update documentation for mxnet.image.imdecode (#5957)
* Update documentation for mxnet.image.imdecode
* Update documentation for mxnet.image.imdecode (clarify that we need OpenCV and not the CV2 Python library)
* Fix script by adding path to Dockerfile (#5958)
* Clean install script
* Add test for pip installations
* Remove debug statements & comments
* Make test runnable as script and from framework
* Fix path to Dockerfiles
* Putting failing cases at the end
* Update doc for Custom operator. (#5875)
* Update doc for Custom operator.
* Update doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Fix formatting in doc for Custom operator.
* Minor change to ndarray.Custom documentation.
* Minor edit in doc for Custom operator.
* Minor change to doc for Custom operator. Data is 'NDArray-or-Symbol'.
* Minor formatting change for Custom operator documentation.
* For Custom operator doc, move example into ndarray_doc.py.
* Minor change in Custom operator documentation
* Improve the doc of pick + Update dmlc-core (#5946)
* Add PickParam to fix the docstring and the initial value for axis
* Update dmlc-core
* Update dmlc-core
* Image docs modified (#5973)
* imageIter doc modified
* edited imageiter
* ADD missing Libri_sample.json, FIX minor bugs in speech_recognition example (#5962)
* [KVStore] Add support for other data types (#5818)
* Fix kvstore type
* Fix lint
* Parse inputs to DataDesc
* Make module support dtype
* Fix lint
* Add default dtype in Comm
* Fix lint
* Revert rename
* [cpp-package] Add C++ basic tutorial and build instruction (#5971)
* Add C++ basic tutorial and build instruction
* Remove binaries
* Fix lint
* Avoid sign-compare
* Update documentation for mxnet.metric.np (#5977)
* Getting rid of identity (#5935)
* Activation ops (#5938)
* [Ops] Add op: 'relu'
* Add op: 'sigmoid'
* Introduce 'kernel_launch_op'
* Add tests and describe; move it to elemwise_unary_op
* Fix GPU version
* Convert caffe AbsVal to mx.symbol.abs in caffe converter (#5984)
* Correction to LSTMCell docstring (#5986)
* [Module] fix input_grads order (#5980)
* fix input_grads order + update dmlc-core
* set label to be optional
* update env_var doc (#5964)
* Adjusting make, Callback removed
* batch norm gpu testing
* Batch Norm rewrite without mshadow as well as operator gtest framework
* performance testing
* lint fixes
* use CUDNN for this test
* remove superfluous omp define
* Fix file names in comments
* build, run, clean gtest works (although a test is failing)
* CR comments
* Adjust timing tests for more strenuous sample
* Remove temp resource allocation
* rearrange source into cc and cu files
* lint fixes
* Trigger build
* Use latest mshadow
* temporarily revert channel position parameter field
* Add more tests for batchnorm
* Add more tests for batchnorm
* test_operator_gpu working for all types
* Compiles after AccReal
* Compiles after AccReal
* All tests working
* All tests working
* build, run, clean gtest works (although a test is failing)
* vc++ requires explicit int type for omp for loop
* Repair cpp-package
* signed/unsigned fixed in cuda file
* lint fixes in tests and cpp-package directories
* more lint
* use IsWriting() helper
* Fall-through for unsupported MKL shapes/types
* Fall-through for unsupported MKL shapes/types
* cleaner mkl_off approach
* Warning only when MKL is requested
* Warning only when MKL is requested
* lint
* ..
* python problem fixed
* python problem fixed
* Merge branch 'batchnorm' into batchnorm_pr
# Conflicts:
# src/operator/batch_norm.cc
# src/operator/batch_norm.cu
# tests/cpp/operator/batchnorm_test.cc
* lint fix
* lint fix
* lint fix
* lint fix
* lint fix
* Fix visual c++ compile problem
* .
* .
* All unit tests pass again
* lint fix
* fix strange compile errors in CUDNN batchnorm header
* Finish using flags instead of bools
* lint
* Fix timing pass count for forward pass
* Fix R script install roxygen problem
* code formatting, addition of doc strings is causing IDE to add spaces before the calls
* removed commented
* cr comments
* Change back to compilable code
* For CPU mode, store as invstd
* move testing code around a little
* lint fix
* Use AccReal in some places to avoid fp16 problems
* Fix minor invstd problem in cuda version
* remove unused scale param
* add permutation unit test, handle cudnn doesn't like 3D
* .
* lint
* .
* Remove mkl_off
* lint fix and time cudnn when enabled
2017-05-15 20:27:28 -07:00
2018-10-10 21:00:17 -07:00
def test_3d_batchnorm(fix_gamma, use_global_stats):
    """Run the batchnorm version comparison on a 5-element ``data`` tuple.

    Forwards to ``test_batchnorm_versions_helper`` with ``data=(2, 3, 3, 5, 5)``
    (presumably the input shape: batch, channel and three spatial dims --
    confirm against the helper).

    Only the 'batchnorm_cpu' and 'batchnorm_gpu' variants are listed here;
    no 'batchnorm_cudnn' entry is requested for this case.

    Parameters
    ----------
    fix_gamma
        Forwarded unchanged to the helper as ``fix_gamma``.
    use_global_stats
        Forwarded unchanged to the helper as ``use_global_stats``.
    """
    data = (2, 3, 3, 5, 5)
    test_batchnorm_versions_helper(batchnorm_op_list=['batchnorm_cpu',
                                                      'batchnorm_gpu'],
                                   data=data,
                                   fix_gamma=fix_gamma, use_global_stats=use_global_stats)
test_1d_batchnorm ( True , False )
test_1d_batchnorm ( False , False )
test_1d_batchnorm ( False , True )
test_1d_batchnorm ( True , True )
test_2d_batchnorm ( True , False )
test_2d_batchnorm ( False , False )
test_2d_batchnorm ( False , True )
test_2d_batchnorm ( True , True )
test_3d_batchnorm ( True , False )
test_3d_batchnorm ( False , False )
test_3d_batchnorm ( False , True )
test_3d_batchnorm ( True , True )
2016-07-05 11:29:40 -07:00
2018-02-18 03:11:58 -08:00
@with_seed(1234)
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
@pytest.mark.serial
def test_convolution_with_type():
    """Compare Convolution across contexts, dtypes and layouts via check_consistency.

    Two symbols are built:

    * ``sym1`` -- a plain ``Convolution`` named 'conv' (default layout).
    * ``sym2`` -- the same convolution expressed with ``layout='NHWC'``: the
      'conv_data' and 'conv_weight' variables are transposed with
      ``axes=(0, 2, 3, 1)``, convolved, and the output is transposed back with
      ``axes=(0, 3, 1, 2)``.  The final transpose is also named 'conv'.

    ``sym`` and ``ctx_list`` are parallel lists: the first five entries pair
    ``sym1`` with gpu/cpu contexts of varying data dtype, the last two pair
    ``sym2`` with gpu contexts where both data and weight dtypes are given.
    """
    sym1 = mx.sym.Convolution(num_filter=3, kernel=(3, 3), name='conv')

    data = mx.sym.Variable('conv_data')
    w = mx.sym.Variable('conv_weight')
    b = mx.sym.Variable('conv_bias')
    w = mx.sym.transpose(w, axes=(0, 2, 3, 1))
    sym2 = mx.sym.transpose(data, axes=(0, 2, 3, 1))
    sym2 = mx.sym.Convolution(sym2, w, b, layout='NHWC', num_filter=3, kernel=(3, 3))
    sym2 = mx.sym.transpose(sym2, axes=(0, 3, 1, 2), name='conv')

    # One symbol per ctx_list entry, in the same order.
    sym = [sym1, sym1, sym1, sym1, sym1, sym2, sym2]
    ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float16}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}},
                # NHWC
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'conv_weight': (3, 2, 3, 3),
                 'type_dict': {'conv_data': np.float32, 'conv_weight': np.float32}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'conv_weight': (3, 2, 3, 3),
                 'type_dict': {'conv_data': np.float16, 'conv_weight': np.float16}}
                ]
    # Wider tolerance needed for the true-fp16 case above (the final entry,
    # where both data and weight are float16).
    tol = {np.dtype(np.float16): 0.5,
           np.dtype(np.float32): 1e-3,
           np.dtype(np.float64): 1e-5,
           np.dtype(np.uint8): 0,
           np.dtype(np.int32): 0}
    # The same per-dtype table is used for both relative and absolute tolerance.
    check_consistency(sym, ctx_list, rtol=tol, atol=tol)
    # test ability to turn off training on bias
    check_consistency(sym, ctx_list,
                      grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'},
                      rtol=tol, atol=tol)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
2017-04-18 22:00:04 -07:00
def check_consistency_NxM(sym_list, ctx_list):
    """Apply N symbols against each of M contexts, checking that all NxM combinations match.

    The two inputs are expanded into parallel sequences of length N*M before
    being handed to ``check_consistency``.  E.g. if ``sym_list=[sym1, sym2]``
    and ``ctx_list=[ctx1, ctx2, ctx3]``, then the resulting sequences are::

        syms = [sym1, sym1, sym1, sym2, sym2, sym2]
        ctxs = [ctx1, ctx2, ctx3, ctx1, ctx2, ctx3]

    Parameters
    ----------
    sym_list
        Sequence of N symbols; each one is repeated ``len(ctx_list)`` times
        in a row via ``np.repeat``.
    ctx_list
        Sequence of M context entries; the whole sequence is tiled
        ``len(sym_list)`` times with ``*``, so it must support sequence
        repetition (e.g. a ``list``).
    """
    # scale=0.5 is forwarded as-is to check_consistency.
    # NOTE(review): presumably it scales the generated input data -- confirm.
    check_consistency(np.repeat(sym_list, len(ctx_list)), ctx_list * len(sym_list), scale=0.5)
2017-04-18 22:00:04 -07:00
2018-06-28 06:03:13 +02:00
2020-05-16 19:04:44 -07:00
@pytest.mark.skip(reason="test fails intermittently. temporarily disabled till it gets fixed. tracked at https://github.com/apache/incubator-mxnet/issues/10141")
@with_seed()
@pytest.mark.serial
def test_convolution_options():
    """Compare Convolution symbols built with and without cudnn_off=True.

    For 1D, 2D and 3D inputs, a default symbol and a cudnn_off=True symbol are
    created for each option set (pad > 0, stride > 1, dilate > 1, 1x1 kernel)
    and handed to check_consistency_NxM together with a list of
    context/dtype combinations.
    """
    def ctx_entry(ctx, data_shape, dtype):
        # One context description for the input of the symbol named 'conv'.
        return {'ctx': ctx, 'conv_data': data_shape, 'type_dict': {'conv_data': dtype}}

    def compare_cudnn_on_off(ctx_list, conv_args, default_only_args=None):
        # `default_only_args` go only to the symbol that leaves cudnn_off unset.
        default_args = dict(conv_args)
        if default_only_args:
            default_args.update(default_only_args)
        sym = mx.sym.Convolution(num_filter=3, name='conv', **default_args)
        sym_no_cudnn = mx.sym.Convolution(num_filter=3, cudnn_off=True, name='conv', **conv_args)
        check_consistency_NxM([sym, sym_no_cudnn], ctx_list)

    # 1D convolution
    shape_1d = (2, 2, 7)
    ctx_list = [ctx_entry(mx.gpu(0), shape_1d, np.float64),
                ctx_entry(mx.gpu(0), shape_1d, np.float32),
                ctx_entry(mx.gpu(0), shape_1d, np.float16),
                ctx_entry(mx.cpu(0), shape_1d, np.float64),
                ctx_entry(mx.cpu(0), shape_1d, np.float32)]
    options_1d = [
        {'kernel': (3,), 'pad': (1,)},      # Pad > 0
        {'kernel': (3,), 'stride': (2,)},   # Stride > 1
        {'kernel': (3,), 'dilate': (2,)},   # Dilate > 1
        {'kernel': (1,), 'pad': (0,)},      # 1x1 convolution
    ]
    for conv_args in options_1d:
        # Only the default symbol carries an explicit layout in the 1D case.
        compare_cudnn_on_off(ctx_list, conv_args, default_only_args={'layout': 'NCW'})

    # 2D convolution
    shape_2d = (2, 2, 7, 7)
    ctx_list = [ctx_entry(mx.gpu(0), shape_2d, np.float64),
                ctx_entry(mx.gpu(0), shape_2d, np.float32),
                ctx_entry(mx.gpu(0), shape_2d, np.float16),
                ctx_entry(mx.cpu(0), shape_2d, np.float64),
                ctx_entry(mx.cpu(0), shape_2d, np.float32)]
    options_2d = [
        {'kernel': (3, 3), 'pad': (1, 1)},      # Pad > 0
        {'kernel': (3, 3), 'stride': (2, 2)},   # Stride > 1
        {'kernel': (3, 3), 'dilate': (2, 2)},   # Dilate > 1
        {'kernel': (1, 1), 'pad': (0, 0)},      # 1x1 convolution
    ]
    for conv_args in options_2d:
        compare_cudnn_on_off(ctx_list, conv_args)

    # 3D convolution
    shape_3d = (2, 2, 5, 7, 7)
    # NOTE(review): the first two entries are identical (cpu, float64), whereas the
    # 1D/2D lists use float64 and float32 on cpu -- confirm whether float32 was intended.
    ctx_list = [ctx_entry(mx.cpu(0), shape_3d, np.float64),
                ctx_entry(mx.cpu(0), shape_3d, np.float64),
                ctx_entry(mx.gpu(0), shape_3d, np.float64),
                ctx_entry(mx.gpu(0), shape_3d, np.float32)]
    options_3d = [
        {'kernel': (2, 3, 3), 'pad': (1, 1, 1)},      # Pad > 0
        {'kernel': (2, 3, 3), 'stride': (2, 2, 2)},   # Stride > 1
        {'kernel': (1, 1, 1), 'pad': (0, 0, 0)},      # 1x1 convolution
    ]
    for conv_args in options_3d:
        compare_cudnn_on_off(ctx_list, conv_args)
2017-03-17 12:42:11 -07:00
2019-02-23 18:56:30 -08:00
2019-03-13 12:17:05 -07:00
@with_seed()
@pytest.mark.serial
def test_conv_deconv_guards():
    """Compare default vs. cudnn_off=True Convolution/Deconvolution on strided cases.

    Test cases for convolution and deconvolution via strided fft. Ensure that the framework
    guards against problematic CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING in cuDNN [7.3.1,7.5)
    see https://docs.nvidia.com/deeplearning/sdk/cudnn-release-notes/rel_750.html#rel_750
    """
    # (kernel, pad) combinations exercised for both operators; stride is always (2, 2).
    kernel_pad_pairs = [
        ((6, 6), (0, 0)),
        ((6, 6), (1, 1)),
        ((6, 7), (0, 1)),
        ((7, 6), (1, 0)),
        ((7, 7), (0, 0)),
        ((7, 7), (1, 1)),
    ]
    operators = [(mx.sym.Convolution, 'conv'), (mx.sym.Deconvolution, 'deconv')]
    for op, opname in operators:
        dataname = opname + '_data'
        ctx = {'ctx': mx.gpu(0), dataname: (32, 32, 64, 64), 'type_dict': {dataname: np.float32}}
        test_cases = [
            {'num_filter': 32, 'kernel': kernel, 'pad': pad, 'stride': (2, 2), 'name': opname}
            for kernel, pad in kernel_pad_pairs
        ]
        for test_case_args in test_cases:
            try:
                sym = op(**test_case_args)
                sym_no_cudnn = op(cudnn_off=True, **test_case_args)
                # Both symbols run on the same gpu context description.
                check_consistency([sym, sym_no_cudnn], [ctx, ctx], scale=0.1)
            except BaseException:
                # Report which operator/argument set failed, then propagate unchanged.
                print('Test failure of mx.sym.{} with args: {}'.format(op.__name__, test_case_args))
                raise
2019-02-23 18:56:30 -08:00
def _conv_with_num_streams(seed):
    """Compare cuDNN and non-cuDNN convolutions over randomly sized inputs.

    Runs a fixed number of trials under ``random_seed(seed)``.  Each trial
    draws a square spatial size, builds the same flip + Convolution graph
    twice (once with ``cudnn_off=True``) and checks that both produce
    consistent results on ``mx.gpu(0)``.

    Parameters
    ----------
    seed : value forwarded unchanged to ``random_seed``.

    Raises
    ------
    Re-raises whatever ``check_consistency`` raises, after printing the
    spatial size of the failing trial.
    """
    with random_seed(seed):
        # Try to expose timing-dependent improper workspace sharing by parallel dgrad and wgrad
        num_trials = 20
        for _ in range(num_trials):
            size = np.random.randint(32, 128)
            # The cudnn conv operator runs dgrad and wgrad in separate streams if enabled, with possible
            # kernel overlap.  The non-cudnn conv op doesn't do this so is used as the 'golden copy'.
            ctx = {'ctx': mx.gpu(0), 'conv_data': (2, 2, size, size),
                   'type_dict': {'conv_data': np.float32}}
            # Adding 'flip' here isolates the model from the input node (which can't use inplace store)
            flipped = mx.sym.flip(axis=0, name='conv')
            sym = mx.sym.Convolution(data=flipped, num_filter=3, kernel=(3, 3), pad=(1, 1), name='conv')
            flipped_no_cudnn = mx.sym.flip(axis=0, name='conv')
            sym_no_cudnn = mx.sym.Convolution(data=flipped_no_cudnn, num_filter=3, kernel=(3, 3), pad=(1, 1),
                                              cudnn_off=True, name='conv')
            try:
                # tol can be pretty high- we're looking for a large diff due to garbaged workspace
                check_consistency([sym, sym_no_cudnn], [ctx, ctx], rtol=1e-2, atol=1e-2)
            except Exception:
                # Report which randomly drawn size failed, then let the error propagate.
                print('Failing conv size = {}'.format(size))
                raise
2020-02-06 22:10:30 -08:00
2020-05-16 19:04:44 -07:00
@pytest.mark.skip(reason="skipping for now due to severe flakiness")
@with_seed()
def test_convolution_multiple_streams():
    """Run ``_conv_with_num_streams`` for every stream-count / engine pairing.

    Each combination is handed to ``run_in_spawned_process`` together with a
    dict keyed by ``MXNET_GPU_WORKER_NSTREAMS`` and ``MXNET_ENGINE_TYPE``
    (presumably applied as environment variables in the spawned process --
    confirm against ``run_in_spawned_process``).  Progress is logged to
    stderr before and after each run.
    """
    for num_streams in ['1', '2']:
        for engine in ['NaiveEngine', 'ThreadedEngine', 'ThreadedEnginePerDevice']:
            print('Starting engine {} with {} streams.'.format(engine, num_streams), file=sys.stderr)
            run_in_spawned_process(_conv_with_num_streams,
                                   {'MXNET_GPU_WORKER_NSTREAMS': num_streams, 'MXNET_ENGINE_TYPE': engine})
            print('Finished engine {} with {} streams.'.format(engine, num_streams), file=sys.stderr)
2019-02-23 18:56:30 -08:00
2018-07-30 13:34:34 -07:00
# This test is designed to expose an issue with cudnn v7.1.4 algo find() when invoked with large c.
# Algos returned by find() can fail to run with grad_req='add' (wgrad kernel beta parameter == 1.0f).
@with_seed()
@pytest.mark.serial
def test_convolution_large_c():
    """Check float32 vs. float64 consistency of Convolution with a very large channel count.

    1D ('NCW') and 2D ('NCHW') convolutions are checked for several data
    widths, first with ``grad_req='write'`` and then with ``grad_req='add'``.
    """
    problematic_c = 64 * 1024
    # The convolution accumulates many values, so scale the input magnitude.
    scale = 0.1

    def check_conv(data_shape, grad_req, **conv_kwargs):
        # Same symbol and data shape on gpu(0), once per dtype; results must agree.
        ctx_list = [{'ctx': mx.gpu(0), 'conv_data': data_shape, 'type_dict': {'conv_data': dtype}}
                    for dtype in (np.float32, np.float64)]
        sym = mx.sym.Convolution(name='conv', **conv_kwargs)
        check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale)

    def test_1D_with_width(width, grad_req):
        check_conv((1, problematic_c, width), grad_req,
                   layout='NCW', num_filter=8, kernel=(2,))

    def test_2D_with_width(width, grad_req):
        check_conv((1, problematic_c, 2, width), grad_req,
                   layout='NCHW', num_filter=4, kernel=(2, 2))

    # Run with different data tensor shapes to run cudnnFind() multiple times.
    # First, populate algo and op caches with models that always use cudnnFind() (req == 'write').
    # Then run models that must avoid cached cudnnFind() results in some cases (req == 'add').
    widths = [4, 16, 64]
    for req in ['write', 'add']:
        for width in widths:
            test_1D_with_width(width, req)
            test_2D_with_width(width, req)
# This test is designed to expose an issue with cudnn v7.1.4 algo find() when invoked with large c.
# Algos returned by find() can fail to run with grad_req='add' (wgrad kernel beta parameter == 1.0f).
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2018-07-30 13:34:34 -07:00
def test_deconvolution_large_c ( ) :
# Builds 1-D (NCW) and 2-D (NCHW) Deconvolution symbols whose num_filter is
# problematic_c and passes each of them to check_consistency() on mx.gpu(0)
# with float32 and float64 input data, for every width / grad_req pair
# listed at the bottom of the function.
# Channel count (64 * 1024 = 65536) used as num_filter by both helpers below.
problematic_c = 64 * 1024
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
# The deconvolution accumulates many values, so scale the input magnitude.
scale = 0.1
2018-07-30 13:34:34 -07:00
# 1-D helper: data shape (1, 8, width), layout NCW, kernel (2,).
# The two context entries differ only in the data dtype (float32 vs float64).
def test_1D_with_width ( width , grad_req ) :
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 1 , 8 , width ) , ' type_dict ' : { ' deconv_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 1 , 8 , width ) , ' type_dict ' : { ' deconv_data ' : np . float64 } } ]
sym = mx . sym . Deconvolution ( layout = ' NCW ' , num_filter = problematic_c , kernel = ( 2 , ) , name = ' deconv ' )
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
# The same symbol is listed once per context entry; grad_req and scale are forwarded unchanged.
check_consistency ( [ sym , sym ] , ctx_list , grad_req = grad_req , scale = scale )
2018-07-30 13:34:34 -07:00
# 2-D helper: data shape (1, 8, 2, width), layout NCHW, kernel (2, 2).
# The two context entries differ only in the data dtype (float32 vs float64).
def test_2D_with_width ( width , grad_req ) :
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 1 , 8 , 2 , width ) , ' type_dict ' : { ' deconv_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 1 , 8 , 2 , width ) , ' type_dict ' : { ' deconv_data ' : np . float64 } } ]
sym = mx . sym . Deconvolution ( layout = ' NCHW ' , num_filter = problematic_c , kernel = ( 2 , 2 ) , name = ' deconv ' )
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
# The same symbol is listed once per context entry; grad_req and scale are forwarded unchanged.
check_consistency ( [ sym , sym ] , ctx_list , grad_req = grad_req , scale = scale )
2018-07-30 13:34:34 -07:00
# Run with different data tensor shapes to run cudnnFind() multiple times.
# First, populate algo and op caches with models that always use cudnnFind() (req == 'write').
# Then run models that must avoid cached cudnnFind() results in some cases (req == 'add').
# req is the outer loop, so every 'write' run (all widths, 1-D and 2-D)
# completes before the first 'add' run starts.
widths = [ 4 , 16 , 64 ]
for req in [ ' write ' , ' add ' ] :
for width in widths :
test_1D_with_width ( width , req )
test_2D_with_width ( width , req )
2018-02-18 03:11:58 -08:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2017-03-17 12:42:11 -07:00
def test_convolution_versions ( ) :
# 2D convolution NCHW
ctx_list = [ { ' ctx ' : mx . cpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 7 , 7 ) , ' type_dict ' : { ' conv_data ' : np . float32 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 7 , 7 ) , ' type_dict ' : { ' conv_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 7 , 7 ) , ' type_dict ' : { ' conv_data ' : np . float32 } } ]
conv_cudnn = mx . sym . Convolution ( num_filter = 3 , kernel = ( 3 , 3 ) , pad = ( 1 , 1 ) , name = ' conv ' )
conv_cpu = mx . sym . Convolution ( num_filter = 3 , kernel = ( 3 , 3 ) , pad = ( 1 , 1 ) , name = ' conv ' )
conv_gpu = mx . sym . Convolution ( num_filter = 3 , kernel = ( 3 , 3 ) , pad = ( 1 , 1 ) , cudnn_off = True , name = ' conv ' )
2020-08-23 15:24:40 -07:00
syms = [ conv_cudnn , conv_cpu , conv_gpu ]
2017-03-17 12:42:11 -07:00
check_consistency ( syms , ctx_list )
# 3D convolution NCDHW
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 5 , 7 , 7 ) , ' type_dict ' : { ' conv_data ' : np . float32 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 5 , 7 , 7 ) , ' type_dict ' : { ' conv_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' conv_data ' : ( 2 , 2 , 5 , 7 , 7 ) , ' type_dict ' : { ' conv_data ' : np . float32 } } ]
conv_cudnn = mx . sym . Convolution ( num_filter = 3 , kernel = ( 2 , 3 , 3 ) , pad = ( 1 , 1 , 1 ) , name = ' conv ' )
conv_cpu = mx . sym . Convolution ( num_filter = 3 , kernel = ( 2 , 3 , 3 ) , pad = ( 1 , 1 , 1 ) , name = ' conv ' )
conv_gpu = mx . sym . Convolution ( num_filter = 3 , kernel = ( 2 , 3 , 3 ) , pad = ( 1 , 1 , 1 ) , cudnn_off = True , name = ' conv ' )
syms = [ conv_cudnn , conv_cpu , conv_gpu ]
check_consistency ( syms , ctx_list )
2018-02-18 03:11:58 -08:00
2019-02-16 15:17:33 -08:00
# More max-pooling strides and pads to test cudnn pooling implementation code paths
2018-02-18 03:11:58 -08:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2019-02-16 15:17:33 -08:00
def test_pooling_nhwc_with_convention ( ) :
# Compares max pooling in the default layout with the same pooling performed
# in NHWC layout (wrapped in transposes) through check_consistency_NxM(), on
# mx.gpu(0), over several input shapes, kernels, strides and data types.
# Returns a two-element list built from the same kwargs:
# [plain Pooling symbol, transpose -> NHWC Pooling -> transpose symbol].
def make_pooling_syms ( * * kwargs ) :
# Conventional NCHW layout pooling
sym = mx . sym . Pooling ( * * kwargs )
# NHWC pooling
data = mx . sym . Variable ( ' pool_data ' )
# axes = (0, 2, 3, 1) moves axis 1 to the end before pooling;
# axes = (0, 3, 1, 2) afterwards moves it back to position 1.
sym_nhwc = mx . sym . transpose ( data , axes = ( 0 , 2 , 3 , 1 ) )
sym_nhwc = mx . sym . Pooling ( sym_nhwc , layout = ' NHWC ' , * * kwargs )
sym_nhwc = mx . sym . transpose ( sym_nhwc , axes = ( 0 , 3 , 1 , 2 ) , name = ' pool ' )
return [ sym , sym_nhwc ]
# While the float32 and float64 output is reliably consistent, float16 departs occasionally.
# We compare nhwc and nchw results only within a given precision.
for in_shape in [ ( 3 , 4 , 8 , 8 ) , ( 2 , 2 , 20 , 20 ) ] :
for kernel in [ ( 2 , 2 ) , ( 3 , 3 ) , ( 4 , 4 ) ] :
for stride in [ ( 1 , 1 ) , ( 1 , 2 ) , ( 2 , 1 ) , ( 2 , 2 ) ] :
for data_type in [ np . float64 , np . float32 , np . float16 ] :
# ctx_list holds a single entry, so each check below sees only one dtype.
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) , ' pool_data ' : in_shape ,
' type_dict ' : { ' pool_data ' : data_type } } ]
# Three variants per configuration: 'valid' convention, 'full' convention,
# and global pooling.
symlist = make_pooling_syms ( kernel = kernel , pool_type = ' max ' , stride = stride ,
pooling_convention = ' valid ' , name = ' pool ' )
check_consistency_NxM ( symlist , ctx_list )
symlist = make_pooling_syms ( kernel = kernel , pool_type = ' max ' , stride = stride ,
pooling_convention = ' full ' , name = ' pool ' )
check_consistency_NxM ( symlist , ctx_list )
# NOTE(review): kernel (300, 300) is larger than every in_shape used here;
# presumably it is ignored because global_pool = True - confirm.
symlist = make_pooling_syms ( kernel = ( 300 , 300 ) , pool_type = ' max ' ,
global_pool = True , name = ' pool ' )
check_consistency_NxM ( symlist , ctx_list )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2016-12-23 23:55:49 -08:00
def test_pooling_with_type ( ) :
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) , ' pool_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' pool_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' pool_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' pool_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' pool_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' pool_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' pool_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' pool_data ' : np . float64 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' pool_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' pool_data ' : np . float32 } } ]
sym = mx . sym . Pooling ( kernel = ( 3 , 3 ) , pool_type = ' max ' , pooling_convention = ' valid ' , name = ' pool ' )
2018-08-12 12:43:19 -07:00
check_consistency ( sym , ctx_list , rand_type = np . float16 )
2016-12-23 23:55:49 -08:00
sym = mx . sym . Pooling ( kernel = ( 3 , 3 ) , pool_type = ' max ' , pooling_convention = ' full ' , name = ' pool ' )
2018-08-12 12:43:19 -07:00
check_consistency ( sym , ctx_list , rand_type = np . float16 )
2016-12-23 23:55:49 -08:00
sym = mx . sym . Pooling ( kernel = ( 300 , 300 ) , pool_type = ' max ' , global_pool = True , name = ' pool ' )
2018-08-12 12:43:19 -07:00
check_consistency ( sym , ctx_list , rand_type = np . float16 )
2016-03-19 23:45:52 -07:00
2018-02-18 03:11:58 -08:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2016-06-09 01:32:07 +09:00
def test_deconvolution_with_type ( ) :
# Passes a 1-D and a 2-D Deconvolution symbol to check_consistency() across
# gpu float64 / float32 / float16 and cpu float64 / float32 contexts. Each
# symbol is checked twice: once without grad_req and once with grad_req "add".
2018-01-02 10:47:41 -08:00
# Test basic deconvolution without exercising stride, pad or dilation.
# 1D deconvolution
sym = mx . sym . Deconvolution ( num_filter = 3 , kernel = ( 3 , ) , name = ' deconv ' )
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 7 ) , ' type_dict ' : { ' deconv_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 7 ) , ' type_dict ' : { ' deconv_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 7 ) , ' type_dict ' : { ' deconv_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 7 ) , ' type_dict ' : { ' deconv_data ' : np . float64 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 7 ) , ' type_dict ' : { ' deconv_data ' : np . float32 } } ]
# wider tolerance needed for true-fp16 test above
# Tolerances are keyed by numpy dtype.
tol = { np . dtype ( np . float16 ) : 0.3 ,
np . dtype ( np . float32 ) : 1e-3 ,
np . dtype ( np . float64 ) : 1e-5 ,
np . dtype ( np . uint8 ) : 0 ,
np . dtype ( np . int32 ) : 0 }
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
# The same per-dtype dict is supplied as both rtol and atol.
check_consistency ( sym , ctx_list , rtol = tol , atol = tol )
check_consistency ( sym , ctx_list , rtol = tol , atol = tol , grad_req = " add " )
2018-01-02 10:47:41 -08:00
# 2D deconvolution
2016-06-10 11:31:17 +09:00
sym = mx . sym . Deconvolution ( num_filter = 2 , kernel = ( 3 , 3 ) , name = ' deconv ' )
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' deconv_data ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' deconv_data ' : np . float32 } } ,
{ ' ctx ' : mx . gpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' deconv_data ' : np . float16 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' deconv_data ' : np . float64 } } ,
{ ' ctx ' : mx . cpu ( 0 ) , ' deconv_data ' : ( 2 , 2 , 10 , 10 ) , ' type_dict ' : { ' deconv_data ' : np . float32 } } ]
2017-04-18 22:00:04 -07:00
# wider tolerance needed for true-fp16 test above
# NOTE(review): this dict has the same contents as the one built in the 1D section.
tol = { np . dtype ( np . float16 ) : 0.3 ,
np . dtype ( np . float32 ) : 1e-3 ,
np . dtype ( np . float64 ) : 1e-5 ,
np . dtype ( np . uint8 ) : 0 ,
np . dtype ( np . int32 ) : 0 }
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
# The same per-dtype dict is supplied as both rtol and atol.
check_consistency ( sym , ctx_list , rtol = tol , atol = tol )
check_consistency ( sym , ctx_list , rtol = tol , atol = tol , grad_req = " add " )
2017-04-18 22:00:04 -07:00
2018-02-18 03:11:58 -08:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2017-04-18 22:00:04 -07:00
def test_deconvolution_options():
    """Exercise Deconvolution with pad, stride and dilate options in 1D and 2D.

    For every option a default symbol and a ``cudnn_off=True`` symbol are built
    and passed, together with a list of device/dtype configurations, to
    ``check_consistency_NxM`` (defined elsewhere in this file).
    """
    def make_ctx_list(shape):
        # Device/dtype combinations, kept in the order of the original literal lists:
        # gpu f64, gpu f32, gpu f16, cpu f64, cpu f32.
        combos = [(mx.gpu(0), np.float64),
                  (mx.gpu(0), np.float32),
                  (mx.gpu(0), np.float16),
                  (mx.cpu(0), np.float64),
                  (mx.cpu(0), np.float32)]
        return [{'ctx': ctx, 'deconv_data': shape, 'type_dict': {'deconv_data': dtype}}
                for ctx, dtype in combos]

    # 1D deconvolution
    ctx_list = make_ctx_list((2, 2, 7))
    # Pad > 0
    sym = mx.sym.Deconvolution(layout='NCW', num_filter=3, kernel=(3,), pad=(1,), name='deconv')
    sym_no_cudnn = mx.sym.Deconvolution(num_filter=3, kernel=(3,), pad=(1,),
                                        cudnn_off=True, name='deconv')
    check_consistency_NxM([sym, sym_no_cudnn], ctx_list)
    # Stride > 1
    sym = mx.sym.Deconvolution(layout='NCW', num_filter=3, kernel=(3,), stride=(2,), name='deconv')
    sym_no_cudnn = mx.sym.Deconvolution(num_filter=3, kernel=(3,), stride=(2,),
                                        cudnn_off=True, name='deconv')
    check_consistency_NxM([sym, sym_no_cudnn], ctx_list)
    # Dilate > 1
    sym = mx.sym.Deconvolution(layout='NCW', num_filter=3, kernel=(3,), dilate=(2,), name='deconv')
    sym_no_cudnn = mx.sym.Deconvolution(num_filter=3, kernel=(3,), dilate=(2,),
                                        cudnn_off=True, name='deconv')
    check_consistency_NxM([sym, sym_no_cudnn], ctx_list)

    # 2D deconvolution
    ctx_list = make_ctx_list((2, 8, 10, 10))
    # Pad > 0
    sym = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), pad=(1, 1), name='deconv')
    sym_no_cudnn = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), pad=(1, 1),
                                        cudnn_off=True, name='deconv')
    check_consistency_NxM([sym, sym_no_cudnn], ctx_list)
    # Stride > 1
    sym = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), stride=(2, 2), name='deconv')
    sym_no_cudnn = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), stride=(2, 2),
                                        cudnn_off=True, name='deconv')
    check_consistency_NxM([sym, sym_no_cudnn], ctx_list)
    # Dilate > 1
    sym = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), dilate=(2, 2), name='deconv')
    sym_no_cudnn = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), dilate=(2, 2),
                                        cudnn_off=True, name='deconv')
    check_consistency_NxM([sym, sym_no_cudnn], ctx_list)

    # # 3D deconvolution (not yet enabled)
    # ctx_list = [{'ctx': mx.cpu(0), 'conv_data': (2, 2, 5, 7, 7), 'type_dict': {'conv_data': np.float64}},
    #             {'ctx': mx.cpu(0), 'conv_data': (2, 2, 5, 7, 7), 'type_dict': {'conv_data': np.float64}},
    #             {'ctx': mx.gpu(0), 'conv_data': (2, 2, 5, 7, 7), 'type_dict': {'conv_data': np.float64}},
    #             {'ctx': mx.gpu(0), 'conv_data': (2, 2, 5, 7, 7), 'type_dict': {'conv_data': np.float32}}]
    # # Pad > 0
    # sym = mx.sym.Convolution(num_filter=3, kernel=(2,3,3), pad=(1,1,1), name='conv')
    # sym_no_cudnn = mx.sym.Convolution(num_filter=3, kernel=(2,3,3), pad=(1,1,1), cudnn_off=True, name='conv')
    # check_consistency_NxM([sym, sym_no_cudnn], ctx_list)
    # # Stride > 1
    # sym = mx.sym.Convolution(num_filter=3, kernel=(2,3,3), stride=(2,2,2), name='conv')
    # sym_no_cudnn = mx.sym.Convolution(num_filter=3, kernel=(2,3,3), stride=(2,2,2), cudnn_off=True, name='conv')
    # check_consistency_NxM([sym, sym_no_cudnn], ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed ( 1234 )
2017-01-28 00:45:17 +08:00
def test_bilinear_sampler_with_type():
    """Run BilinearSampler through ``check_consistency`` on GPU and CPU.

    The same data/grid shapes are used for every entry; only the device and the
    dtype assigned to ``data`` vary. The check is run once with the default
    ``grad_req`` and once with ``grad_req="add"``.
    """
    data = mx.sym.Variable('data')
    grid = mx.sym.Variable('grid')
    sym = mx.sym.BilinearSampler(data=data, grid=grid)

    # Ordering matches the original literal list: gpu f64/f32/f16, then cpu f64/f32.
    combos = [(mx.gpu(0), np.float64),
              (mx.gpu(0), np.float32),
              (mx.gpu(0), np.float16),
              (mx.cpu(0), np.float64),
              (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': ctx, 'data': (1, 5, 10, 10), 'grid': (1, 2, 10, 10),
                 'type_dict': {'data': dtype}}
                for ctx, dtype in combos]

    check_consistency(sym, ctx_list)
    check_consistency(sym, ctx_list, grad_req="add")
2018-02-18 03:11:58 -08:00
@with_seed ( )
2017-01-28 00:45:17 +08:00
def test_grid_generator_with_type():
    """Run GridGenerator ('affine' and 'warp') through ``check_consistency``.

    Each transform type is compared between GPU and CPU in float32, once with
    the default ``grad_req`` and once with ``grad_req="add"``. The affine case
    passes an explicit ``scale`` to ``check_consistency``.
    """
    data = mx.sym.Variable('data')

    # Affine transform: input is a (batch, 6) parameter matrix.
    sym = mx.sym.GridGenerator(data=data, transform_type='affine', target_shape=(20, 20))
    scale = 1
    ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}},
                {'ctx': mx.cpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}}]
    check_consistency(sym, ctx_list, scale=scale)
    check_consistency(sym, ctx_list, scale=scale, grad_req="add")

    # Warp transform: input has the same spatial size as target_shape.
    sym = mx.sym.GridGenerator(data=data, transform_type='warp', target_shape=(20, 20))
    ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}},
                {'ctx': mx.cpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}}]
    check_consistency(sym, ctx_list)
    check_consistency(sym, ctx_list, grad_req="add")
2018-02-18 03:11:58 -08:00
2018-06-28 16:16:29 -07:00
@with_seed ( )
2017-08-12 08:08:36 +08:00
def test_spatial_transformer_with_type():
    """Run SpatialTransformer through ``check_consistency`` with cuDNN off and on.

    The localisation input is a small Flatten -> FullyConnected -> relu ->
    FullyConnected(6) network fed from the same ``data`` variable. GPU and CPU
    are compared in float64, with the default ``grad_req`` and with
    ``grad_req="add"``.
    """
    data = mx.sym.Variable('data')
    loc = mx.sym.Flatten(data)
    loc = mx.sym.FullyConnected(data=loc, num_hidden=10)
    loc = mx.sym.Activation(data=loc, act_type='relu')
    loc = mx.sym.FullyConnected(data=loc, num_hidden=6)

    ctx_list = [{'ctx': mx.gpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float64}},
                {'ctx': mx.cpu(0), 'data': (1, 5, 10, 10), 'type_dict': {'data': np.float64}}]

    # Same order as before: first with cuDNN disabled, then with it allowed.
    for cudnn_off in (True, False):
        sym = mx.sym.SpatialTransformer(data=data, loc=loc, target_shape=(10, 10),
                                        transform_type="affine", sampler_type="bilinear",
                                        cudnn_off=cudnn_off)
        check_consistency(sym, ctx_list)
        check_consistency(sym, ctx_list, grad_req="add")
2017-03-17 12:42:11 -07:00
2018-08-12 12:43:19 -07:00
@with_seed ( )
def test_pooling_with_type2():
    """Compare GPU and CPU Pooling results for several kernel/pad/pool_type combos.

    While the float32 and float64 output is reliably consistent, float16 departs
    occasionally, so cpu and gpu results are compared only within a given precision.
    """
    # Pooling configurations, checked in this order for every dtype.
    pooling_cases = [
        {'kernel': (3, 3), 'stride': (2, 2), 'pool_type': 'max'},
        {'kernel': (3, 3), 'pad': (1, 1), 'pool_type': 'avg'},
        {'kernel': (5, 5), 'pad': (2, 2), 'pool_type': 'max'},
        {'kernel': (3, 3), 'pad': (1, 1), 'pool_type': 'sum'},
    ]

    for data_type in [np.float64, np.float32, np.float16]:
        ctx_list = [{'ctx': mx.gpu(0), 'pool_data': (10, 2, 10, 10),
                     'type_dict': {'pool_data': data_type}},
                    {'ctx': mx.cpu(0), 'pool_data': (10, 2, 10, 10),
                     'type_dict': {'pool_data': data_type}}]

        for pool_args in pooling_cases:
            sym = mx.sym.Pooling(name='pool', **pool_args)
            check_consistency(sym, ctx_list)
@with_seed ( )
def test_pooling_nhwc_with_type():
    """Compare NCHW pooling against transpose -> NHWC pooling -> transpose on GPU.

    While the float32 and float64 output is reliably consistent, float16 departs
    occasionally, so nhwc and nchw results are compared only within a given precision.
    """
    def make_pooling_syms(**kwargs):
        """Return ``[nchw_symbol, nhwc_symbol]`` built from the same Pooling kwargs."""
        # Conventional NCHW layout pooling
        sym = mx.sym.Pooling(**kwargs)
        # NHWC pooling: transpose in, pool with layout='NHWC', transpose back out
        data = mx.sym.Variable('pool_data')
        sym_nhwc = mx.sym.transpose(data, axes=(0, 2, 3, 1))
        sym_nhwc = mx.sym.Pooling(sym_nhwc, layout='NHWC', **kwargs)
        sym_nhwc = mx.sym.transpose(sym_nhwc, axes=(0, 3, 1, 2), name='pool')
        return [sym, sym_nhwc]

    # Pooling configurations, checked in this order for every dtype.
    pooling_cases = [
        {'kernel': (3, 3), 'stride': (2, 2), 'pool_type': 'max'},
        {'kernel': (3, 3), 'pad': (1, 1), 'pool_type': 'avg'},
        {'kernel': (5, 5), 'pad': (2, 2), 'pool_type': 'max'},
    ]

    for data_type in [np.float64, np.float32, np.float16]:
        # NHWC pooling only enabled on GPU with CUDNN
        ctx_list = [{'ctx': mx.gpu(0), 'pool_data': (10, 2, 10, 10),
                     'type_dict': {'pool_data': data_type}}]
        for pool_args in pooling_cases:
            symlist = make_pooling_syms(name='pool', **pool_args)
            check_consistency_NxM(symlist, ctx_list)
2016-11-17 22:19:38 -08:00
2018-02-18 03:11:58 -08:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2017-03-21 23:01:57 -07:00
def test_pooling_versions():
    """Cross-check the Pooling operator across implementations, layouts and dtypes.

    Implementations are named ``<op>_<ctx>`` where ``<op>`` is ``pool`` or
    ``pool_transposed`` and ``<ctx>`` is ``cpu``, ``gpu`` (GPU with
    ``cudnn_off=True``) or ``cudnn`` (GPU with ``cudnn_off=False``). For every
    dtype, dimensionality and pool type, one symbol per implementation is built
    and the whole list is passed to ``check_consistency``.
    """
    # Valid context suffixes / operator flavors; constant, so defined once up front.
    expected_ctxs = ['cpu', 'gpu', 'cudnn']
    expected_pool_ops = ['pool', 'pool_transposed']

    # Produce the name of the 'transposed' layout, given the dimension
    def transposed_layout(ndim):
        if ndim < 3 or ndim > 5:
            raise RuntimeError("Invalid data dim, expecting 3, 4 or 5")
        return ('NWC', 'NHWC', 'NDHWC')[ndim - 3]

    # default padding is all zeros
    def is_default_pad(pad):
        return pad == (0,) * len(pad)

    # default stride is all ones
    def is_default_stride(stride):
        return stride == (1,) * len(stride)

    # returns True/False randomly with equal probability
    def random_choice():
        return np.random.random(1)[0] < 0.5

    def test_pooling_versions_helper(pool_op_list, data, kernel, pool_type, pad, stride,
                                     pooling_convention='valid', global_pool=False, p_value=2,
                                     count_include_pad=True, tol=None, dtype=np.float32):
        """Build one symbol/context pair per entry of ``pool_op_list`` and compare them.

        ``data`` is the input shape tuple. ``pad`` and ``stride`` are only read
        when ``global_pool`` is False. ``tol`` is forwarded as both ``rtol`` and
        ``atol`` to ``check_consistency``.

        Raises RuntimeError when an entry names an unknown context or operator flavor.
        """
        ctx_list = []
        sym_list = []
        for pool_ctx in pool_op_list:
            (pool_op, ctx_type) = pool_ctx.rsplit('_', 1)
            if ctx_type not in expected_ctxs:
                raise RuntimeError('Expected one of {}, saw {}.'.format(expected_ctxs, ctx_type))
            ctx = mx.cpu(0) if ctx_type == 'cpu' else mx.gpu(0)
            ctx_list.append({'ctx': ctx, 'pool_data': data, 'type_dict': {'pool_data': dtype}})
            # start with pool args present in all cases
            pool_op_args = {'kernel': kernel, 'pool_type': pool_type,
                            'pooling_convention': pooling_convention, 'name': 'pool'}
            # add other args as needed
            if global_pool:
                pool_op_args['global_pool'] = True
            else:
                # Add pad and stride param if needed, plus randomly when it matches the default
                if not is_default_pad(pad) or random_choice():
                    pool_op_args['pad'] = pad
                if not is_default_stride(stride) or random_choice():
                    pool_op_args['stride'] = stride

            pool_op_args.update({'p_value': p_value, 'count_include_pad': count_include_pad})
            if ctx_type != 'cpu':
                # 'gpu' means the non-cuDNN GPU path; 'cudnn' leaves cuDNN enabled.
                pool_op_args['cudnn_off'] = ctx_type == 'gpu'
            if pool_op == 'pool':
                # isolate pooling input from symbol input to test shared tensor optimizations
                buffered_input = mx.sym.identity(name='pool')
                sym = mx.sym.Pooling(buffered_input, **pool_op_args)
            elif pool_op == 'pool_transposed':
                ndim = len(data)
                # NCW->NWC axes=(0,2,1) NCHW->NHWC axes=(0,2,3,1) NCDHW->NDHWC axes=(0,2,3,4,1);
                axes = (0,) + tuple(range(2, ndim)) + (1,)
                transposed = mx.sym.transpose(axes=axes, name='pool')
                pooled = mx.sym.Pooling(data=transposed, layout=transposed_layout(ndim),
                                        **pool_op_args)
                # NWC->NCW axes=(0,2,1) NHWC->NCHW axes=(0,3,1,2) NDHWC->NCDHW axes=(0,4,1,2,3);
                axes = (0, ndim - 1) + tuple(range(1, ndim - 1))
                sym = mx.sym.transpose(data=pooled, axes=axes, name='pool')
            else:
                raise RuntimeError('Expected one of {}, saw {}.'.format(expected_pool_ops,
                                                                        pool_op))
            sym_list.append(sym)

        check_consistency(sym_list, ctx_list, equal_nan=(not count_include_pad), rtol=tol, atol=tol)

    def test_pooling_dim(dim, pool_type, dtype, pool_op_list, p_value=2, count_include_pad=True,
                         tol=None):
        """Run the helper over the kernel/pad/stride table of test class ``dim``.

        Every table row is tried with both the 'valid' and 'full' pooling
        conventions, followed by one global-pool run.

        Raises RuntimeError when ``dim`` is not a known test class.
        """
        if dim == '1D':
            data = (3, 3, 10)
            kernels = [(4,), (4,), (5,)]
            pads = [(0,), (2,), (2,)]
            strides = [(1,), (2,), (1,)]
        elif dim == '2D_no_padding':
            data = (3, 2, 20, 20)
            kernels = [(3, 3), (4, 5)]
            pads = [(0, 0), (0, 0)]
            strides = [(1, 1), (2, 1)]
        elif dim == '2D':
            data = (2, 2, 20, 20)
            kernels = [(3, 3), (3, 5), (4, 5), (4, 5)]
            pads = [(0, 0), (1, 2), (0, 0), (2, 3)]
            strides = [(1, 1), (1, 1), (2, 1), (1, 1)]
        elif dim == '3D':
            data = (2, 3, 20, 20, 20)
            kernels = [(4, 5, 3), (4, 5, 3), (3, 5, 7)]
            pads = [(0, 0, 0), (2, 3, 2), (1, 2, 3)]
            strides = [(1, 1, 1), (2, 3, 1), (1, 1, 1)]
        else:
            raise RuntimeError('Unexpected pooling test class: {}.'.format(dim))

        for kernel, pad, stride in zip(kernels, pads, strides):
            for pooling_convention in ['valid', 'full']:
                try:
                    test_pooling_versions_helper(pool_op_list=pool_op_list,
                                                 data=data, kernel=kernel, pad=pad, stride=stride,
                                                 pool_type=pool_type,
                                                 pooling_convention=pooling_convention,
                                                 global_pool=False, p_value=p_value,
                                                 count_include_pad=count_include_pad,
                                                 tol=tol, dtype=dtype)
                except Exception:
                    # Dump the failing configuration before letting the error propagate.
                    print('pool_op_list = {}'.format(pool_op_list))
                    print('kernel={}, pad={}, stride={}'.format(kernel, pad, stride))
                    print('pool_type={}, pooling_convention={}, global_pool=False'.format(
                        pool_type, pooling_convention))
                    print('p_value={}, count_include_pad={}, dtype={}'.format(
                        p_value, count_include_pad, dtype))
                    print('environ = \n{}'.format(os.environ))
                    raise

        # Make sure kernel is ignored during global_pool by sometimes setting it to a crazy value
        kernel = kernels[0]
        if random_choice():
            kernel = (300,) * len(kernel)

        test_pooling_versions_helper(pool_op_list=pool_op_list,
                                     data=data, kernel=kernel, pad=None, stride=None,
                                     pool_type=pool_type, global_pool=True, p_value=p_value,
                                     count_include_pad=count_include_pad, tol=tol, dtype=dtype)

    # The various implementations of the standard pooling operator
    std_pool_op_list = ['pool_cpu', 'pool_transposed_cpu',
                        'pool_gpu', 'pool_transposed_gpu',
                        'pool_cudnn', 'pool_transposed_cudnn']

    for dtype in [np.float32, np.float64, np.float16]:
        # Testing of the standard (not 'v1') pooling operator is universal across all
        # data dimensions, implementations and layouts.
        for dim in ['1D', '2D', '3D']:
            test_pooling_dim(dim, 'max', dtype, std_pool_op_list)
            test_pooling_dim(dim, 'avg', dtype, std_pool_op_list, count_include_pad=True)
            test_pooling_dim(dim, 'avg', dtype, std_pool_op_list, count_include_pad=False)
            test_pooling_dim(dim, 'sum', dtype, std_pool_op_list)
            test_pooling_dim(dim, 'lp', dtype, std_pool_op_list, p_value=1)
            test_pooling_dim(dim, 'lp', dtype, std_pool_op_list, p_value=2)
            test_pooling_dim(dim, 'lp', dtype, std_pool_op_list, p_value=3)
2017-03-21 23:01:57 -07:00
2018-11-17 09:43:05 +08:00
@with_seed ( )
def test_pooling_full_2d():
    """Compare CPU and GPU 2D Pooling under the 'full' pooling convention.

    A fixed shape/kernel/pad/stride combination is checked in float32 for the
    'max', 'avg' and 'sum' pool types.
    """
    def test_pooling_full_2d_type(pool_type):
        data = (2, 2, 10, 10)
        kernel = (4, 5)
        pad = (1, 2)
        stride = (3, 4)
        convention = 'full'

        ctx_list = []
        sym_list = []
        # o_h = ceil((10 + 1 + 1 - 4) / 3) + 1 = 4
        # o_w = ceil((10 + 2 + 2 - 5) / 4) + 1 = 4
        # One identical Pooling symbol per device, CPU first and then GPU.
        for ctx in (mx.cpu(0), mx.gpu(0)):
            ctx_list.append({'ctx': ctx, 'pool_data': data,
                             'type_dict': {'pool_data': np.float32}})
            sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride,
                                           pool_type=pool_type,
                                           pooling_convention=convention,
                                           global_pool=False, name='pool'))

        check_consistency(sym_list, ctx_list)

    test_pooling_full_2d_type('max')
    test_pooling_full_2d_type('avg')
    test_pooling_full_2d_type('sum')
2019-07-08 10:07:37 +08:00
@with_seed()
@pytest.mark.serial
def test_flatten_slice_after_conv():
    """Run check_consistency on Convolution -> flatten -> slice (float32, GPU and CPU)."""
    data = mx.sym.Variable('conv_data')
    conv = mx.symbol.Convolution(data=data, name='conv', num_filter=16, kernel=(3, 3), stride=(1, 1))
    flatten = mx.symbol.flatten(data=conv)
    slice_sym = mx.symbol.slice(data=flatten, begin=0, end=1)

    ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 16, 16, 16), 'type_dict': {'conv_data': np.float32}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 16, 16, 16), 'type_dict': {'conv_data': np.float32}}]
    # NOTE(review): scale=0.5 was added by the "Unittest tolerance handling
    # improvements (#18694)" change, described there as fixing this test
    # "via scale"; presumably it shrinks the generated inputs -- confirm
    # against check_consistency before changing it.
    check_consistency(slice_sym, ctx_list, scale=0.5)
2019-07-08 10:07:37 +08:00
2019-07-22 07:25:30 +08:00
@with_seed()
def test_bilinear_resize_op():
    """Run check_consistency on contrib.BilinearResize2D for float32 on CPU and GPU.

    Covers explicit height/width as well as the 'odd_scale' and 'to_even_up'
    scale modes, each with align_corners True and False.
    """
    ctx_list = [{'ctx': mx.cpu(0), 'data': (2, 2, 20, 20), 'type_dict': {'data': np.float32}},
                {'ctx': mx.gpu(0), 'data': (2, 2, 20, 20), 'type_dict': {'data': np.float32}}]
    data = mx.sym.Variable('data')

    # Explicit output size.
    for align_corners in (True, False):
        sym = mx.sym.contrib.BilinearResize2D(data, height=10, width=5, align_corners=align_corners)
        check_consistency(sym, ctx_list)

    # Output size derived from scale factors and a rounding mode.
    scale_cases = [(2, 0.5, 'odd_scale'),
                   (0.5, 2, 'to_even_up')]
    for scale_height, scale_width, mode in scale_cases:
        for align_corners in (True, False):
            sym = mx.sym.contrib.BilinearResize2D(data, None, scale_height=scale_height,
                                                  scale_width=scale_width, mode=mode,
                                                  align_corners=align_corners)
            check_consistency(sym, ctx_list)
2019-07-22 07:25:30 +08:00
2018-02-18 03:11:58 -08:00
@with_seed()
@pytest.mark.serial
def test_global_pooling():
    """Run check_consistency on Pooling with global_pool=True for 1D and 2D data.

    For every pool type, nine float32 symbols are compared: three targets
    (CPU, GPU with cudnn_off=False, GPU with cudnn_off=True) times three
    argument sets (kernel+pad+stride, kernel only, none of them).
    """
    def check_global_pooling(data, kernel, pad, stride, pool_type, p_value):
        # Shared body of the 1D and 2D cases; builds the lists in a fixed
        # order: CPU x3, GPU (cudnn_off=False) x3, GPU (cudnn_off=True) x3.
        pooling_convention = 'valid'
        targets = [(mx.cpu, {}),
                   (mx.gpu, {'cudnn_off': False}),
                   (mx.gpu, {'cudnn_off': True})]
        window_variants = [{'kernel': kernel, 'pad': pad, 'stride': stride},
                           {'kernel': kernel},
                           {}]
        ctx_list = []
        sym_list = []
        for make_ctx, target_kwargs in targets:
            for window_kwargs in window_variants:
                op_kwargs = dict(window_kwargs)
                op_kwargs.update(target_kwargs)
                ctx_list.append({'ctx': make_ctx(0), 'pool_data': data,
                                 'type_dict': {'pool_data': np.float32}})
                sym_list.append(mx.sym.Pooling(pool_type=pool_type,
                                               pooling_convention=pooling_convention,
                                               global_pool=True, p_value=p_value, name='pool',
                                               **op_kwargs))
        check_consistency(sym_list, ctx_list)

    def test_1d_pooling(pool_type, p_value=2):
        check_global_pooling(data=(2, 3, 20), kernel=(4,), pad=(2,), stride=(2,),
                             pool_type=pool_type, p_value=p_value)

    def test_2d_pooling(pool_type, p_value=2):
        check_global_pooling(data=(2, 3, 20, 20), kernel=(4, 4), pad=(2, 2), stride=(2, 2),
                             pool_type=pool_type, p_value=p_value)

    # All 1D cases run before the 2D cases.
    for run_case in (test_1d_pooling, test_2d_pooling):
        run_case('max')
        run_case('avg')
        run_case('sum')
        run_case('lp', p_value=1)
        run_case('lp', p_value=2)
        run_case('lp', p_value=3)
2018-02-09 09:53:16 +08:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_upsampling_with_type():
    """Run check_consistency on nearest-neighbour UpSampling across devices and dtypes."""
    sym = mx.sym.UpSampling(scale=2, num_filter=2, name='up', sample_type='nearest', num_args=1)
    # GPU entries come first, each device from widest to narrowest dtype.
    dev_types = [(mx.gpu, [np.float64, np.float32, np.float16]),
                 (mx.cpu, [np.float64, np.float32])]
    ctx_list = [{'ctx': make_ctx(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': dtype}}
                for make_ctx, dtypes in dev_types
                for dtype in dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_upsampling_bilinear_with_type():
    """Run check_consistency on bilinear UpSampling across devices and dtypes."""
    sym = mx.sym.UpSampling(scale=2, num_filter=2, name='up', sample_type='bilinear', num_args=1)
    # GPU entries come first, each device from widest to narrowest dtype.
    dev_types = [(mx.gpu, [np.float64, np.float32, np.float16]),
                 (mx.cpu, [np.float64, np.float32])]
    ctx_list = [{'ctx': make_ctx(0), 'up_data': (2, 2, 2, 10), 'type_dict': {'up_data': dtype}}
                for make_ctx, dtypes in dev_types
                for dtype in dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_concat_with_type():
    """Run check_consistency on a two-input Concat across devices and dtypes.

    Both inputs always share the same dtype within one entry.
    """
    sym = mx.sym.Concat(name='concat', num_args=2)
    # GPU entries come first, each device from widest to narrowest dtype.
    dev_types = [(mx.gpu, [np.float64, np.float32, np.float16]),
                 (mx.cpu, [np.float64, np.float32])]
    ctx_list = [{'ctx': make_ctx(0), 'concat_arg1': (2, 10), 'concat_arg0': (2, 10),
                 'type_dict': {'concat_arg0': dtype, 'concat_arg1': dtype}}
                for make_ctx, dtypes in dev_types
                for dtype in dtypes]
    check_consistency(sym, ctx_list)
2016-06-09 01:32:07 +09:00
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_elementwisesum_with_type():
    """Run check_consistency on ElementWiseSum with 1 to 5 inputs across devices and dtypes."""
    dev_types = [[mx.gpu(0), [np.float64, np.float32, np.float16]],
                 [mx.cpu(0), [np.float64, np.float32]]]
    for num_args in range(1, 6):
        arg_names = ['ews_arg' + str(i) for i in range(num_args)]
        sym = mx.sym.ElementWiseSum(name='ews', num_args=num_args)
        ctx_list = []
        for dev, types in dev_types:
            for dtype in types:
                # Every input has shape (2, 10) and the same dtype.
                ctx_elem = {'ctx': dev}
                ctx_elem.update({arg_name: (2, 10) for arg_name in arg_names})
                ctx_elem['type_dict'] = {arg_name: dtype for arg_name in arg_names}
                ctx_list.append(ctx_elem)
        check_consistency(sym, ctx_list)
2018-02-18 03:11:58 -08:00
@with_seed()
def test_reshape_with_type():
    """Run check_consistency on Reshape(shape=(-1, 1, 1, 0)) across devices and dtypes."""
    sym = mx.sym.Reshape(name='reshape', shape=(-1, 1, 1, 0))
    # GPU entries come first, each device from widest to narrowest dtype.
    dev_types = [(mx.gpu, [np.float64, np.float32, np.float16]),
                 (mx.cpu, [np.float64, np.float32])]
    ctx_list = [{'ctx': make_ctx(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': dtype}}
                for make_ctx, dtypes in dev_types
                for dtype in dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_blockgrad_with_type():
    """Run check_consistency on BlockGrad across devices and dtypes."""
    sym = mx.sym.BlockGrad(name='bg')
    # GPU entries come first, each device from widest to narrowest dtype.
    dev_types = [(mx.gpu, [np.float64, np.float32, np.float16]),
                 (mx.cpu, [np.float64, np.float32])]
    ctx_list = [{'ctx': make_ctx(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': dtype}}
                for make_ctx, dtypes in dev_types
                for dtype in dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_swapaxis_with_type():
    """Run check_consistency on SwapAxis(dim1=1) across devices and dtypes."""
    sym = mx.sym.SwapAxis(name='swap', dim1=1)
    # GPU entries come first, each device from widest to narrowest dtype.
    dev_types = [(mx.gpu, [np.float64, np.float32, np.float16]),
                 (mx.cpu, [np.float64, np.float32])]
    ctx_list = [{'ctx': make_ctx(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': dtype}}
                for make_ctx, dtypes in dev_types
                for dtype in dtypes]
    check_consistency(sym, ctx_list)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_fullyconnected_with_type():
    """Run check_consistency on FullyConnected across devices and dtypes.

    A second pass compares float16 on GPU against float32 on CPU using
    sizes that are multiples of 8.
    """
    sym = mx.sym.FullyConnected(num_hidden=3, name='inner')
    # GPU entries come first, each device from widest to narrowest dtype.
    dev_types = [(mx.gpu, [np.float64, np.float32, np.float16]),
                 (mx.cpu, [np.float64, np.float32])]
    ctx_list = [{'ctx': make_ctx(0), 'inner_data': (2, 10), 'type_dict': {'inner_data': dtype}}
                for make_ctx, dtypes in dev_types
                for dtype in dtypes]
    check_consistency(sym, ctx_list)

    # Sizes are divisible by 8 to test TensorCore on Volta GPU.
    sym = mx.sym.FullyConnected(num_hidden=8, name='inner')
    ctx_list = [{'ctx': mx.gpu(0), 'inner_data': (16, 24), 'type_dict': {'inner_data': np.float16}},
                {'ctx': mx.cpu(0), 'inner_data': (16, 24), 'type_dict': {'inner_data': np.float32}}]
    check_consistency(sym, ctx_list)
2016-03-19 23:45:52 -07:00
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_activation_with_type():
    """Run check_consistency on each Activation act_type across devices and dtypes.

    Unlike most sibling tests, float16 is included on the CPU as well.
    """
    act_types = ['relu', 'sigmoid', 'tanh', 'softrelu', 'softsign']
    shape = (2, 2, 10, 10)
    # GPU entries come first, each device from widest to narrowest dtype.
    dev_types = [(mx.gpu, [np.float64, np.float32, np.float16]),
                 (mx.cpu, [np.float64, np.float32, np.float16])]
    for act_type in act_types:
        sym = mx.sym.Activation(name='act', act_type=act_type)
        ctx_list = [{'ctx': make_ctx(0), 'act_data': shape, 'type_dict': {'act_data': dtype}}
                    for make_ctx, dtypes in dev_types
                    for dtype in dtypes]
        check_consistency(sym, ctx_list)
2015-10-24 15:57:42 -07:00
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_lrn():
    """Run check_consistency on LRN for float32 on GPU and CPU."""
    sym = mx.sym.LRN(alpha=0.0001, beta=0.75, knorm=2, nsize=5, name='lrn')
    ctx_list = [{'ctx': mx.gpu(0), 'lrn_data': (2, 6, 10, 10), 'type_dict': {'lrn_data': np.float32}},
                {'ctx': mx.cpu(0), 'lrn_data': (2, 6, 10, 10), 'type_dict': {'lrn_data': np.float32}}]
    check_consistency(sym, ctx_list)
2018-02-18 03:11:58 -08:00
@with_seed()
@pytest.mark.skipif(os.environ.get('MXNET_ENGINE_TYPE') == 'NaiveEngine',
                    reason="Testing with naive engine consistently triggers illegal memory access. Tracked in #17713")
def test_embedding_with_type():
    """Run check_consistency on Embedding over index/weight dtype combinations.

    Each (N, V, D) case is run under MXNET_SAFE_ACCUMULATION set to '0', '1'
    and None; the case with N > 1000 only runs when the variable is '1'.
    """
    def test_embedding_helper(data_types, weight_types, low_pad, high_pad):
        # Indices are drawn from [-low_pad, V + high_pad), so with non-zero
        # padding some of them fall outside the valid range [0, V).
        NVD = [[20, 10, 20], [200, 10, 300], [10000, 4, 20]]
        for safe_accumulation in ['0', '1', None]:
            for N, V, D in NVD:
                # Skip the large case unless safe accumulation is explicitly on.
                # `continue` keeps this independent of the ordering of NVD.
                if N > 1000 and safe_accumulation != '1':
                    continue
                with environment('MXNET_SAFE_ACCUMULATION', safe_accumulation):
                    sym = mx.sym.Embedding(name='embedding', input_dim=V, output_dim=D)
                    ctx_list = []
                    for data_type in data_types:
                        for weight_type in weight_types:
                            for make_ctx in (mx.gpu, mx.cpu):
                                ctx_list.append({'ctx': make_ctx(0), 'embedding_data': (N,),
                                                 'type_dict': {'embedding_data': data_type,
                                                               'embedding_weight': weight_type}})
                    arg_params = {'embedding_data': np.random.randint(low=-low_pad, high=V + high_pad, size=(N,))}
                    check_consistency(sym, ctx_list,
                                      grad_req={'embedding_data': 'null', 'embedding_weight': 'write'},
                                      arg_params=arg_params, scale=0.1)

    data_types = [np.float16, np.float32, np.float64, np.int32]
    weight_types = [np.float16, np.float32, np.float64]
    test_embedding_helper(data_types, weight_types, 5, 5)
    # uint8 indices use low_pad=0, so no negative values are generated.
    data_types = [np.uint8]
    weight_types = [np.float16, np.float32, np.float64]
    test_embedding_helper(data_types, weight_types, 0, 5)
2017-03-17 12:42:11 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
def test_take_with_type():
    """Run check_consistency on take across devices, dtypes and random shapes.

    The first part sweeps data rank 2-4 and index rank 1-3 under
    MXNET_SAFE_ACCUMULATION '0', '1' and None. The second part uses 10000
    indices on axis 0 and axis 1 with MXNET_SAFE_ACCUMULATION='1'.
    """
    sym = mx.sym.take(name='take')
    # GPU entries come first; indices and data share one dtype per entry.
    dev_types = [(mx.gpu, [np.float64, np.float32, np.float16]),
                 (mx.cpu, [np.float64, np.float32, np.float16])]
    for safe_accumulation in ['0', '1', None]:
        for data_ndim in range(2, 5):
            for idx_ndim in range(1, 4):
                # Shapes are drawn one dimension at a time: data dims first,
                # then index dims.
                data_shape = tuple(np.random.randint(low=3, high=6) for _ in range(data_ndim))
                idx_shape = tuple(np.random.randint(low=3, high=5) for _ in range(idx_ndim))
                ctx_list = [{'ctx': make_ctx(0), 'take_indices': idx_shape,
                             'take_a': data_shape,
                             'type_dict': {'take_indices': dtype,
                                           'take_a': dtype}}
                            for make_ctx, dtypes in dev_types
                            for dtype in dtypes]
                arg_params = {'take_indices': np.random.randint(low=0,
                                                                high=data_shape[0],
                                                                size=idx_shape),
                              'take_a': np.random.normal(size=data_shape)}
                with environment('MXNET_SAFE_ACCUMULATION', safe_accumulation):
                    check_consistency(sym, ctx_list,
                                      grad_req={'take_indices': 'null',
                                                'take_a': 'write'},
                                      arg_params=arg_params)

    # check a large num of indices: may underflow calculating gradient in FP16,
    # if MXNET_SAFE_ACCUMULATION is not activated
    with environment('MXNET_SAFE_ACCUMULATION', '1'):
        data_size = 4
        indices_size = 10000
        out_dim = 20
        data_types = [np.float16, np.float32, np.float64]
        indices_types = [np.float16, np.float32, np.float64, np.int32]
        # Same setup for axis 0 and then axis 1; indices stay in [0, data_size).
        for axis in (0, 1):
            sym = mx.sym.take(name='take', axis=axis)
            ctx_list = []
            for data_type in data_types:
                for index_type in indices_types:
                    for make_ctx in (mx.cpu, mx.gpu):
                        ctx_list.append({'ctx': make_ctx(0), 'take_indices': (indices_size,),
                                         'take_a': (data_size, out_dim),
                                         'type_dict': {'take_indices': index_type, 'take_a': data_type}})
            arg_params = {'take_indices': np.random.randint(0, data_size,
                                                            size=(indices_size,)),
                          'take_a': np.random.normal(size=(data_size, out_dim))}
            check_consistency(sym, ctx_list,
                              grad_req={'take_indices': 'null', 'take_a': 'write'},
                              arg_params=arg_params)
2017-03-17 12:42:11 -07:00
2018-08-07 18:33:55 -07:00
@with_seed()
@pytest.mark.serial
def test_psroipooling_with_type():
    """Run check_consistency on contrib.PSROIPooling for float64/32/16 on the GPU.

    The ROIs are fixed through arg_params and receive no gradient
    (grad_req 'null'); only the data gradient is written.
    """
    arg_params = {
        'psroipool_rois': np.array([[0, 10, 22, 161, 173], [0, 20, 15, 154, 160]])}

    # plain psroipooling
    sym = mx.sym.contrib.PSROIPooling(spatial_scale=0.0625, output_dim=2, pooled_size=3, name='psroipool')
    ctx_list = [{'ctx': mx.gpu(0),
                 'psroipool_data': (1, 18, 14, 14),
                 'psroipool_rois': (2, 5),
                 'type_dict': {'psroipool_data': dtype, 'psroipool_rois': dtype}}
                for dtype in (np.float64, np.float32, np.float16)]

    check_consistency(sym, ctx_list, grad_req={'psroipool_data': 'write',
                                               'psroipool_rois': 'null'}, arg_params=arg_params)
2018-02-18 03:11:58 -08:00
2018-08-12 12:26:16 -07:00
@with_seed()
@pytest.mark.serial
def test_deformable_psroipooling_with_type():
    """Run check_consistency on contrib.DeformablePSROIPooling across devices and dtypes.

    The symbol is bound on ``mx.gpu(0)`` and ``mx.cpu(0)``, each with float64,
    float32 and float16 inputs.  The ROI values are fixed through
    ``arg_params`` and the ROI input gets no gradient; the data and trans
    inputs both receive gradients.
    """
    # Per-dtype tolerance, passed as both rtol and atol to check_consistency.
    tol = {np.dtype(np.float32): 1e-1,
           np.dtype(np.float64): 1e-3,
           np.dtype(np.float16): 1e-2}

    # Fixed ROIs (two rows of five values, matching the (2, 5) rois shape below).
    arg_params = {
        'deformable_psroipool_rois': np.array([[0, 10, 22, 161, 173], [0, 20, 15, 154, 160]])}

    # deformable psroipooling
    sym = mx.sym.contrib.DeformablePSROIPooling(spatial_scale=0.0625, sample_per_part=4,
                                                group_size=3, pooled_size=3,
                                                output_dim=2, trans_std=0.1, no_trans=False,
                                                name='deformable_psroipool')

    # Entries are generated in the original order: all GPU dtypes first
    # (float64, float32, float16), then the same three dtypes on CPU.
    # The device factory is called per entry so each dict owns its context.
    ctx_list = [{'ctx': device(0),
                 'deformable_psroipool_data': (1, 18, 14, 14),
                 'deformable_psroipool_rois': (2, 5),
                 'deformable_psroipool_trans': (2, 4, 3, 3),
                 'type_dict': {'deformable_psroipool_data': dtype,
                               'deformable_psroipool_rois': dtype,
                               'deformable_psroipool_trans': dtype}}
                for device in (mx.gpu, mx.cpu)
                for dtype in (np.float64, np.float32, np.float16)]

    check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol,
                      grad_req={'deformable_psroipool_data': 'write',
                                'deformable_psroipool_rois': 'null',
                                'deformable_psroipool_trans': 'write'},
                      arg_params=arg_params)
2017-06-18 01:53:37 +08:00
2018-02-18 03:11:58 -08:00
2018-08-12 12:26:16 -07:00
@with_seed()
@pytest.mark.serial
def test_deformable_convolution_with_type():
    """Run check_consistency on npx.deformable_convolution across devices and dtypes.

    The symbol is bound on ``mx.gpu(0)`` and ``mx.cpu(0)`` with float64 and
    float32 inputs.  Consistency is checked twice: once with the default
    gradient requests, and once with the bias gradient disabled.
    """
    # Per-dtype tolerance, passed as both rtol and atol to check_consistency.
    tol = {np.dtype(np.float32): 1e-1,
           np.dtype(np.float64): 1e-3}

    sym = mx.sym.npx.deformable_convolution(num_filter=3, kernel=(3, 3), name='deformable_conv')

    # since atomicAdd does not support fp16 (which deformable conv uses in backward),
    # we do not test fp16 here
    # Entries are generated in the original order: GPU float64, GPU float32,
    # CPU float64, CPU float32.  The device factory is called per entry so
    # each dict owns its context.
    ctx_list = [{'ctx': device(0),
                 'deformable_conv_data': (2, 2, 10, 10),
                 'deformable_conv_offset': (2, 18, 8, 8),
                 'type_dict': {'deformable_conv_data': dtype,
                               'deformable_conv_offset': dtype}}
                for device in (mx.gpu, mx.cpu)
                for dtype in (np.float64, np.float32)]

    check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol)

    # test ability to turn off training on bias
    check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol,
                      grad_req={'deformable_conv_data': 'write',
                                'deformable_conv_offset': 'write',
                                'deformable_conv_weight': 'write',
                                'deformable_conv_bias': 'null'})
2018-02-18 03:11:58 -08:00
@with_seed ( )
2017-06-18 01:53:37 +08:00
def test_deformable_convolution_options ( ) :
2018-07-06 18:48:57 -07:00
tol = { np . dtype ( np . float32 ) : 1e-1 ,
np . dtype ( np . float64 ) : 1e-3 }
2017-06-18 01:53:37 +08:00
# 2D convolution
2019-05-23 21:00:33 -05:00
# since atomicAdd does not support fp16 (which deformable conv uses in backward), we do not test fp16 here
2017-06-18 01:53:37 +08:00
# Pad > 0
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 7 , 7 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float64 , ' deformable_conv_offset ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 7 , 7 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float32 , ' deformable_conv_offset ' : np . float32 } } ,
2019-05-23 21:00:33 -05:00
{ ' ctx ' : mx . cpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 7 , 7 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float64 , ' deformable_conv_offset ' : np . float64 } } ,
{ ' ctx ' : mx . cpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 7 , 7 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float32 , ' deformable_conv_offset ' : np . float32 } } ,
2017-06-18 01:53:37 +08:00
]
2020-09-05 11:11:38 -07:00
sym = mx . sym . npx . deformable_convolution ( num_filter = 3 , kernel = ( 3 , 3 ) , pad = ( 1 , 1 ) , name = ' deformable_conv ' )
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
check_consistency ( sym , ctx_list , scale = 0.1 , rtol = tol , atol = tol )
2017-06-18 01:53:37 +08:00
# Stride > 1
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 3 , 3 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float64 , ' deformable_conv_offset ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 3 , 3 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float32 , ' deformable_conv_offset ' : np . float32 } } ,
2019-05-23 21:00:33 -05:00
{ ' ctx ' : mx . cpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 3 , 3 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float64 , ' deformable_conv_offset ' : np . float64 } } ,
{ ' ctx ' : mx . cpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 3 , 3 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float32 , ' deformable_conv_offset ' : np . float32 } } ,
2017-06-18 01:53:37 +08:00
]
2020-09-05 11:11:38 -07:00
sym = mx . sym . npx . deformable_convolution ( num_filter = 3 , kernel = ( 3 , 3 ) , stride = ( 2 , 2 ) , name = ' deformable_conv ' )
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
check_consistency ( sym , ctx_list , scale = 0.1 , rtol = tol , atol = tol )
2017-06-18 01:53:37 +08:00
# Dilate > 1
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 3 , 3 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float64 , ' deformable_conv_offset ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 3 , 3 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float32 , ' deformable_conv_offset ' : np . float32 } } ,
2019-05-23 21:00:33 -05:00
{ ' ctx ' : mx . cpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 3 , 3 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float64 , ' deformable_conv_offset ' : np . float64 } } ,
{ ' ctx ' : mx . cpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 18 , 3 , 3 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float32 , ' deformable_conv_offset ' : np . float32 } } ,
2017-06-18 01:53:37 +08:00
]
2020-09-05 11:11:38 -07:00
sym = mx . sym . npx . deformable_convolution ( num_filter = 3 , kernel = ( 3 , 3 ) , dilate = ( 2 , 2 ) , name = ' deformable_conv ' )
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
check_consistency ( sym , ctx_list , scale = 0.1 , rtol = tol , atol = tol )
2017-06-18 01:53:37 +08:00
# Deformable group > 1
ctx_list = [ { ' ctx ' : mx . gpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 36 , 5 , 5 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float64 , ' deformable_conv_offset ' : np . float64 } } ,
{ ' ctx ' : mx . gpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 36 , 5 , 5 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float32 , ' deformable_conv_offset ' : np . float32 } } ,
2019-05-23 21:00:33 -05:00
{ ' ctx ' : mx . cpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 36 , 5 , 5 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float64 , ' deformable_conv_offset ' : np . float64 } } ,
{ ' ctx ' : mx . cpu ( 0 ) ,
' deformable_conv_data ' : ( 2 , 2 , 7 , 7 ) ,
' deformable_conv_offset ' : ( 2 , 36 , 5 , 5 ) ,
' type_dict ' : { ' deformable_conv_data ' : np . float32 , ' deformable_conv_offset ' : np . float32 } } ,
2017-06-18 01:53:37 +08:00
]
2020-09-05 11:11:38 -07:00
sym = mx . sym . npx . deformable_convolution ( num_filter = 4 , kernel = ( 3 , 3 ) , num_deformable_group = 2 , name = ' deformable_conv ' )
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <spanev@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <spanev@nvidia.com>
Co-authored-by: Bart Gawrych <gawrych.bartlomiej@intel.com>
2020-07-19 14:12:50 -07:00
check_consistency ( sym , ctx_list , scale = 0.1 , rtol = tol , atol = tol )
2019-05-23 21:00:33 -05:00
2017-05-31 09:56:32 -07:00
2017-06-26 22:37:11 -07:00
def check_rnn_layer(layer):
    """Assert that `layer` gives matching results under GPU and CPU scopes.

    The layer is initialized on both ``mx.cpu(0)`` and ``mx.gpu(0)``.  It is
    then called once inside each device scope on an all-ones input of shape
    (10, 16, 30) together with ``layer.begin_state(16)``.  The output and
    every returned state are compared with rtol=1e-2 and atol=1e-6.

    Parameters
    ----------
    layer : object
        Must provide ``initialize(ctx=...)``, ``begin_state(n)`` and be
        callable as ``layer(x, states) -> (output, states)``.
        Presumably a Gluon RNN layer -- confirm against callers.
    """
    layer.initialize(ctx=[mx.cpu(0), mx.gpu(0)])

    def run_under(device):
        # Input and begin state are created inside the device scope so they
        # pick up that scope's default context.
        with device:
            data = mx.nd.ones((10, 16, 30))
            begin = layer.begin_state(16)
            return layer(data, begin)

    gpu_out, gpu_states = run_under(mx.gpu(0))
    cpu_out, cpu_states = run_under(mx.cpu(0))

    # atol of 1e-6 required, as exposed by seed 2124685726
    tols = dict(rtol=1e-2, atol=1e-6)
    assert_almost_equal(gpu_out, cpu_out, **tols)
    for gpu_state, cpu_state in zip(gpu_states, cpu_states):
        assert_almost_equal(gpu_state, cpu_state, **tols)
2017-06-05 10:07:12 -07:00
2018-03-09 21:46:34 -08:00
def check_rnn_layer_w_rand_inputs(layer):
    """Assert that `layer` gives matching GPU and CPU results on random input.

    A single ``mx.nd.uniform`` sample of shape (10, 16, 30) is drawn once and
    copied to each device, so both runs see the same values.  The output and
    every returned state are compared with rtol=1e-2 and atol=1e-6.

    Parameters
    ----------
    layer : object
        Must provide ``initialize(ctx=...)``, ``begin_state(n)`` and be
        callable as ``layer(x, states) -> (output, states)``.
    """
    layer.initialize(ctx=[mx.cpu(0), mx.gpu(0)])
    sample = mx.nd.uniform(shape=(10, 16, 30))

    with mx.gpu(0):
        gpu_in = sample.copyto(mx.gpu(0))
        gpu_begin = layer.begin_state(16)
        gpu_out, gpu_states = layer(gpu_in, gpu_begin)

    with mx.cpu(0):
        # The CPU input is copied back from the GPU copy, as in the original.
        cpu_in = gpu_in.copyto(mx.cpu(0))
        cpu_begin = layer.begin_state(16)
        cpu_out, cpu_states = layer(cpu_in, cpu_begin)

    assert_almost_equal(gpu_out, cpu_out, rtol=1e-2, atol=1e-6)
    for gpu_state, cpu_state in zip(gpu_states, cpu_states):
        assert_almost_equal(gpu_state, cpu_state, rtol=1e-2, atol=1e-6)
2018-03-09 21:46:34 -08:00
2018-02-18 03:11:58 -08:00
@with_seed()
@pytest.mark.serial
def test_sequence_reverse():
    """Run ``check_sequence_reverse`` against ``mx.gpu(0)``."""
    gpu_ctx = mx.gpu(0)
    check_sequence_reverse(gpu_ctx)
2018-02-18 03:11:58 -08:00
@with_seed()
@pytest.mark.serial
def test_autograd_save_memory():
    """Record 200 chained ``+ 1`` ops on a large GPU array, then run backward.

    The array has shape (128, 512, 512) and lives on ``mx.gpu(0)``.
    NOTE(review): presumably this guards against autograd keeping too much
    GPU memory alive for the recorded chain -- confirm.
    """
    acc = mx.nd.zeros((128, 512, 512), ctx=mx.gpu(0))
    acc.attach_grad()

    with mx.autograd.record():
        for _ in range(200):
            # Deliberately out-of-place: each step creates a new array.
            acc = acc + 1
            acc.wait_to_read()
    acc.backward()
2017-08-15 12:24:35 -07:00
2018-02-18 03:11:58 -08:00
@with_seed()
@pytest.mark.serial
def test_cuda_rtc():
    """Compile two CUDA kernels at runtime and check their results.

    ``axpy`` accumulates ``alpha * x`` into ``y`` directly.  ``saxpy`` does the
    same but stages ``x`` through a dynamically sized ``extern __shared__``
    buffer, so its launches pass a fifth argument for the shared-memory size.
    With ``x`` all ones and ``y`` starting at zero, ``y`` must read 3, 7 and
    12 after the three launches.
    """
    # The linkage specifier must be exactly "C" and the kernel names passed to
    # get_kernel() must match the symbols in the source exactly.
    source = r'''
    extern "C" __global__ void axpy(const float *x, float *y, float alpha) {
        int i = threadIdx.x + blockIdx.x * blockDim.x;
        y[i] += alpha * x[i];
    }

    extern "C" __global__ void saxpy(const float *x, float *y, float alpha) {
        extern __shared__ float smem[];
        int i = threadIdx.x + blockIdx.x * blockDim.x;
        smem[threadIdx.x] = x[i];
        y[i] += alpha * smem[threadIdx.x];
    }
    '''
    signature = "const float *x, float *y, float alpha"
    module = mx.rtc.CudaModule(source)

    axpy = module.get_kernel("axpy", signature)
    x = mx.nd.ones((10,), ctx=mx.gpu(0))
    y = mx.nd.zeros((10,), ctx=mx.gpu(0))
    axpy.launch([x, y, 3.0], mx.gpu(0), (1, 1, 1), (10, 1, 1))
    assert (y.asnumpy() == 3).all()

    saxpy = module.get_kernel("saxpy", signature)
    # One block of 10 threads.
    saxpy.launch([x, y, 4.0], mx.gpu(0), (1, 1, 1), (10, 1, 1), 10)
    assert (y.asnumpy() == 7).all()

    # Two blocks of 5 threads cover the same 10 elements.
    saxpy.launch([x, y, 5.0], mx.gpu(0), (2, 1, 1), (5, 1, 1), 5)
    assert (y.asnumpy() == 12).all()
2018-02-18 03:11:58 -08:00
@with_seed()
@pytest.mark.serial
def test_cross_device_autograd():
    """Check gradients survive copies between CPU and GPU.

    Three ``tanh`` ops are applied with a device copy after each one
    (gpu, cpu, gpu), followed by one more gpu-to-gpu copy.  The resulting
    gradient of ``x`` must equal the gradient of three chained ``tanh`` ops
    with no copies at all.
    """
    x = mx.nd.random.uniform(shape=(10,))
    x.attach_grad()

    # tanh -> copy, three times, hopping gpu / cpu / gpu.
    with mx.autograd.record():
        hopped = x
        for device in (mx.gpu(0), mx.cpu(0), mx.gpu(0)):
            hopped = mx.nd.tanh(hopped)
            hopped = hopped.copyto(device)
        # Extra same-device copy.
        hopped = hopped.copyto(mx.gpu(0))

        hopped.backward()

    dx = x.grad.copy()
    x.grad[:] = 0

    # Reference: the same three tanh ops without any copies.
    with mx.autograd.record():
        plain = mx.nd.tanh(x)
        plain = mx.nd.tanh(plain)
        plain = mx.nd.tanh(plain)
        plain.backward()

    assert_almost_equal(dx, x.grad)
2017-10-02 11:48:51 -07:00
2018-03-21 06:56:55 +08:00
@with_seed()
@pytest.mark.serial
def test_multi_proposal_op():
    """Compare CPU and GPU results of ``Proposal`` and ``MultiProposal``.

    Each operator is run on identical random inputs on ``mx.cpu(0)`` and
    ``mx.gpu(0)``.  With threshold 1.0 the outputs must match within 1e-3.
    With threshold 0.7 (``with_nms=True``) only a minimum number of matching
    entries is required.
    """
    # parameters
    feature_stride = 16
    scales = (8, 16, 32)
    ratios = (0.5, 1, 2)
    rpn_pre_nms_top_n = 12000
    rpn_post_nms_top_n = 2000
    rpn_min_size = feature_stride

    feat_len = (1000 + 15) // 16
    H, W = feat_len, feat_len
    num_anchors = len(scales) * len(ratios)

    def get_new_data(batch_size, ctx):
        """Build random operator inputs on `ctx`.

        Returns
        -------
        cls_prob: (batch_size, 2 * num_anchors, H, W)
        bbox_pred: (batch_size, 4 * num_anchors, H, W)
        im_info: (batch_size, 3)
        """
        dtype = np.float32
        cls_shape = (batch_size, 2 * num_anchors, H, W)
        bbox_shape = (batch_size, 4 * num_anchors, H, W)

        # Distinct scores in (0, 1], shuffled.  The random draws keep the
        # original order: shuffle, bbox randint, then per-image randints.
        num_scores = batch_size * 2 * num_anchors * H * W
        scores = [1.0 * (i + 1) / num_scores for i in range(num_scores)]
        np.random.shuffle(scores)
        cls_prob = mx.nd.reshape(mx.nd.array(scores, dtype=dtype, ctx=ctx), shape=cls_shape)
        bbox_pred = mx.nd.array(np.random.randint(-2, 3, size=bbox_shape), dtype=dtype, ctx=ctx)

        im_info = mx.nd.empty((batch_size, 3), dtype=dtype, ctx=ctx)
        for i in range(batch_size):
            im_size = np.random.randint(600, feat_len * feature_stride, size=(2,))
            im_scale = np.random.randint(80, 100) / 100.0
            im_info[i, :] = [im_size[0], im_size[1], im_scale]
        return cls_prob, bbox_pred, im_info

    def check_proposal_consistency(op, batch_size, with_nms=False):
        """Run `op` on CPU and GPU with the same data and compare outputs.

        op is mx.nd.contrib.Proposal or mx.nd.contrib.MultiProposal
        """
        cls_prob, bbox_pred, im_info = get_new_data(batch_size, mx.cpu(0))

        # Settings shared by both device runs.
        op_kwargs = dict(
            feature_stride=feature_stride,
            scales=scales,
            ratios=ratios,
            rpn_pre_nms_top_n=rpn_pre_nms_top_n,
            rpn_post_nms_top_n=rpn_post_nms_top_n,
            threshold=0.7 if with_nms else 1.0,
            rpn_min_size=rpn_min_size,
            output_score=True,
        )

        rois_cpu, score_cpu = op(
            cls_prob=cls_prob,
            bbox_pred=bbox_pred,
            im_info=im_info,
            **op_kwargs)

        # copy data to gpu from cpu
        gpu_ctx = mx.gpu(0)
        rois_gpu, score_gpu = op(
            cls_prob=cls_prob.as_in_context(gpu_ctx),
            bbox_pred=bbox_pred.as_in_context(gpu_ctx),
            im_info=im_info.as_in_context(gpu_ctx),
            **op_kwargs)

        rois_cpu_np = rois_cpu.asnumpy()
        rois_gpu_np = rois_gpu.asnumpy()
        score_cpu_np = score_cpu.asnumpy()
        score_gpu_np = score_gpu.asnumpy()

        if not with_nms:
            assert_almost_equal(score_cpu_np, score_gpu_np, atol=1e-3, rtol=1e-3)
            assert_almost_equal(rois_cpu_np, rois_gpu_np, atol=1e-3, rtol=1e-3)
        else:
            # no 100% guarantee with nms
            assert np.sum(np.abs(score_cpu_np - score_gpu_np) < 1e-3) >= 10
            assert np.sum(np.abs(rois_cpu_np - rois_gpu_np) < 1e-3) >= 40

    check_proposal_consistency(mx.nd.contrib.Proposal, 1)
    check_proposal_consistency(mx.nd.contrib.MultiProposal, 5)
    check_proposal_consistency(mx.nd.contrib.Proposal, 1, with_nms=True)
    check_proposal_consistency(mx.nd.contrib.MultiProposal, 5, with_nms=True)
2018-03-21 06:56:55 +08:00
2018-01-30 10:45:25 -08:00
# The following 2 functions launch 0-thread kernels, an error that should be caught and signaled.
def kernel_error_check_imperative():
    """Provoke a GPU kernel error through the imperative NDArray API.

    A 3-element array is divided by an empty array on ``mx.gpu(0)`` with
    ``MXNET_ENGINE_TYPE`` set to ``NaiveEngine`` and numpy-shape semantics
    enabled.  The caller expects this to raise.  It takes no arguments so
    that it can serve as a process target.
    """
    # The variable name and value must be exact: a padded name would set an
    # unrelated variable and leave the engine type unchanged.
    with environment('MXNET_ENGINE_TYPE', 'NaiveEngine'):
        with mx.np_shape(active=True):
            a = mx.nd.array([1, 2, 3], ctx=mx.gpu(0))
            b = mx.nd.array([], ctx=mx.gpu(0))
            # Pull the result to host; the value itself is not needed.
            (a / b).asnumpy()
2018-01-30 10:45:25 -08:00
def kernel_error_check_symbolic ( ) :
Improve environment variable handling in unittests (#18424)
This PR makes it easy to create unittests that require specific settings of environment variables, while avoiding the pitfalls (discussed in comments section). This PR can be considered a recasting and expansion of the great vision of @larroy in creating the EnvManager class in #13140.
In its base form, the facility is a drop-in replacement for EnvManager, and is called 'environment':
with environment('MXNET_MY_NEW_FEATURE', '1'):
<test with feature enabled>
with environment('MXNET_MY_NEW_FEATURE', '0'):
<test with feature disabled>
Like EnvManager, this facility takes care of the save/restore of the previous environment variable state, including when exceptions are raised. In addition though, this PR introduces the features:
A similarly-named unittest decorator: @with_environment(key, value)
The ability to pass in multiple env vars as a dict (as is needed for some tests) in both forms, so for example:
with environment({'MXNET_FEATURE_A': '1',
'MXNET_FEATURE_B': '1'}):
<test with both features enabled>
Works on Windows! This PR includes a wrapping of the backend's setenv() and getenv() functions, and uses this direct access to the backend environment to keep it in sync with the python environment. This works around the problem that the C Runtime on Windows gets a snapshot of the Python environment at startup that is immutable from Python.
with environment() has a simple implementation using the @contextmanager decorator
Tests are included that validate the facility works with all combinations of before_val/set_val, namely unset/unset, unset/set, set/unset, set/set.
There were 5 unittests previously using EnvManager, and this PR shifts those uses to with environment():, while converting over 20 other ad-hoc uses of os.environ[] within the unittests. This PR also enables those unittests that were bypassed on Windows (due to the inability to set environment variables) to run on all platforms.
Further Comments
Environment variables are a two-edged sword- they enable useful operating modes for testing, debugging or niche applications, but like all features they must be tested. The correct approach for testing with a particular env var setting is:
def set_env_var(key, value):
if value is None:
os.environ.pop(key, None)
else:
os.environ[key] = value
old_env_var_value = os.environ.get(env_var_name)
try:
set_env_var(env_var_name, test_env_var_value)
<perform test>
finally:
set_env_var(env_var_name, old_env_var_value )
The above code makes no assumption about whether the before-test and within-test state of the env var is set or unset, and restores the prior environment even if the test raises an exception. This represents a lot of boilerplate code that could easily be mishandled. The with environment() context makes it simple to handle all this properly. If an entire unittest needs a forced env var setting, then using the @with_environment() decorator avoids the extra indentation that the with environment() approach would otherwise require within the test.
2020-07-23 11:17:10 -07:00
with environment ( ' MXNET_ENGINE_TYPE ' , ' NaiveEngine ' ) :
with mx . np_shape ( active = True ) :
a = mx . sym . Variable ( ' a ' )
b = mx . sym . Variable ( ' b ' )
c = a / b
f = c . bind ( mx . gpu ( 0 ) , { ' a ' : mx . nd . array ( [ 1 , 2 , 3 ] , ctx = mx . gpu ( 0 ) ) ,
' b ' : mx . nd . array ( [ ] , ctx = mx . gpu ( 0 ) ) } )
f . forward ( )
g = f . outputs [ 0 ] . asnumpy ( )
2018-01-30 10:45:25 -08:00
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2018-01-30 10:45:25 -08:00
def test_kernel_error_checking():
    """Check that each kernel-error checker makes its child process exit non-zero.

    Every checker function is started in its own process obtained from the
    'spawn' multiprocessing context, with stderr discarded, and the test
    asserts that the process exit code is not 0.  If the 'spawn' context
    cannot be obtained, a SKIP notice is written to stderr and nothing is
    asserted.
    """
    # Running tests that may throw exceptions out of worker threads will stop CI testing
    # if not run in a separate process (with its own address space for CUDA compatibility).
    try:
        mpctx = mp.get_context('spawn')
    except (AttributeError, ValueError):
        # AttributeError: this Python's multiprocessing module has no get_context().
        # ValueError: the 'spawn' start method is not available.
        # Anything else (e.g. KeyboardInterrupt) is deliberately allowed to propagate
        # instead of being reported as a skip.
        print('SKIP: python %s.%s lacks the required process fork-exec support ... ' %
              sys.version_info[0:2], file=sys.stderr, end='')
    else:
        with discard_stderr():
            for f in [kernel_error_check_imperative, kernel_error_check_symbolic]:
                p = mpctx.Process(target=f)
                p.start()
                p.join()
                assert p.exitcode != 0, \
                    "Expected a synchronous kernel error from %s(), none seen." % f.__name__
2018-04-03 10:33:56 -07:00
def test_incorrect_gpu():
    """Creating an array on a GPU context with a very large device id must raise MXNetError."""
    # Try setting dev_id to a really big number
    bogus_ctx = mx.gpu(100001)
    with pytest.raises(MXNetError):
        mx.nd.ones((2, 2), ctx=bogus_ctx)
2018-01-30 10:45:25 -08:00
2018-04-09 14:43:53 -07:00
@with_seed ( )
def test_batchnorm_backwards_notrain():
    """Run BatchNorm forward and backward outside of training mode.

    The computation is repeated on mx.cpu(0) and mx.gpu(0), with ``cudnn_off``
    both False and True.  No values are asserted by this test.
    """
    batch, channels, height, width = 4, 3, 2, 2

    def run_case(device, disable_cudnn):
        # Input first, then the four per-channel parameters, drawn in the
        # order gamma, beta, mean, std.
        x = mx.nd.random.poisson(
            1, shape=(batch, channels, height, width)).as_in_context(device)
        gamma, beta, mean, std = [
            mx.nd.random.normal(shape=(channels)).as_in_context(device)
            for _ in range(4)
        ]
        x.attach_grad()

        with autograd.record(False):
            y = mx.ndarray.BatchNorm(x, gamma, beta, mean, std.square(),
                                     fix_gamma=False, cudnn_off=disable_cudnn)
            loss = y.square().sum()
        loss.backward(train_mode=False)

    for ctx in [mx.cpu(0), mx.gpu(0)]:
        for cudnn_o in [False, True]:
            run_case(ctx, cudnn_o)
2018-05-06 13:57:15 -07:00
@with_seed ( )
def test_create_sparse_ndarray_gpu_to_cpu():
    """Rebuild a row_sparse array on mx.cpu() from another array's data and indices.

    For densities 0, 0.5 and 1 the rebuilt array must have stype 'row_sparse',
    carry the same data and indices, and survive a copy through mx.nd.array().
    """
    max_dim0, max_dim1 = 10, 5
    for density in (0, 0.5, 1):
        shape = rand_shape_2d(max_dim0, max_dim1)
        source = rand_ndarray(shape, 'row_sparse', density)
        src_data, src_indices = source.data, source.indices

        rebuilt = mx.nd.sparse.row_sparse_array(
            (src_data, src_indices), shape=shape, ctx=mx.cpu())
        assert rebuilt.stype == 'row_sparse'
        assert same(rebuilt.data.asnumpy(), src_data.asnumpy())
        assert same(rebuilt.indices.asnumpy(), src_indices.asnumpy())

        copied = mx.nd.array(rebuilt)
        assert same(copied.asnumpy(), rebuilt.asnumpy())
2018-05-12 22:48:34 -07:00
@with_seed ( )
def test_softmax_activation():
    """Compare SoftmaxActivation outputs and input gradients between mx.gpu(0) and mx.cpu()."""
    values = [[3., 0.5, -0.5, 2., 7.],
              [2., -.4, 7., 3., 0.2]]
    gpu_a = mx.nd.array(values, ctx=mx.gpu(0))
    cpu_a = mx.nd.array(values, ctx=mx.cpu())

    for arr in (cpu_a, gpu_a):
        arr.attach_grad()

    with mx.autograd.record():
        gpu_y = mx.nd.SoftmaxActivation(data=gpu_a)
        cpu_y = mx.nd.SoftmaxActivation(data=cpu_a)
        # Forward results must agree within tolerance.
        assert_almost_equal(cpu_y, gpu_y, atol=1e-3, rtol=1e-3)

        gpu_y.backward()
        cpu_y.backward()
        # ...and so must the gradients w.r.t. the inputs.
        assert_almost_equal(cpu_a.grad, gpu_a.grad, atol=1e-3, rtol=1e-3)
2018-05-12 22:48:34 -07:00
2018-09-05 11:34:54 -07:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
@pytest.mark.serial
2018-09-05 11:34:54 -07:00
def test_bilinear_sampler_versions():
    """Cross-check BilinearSampler outputs and gradients between three executors.

    The BilinearSampler symbol is bound on mx.cpu() (used as the reference), on
    default_context() with ``cudnn_off=True`` and on default_context() with
    default settings.  For every data/grid shape pair the executors are
    compared with grad_req 'write', with grad_req 'add' (on top of random
    initial gradients), and with per-argument mixes of 'write' and 'null'.
    """
    data = mx.sym.Variable('data')
    grid = mx.sym.Variable('grid')
    sym1 = mx.sym.BilinearSampler(data=data, grid=grid)
    sym2 = mx.sym.BilinearSampler(data=data, grid=grid, cudnn_off=True)
    sym3 = mx.sym.BilinearSampler(data=data, grid=grid)

    test_cases = [[(1, 3, 15, 16), (1, 2, 10, 10)],
                  [(1, 6, 7, 16), (1, 2, 10, 4)],
                  [(1, 7, 3, 16), (1, 2, 8, 11)],
                  [(1, 9, 50, 50), (1, 2, 50, 50)]]
    # Index of the reference executor in every executor list built below.
    ref_idx = 0

    def bind_all(data_shape, grid_shape, grad_req):
        """Return [cpu, default ctx with cudnn_off, default ctx] executors for one grad_req."""
        return [
            sym1._simple_bind(data=data_shape, grid=grid_shape, ctx=mx.cpu(), grad_req=grad_req),
            sym2._simple_bind(data=data_shape, grid=grid_shape, ctx=default_context(), grad_req=grad_req),
            sym3._simple_bind(data=data_shape, grid=grid_shape, ctx=default_context(), grad_req=grad_req),
        ]

    for item in test_cases:
        data_shape, grid_shape = item

        # kWriteTo
        exe_list = bind_all(data_shape, grid_shape, 'write')
        test_data = np.random.uniform(low=-0.1, high=0.1, size=data_shape).astype(np.float32)
        test_grid = np.random.uniform(low=-2, high=2, size=grid_shape).astype(np.float32)
        for exe in exe_list:
            exe.arg_dict['data'][:] = test_data
            exe.arg_dict['grid'][:] = test_grid
            exe.forward(is_train=True)
            mx.test_utils.assert_almost_equal(exe_list[ref_idx].outputs[0], exe.outputs[0], rtol=1e-3, atol=1e-5)

        out_grad = np.random.uniform(low=-0.01, high=0.01, size=data_shape[:2] + grid_shape[2:]).astype(np.float32)
        for exe in exe_list:
            exe.backward(mx.nd.array(out_grad))
            assert_almost_equal(exe.grad_dict['data'], exe_list[ref_idx].grad_dict['data'], rtol=1e-3, atol=1e-5)
            assert_almost_equal(exe.grad_dict['grid'], exe_list[ref_idx].grad_dict['grid'], rtol=1e-3, atol=1e-5)

        # Keep the reference 'write' gradients to validate accumulation below.
        data_grad = exe_list[ref_idx].grad_dict['data'].asnumpy()
        grid_grad = exe_list[ref_idx].grad_dict['grid'].asnumpy()

        # kAddTo
        exe_list = bind_all(data_shape, grid_shape, 'add')
        data_initial_grad = np.random.normal(size=exe_list[ref_idx].grad_dict['data'].shape).astype(np.float32)
        grid_initial_grad = np.random.normal(size=exe_list[ref_idx].grad_dict['grid'].shape).astype(np.float32)
        for exe in exe_list:
            exe.arg_dict['data'][:] = test_data
            exe.arg_dict['grid'][:] = test_grid
            exe.grad_dict['data'][:] = data_initial_grad
            exe.grad_dict['grid'][:] = grid_initial_grad
            exe.forward(is_train=True)
            exe.backward(mx.nd.array(out_grad))
            assert_almost_equal(exe.grad_dict['data'], exe_list[ref_idx].grad_dict['data'], rtol=1e-3, atol=1e-5)
            assert_almost_equal(exe.grad_dict['grid'], exe_list[ref_idx].grad_dict['grid'], rtol=1e-3, atol=1e-5)
        # With 'add' the result must equal the initial gradient plus the 'write' gradient.
        assert_almost_equal(exe_list[ref_idx].grad_dict['data'], data_grad + data_initial_grad, rtol=1e-3, atol=1e-5)
        assert_almost_equal(exe_list[ref_idx].grad_dict['grid'], grid_grad + grid_initial_grad, rtol=1e-3, atol=1e-5)

        for req_dict in [{'data': 'null', 'grid': 'write'}, {'data': 'write', 'grid': 'null'}]:
            # Mixture of kWriteTo and kNullOp
            exe_list = bind_all(data_shape, grid_shape, req_dict)
            for exe in exe_list:
                exe.arg_dict['data'][:] = test_data
                exe.arg_dict['grid'][:] = test_grid
                exe.forward(is_train=True)
                exe.backward(mx.nd.array(out_grad))
                # Compare by value: identity comparison ('is') against a string
                # literal only works by accident of interning.
                if req_dict['data'] == 'write':
                    assert_almost_equal(exe.grad_dict['data'], exe_list[ref_idx].grad_dict['data'], rtol=1e-3, atol=1e-5)
                if req_dict['grid'] == 'write':
                    assert_almost_equal(exe.grad_dict['grid'], exe_list[ref_idx].grad_dict['grid'], rtol=1e-3, atol=1e-5)
2018-09-11 10:20:41 -07:00
2018-09-05 11:34:54 -07:00
2019-03-12 13:25:12 -07:00
# isolated execution bulking test function to be invoked with different env var settings
def _test_bulking_in_process(seed, time_per_iteration):
    """Time forward+backward passes over a chain of 1000 flip operators.

    Parameters
    ----------
    seed
        Accepted but not used by this function.
    time_per_iteration
        Object whose ``value`` attribute is set to the measured mean
        wall-clock seconds per iteration (warm-up iterations excluded).
    """
    shape = (10,)
    chain_length = 1000
    timed_iterations = 20

    ctx = default_context()
    # build symbol
    sym = mx.sym.flip(mx.sym.Variable('X'), axis=0)
    for _ in range(chain_length - 1):
        sym = mx.sym.flip(sym, axis=0)
    x = mx.ndarray.zeros(shape)
    dx = mx.ndarray.zeros(shape)
    dy = mx.ndarray.ones(shape)
    exe = sym._bind(ctx=ctx, args=[x], args_grad={'X': dx})

    def run_iteration():
        exe.forward(is_train=True)
        exe.backward(dy)
        dx.wait_to_read()

    # time a number of forward() and backward() executions after some warm-up iterations
    warmup_iterations = 1
    for _ in range(warmup_iterations):
        run_iteration()
    start = time.time()
    for _ in range(timed_iterations):
        run_iteration()

    time_per_iteration.value = (time.time() - start) / timed_iterations
2019-10-15 15:56:43 -07:00
2019-03-12 13:25:12 -07:00
@with_seed ( )
2020-05-16 19:04:44 -07:00
@pytest.mark.skip ( reason = ' skippping temporarily, tracked by https://github.com/apache/incubator-mxnet/issues/16517 ' )
2019-10-15 15:56:43 -07:00
def test_bulking_operator_gpu():
    """Run _test_bulking() with _test_bulking_in_process as the function under test."""
    _test_bulking(_test_bulking_in_process)
2020-05-16 19:04:44 -07:00
@pytest.mark.skip ( reason = ' skippping temporarily, tracked by https://github.com/apache/incubator-mxnet/issues/14970 ' )
2019-03-12 13:25:12 -07:00
def test_bulking():
    """Compare measured execution times across operator-bulking settings.

    Each setting is applied through the MXNET_EXEC_BULK_EXEC_* variables handed
    to run_in_spawned_process(), which runs _test_bulking_in_process and returns
    the measured time through a shared value.  The test returns early, without
    asserting anything, as soon as run_in_spawned_process() reports that it
    could not run.
    """
    # test case format: (max_fwd_segment_size, max_bwd_segment_size, enable_bulking_in_training)
    test_cases = [(0, 0, True), (1, 1, True), (15, 15, False), (15, 0, True), (0, 15, True), (15, 15, True)]
    times = {}
    report_lines = []
    for seg_sizes in test_cases:
        fwd_size, bwd_size, bulk_in_training = seg_sizes
        # Create shared variable to return measured time from test process
        time_per_iteration = mp.Manager().Value('d', 0.0)
        env = {'MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_FWD': str(fwd_size),
               'MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_BWD': str(bwd_size),
               'MXNET_EXEC_BULK_EXEC_TRAIN': str(bulk_in_training)}
        ran = run_in_spawned_process(_test_bulking_in_process, env, time_per_iteration)
        if not ran:
            # skip test since the python version can't run it properly.  Warning msg was logged.
            return
        times[seg_sizes] = time_per_iteration.value
        report_lines.append(
            '\n    runtime of (fwd,bwd,enable) op seg setting ({},{},{}) =\t{:.1f} msec'.format(
                fwd_size, bwd_size, bulk_in_training, 1000.0 * times[seg_sizes]))
    times_str = ''.join(report_lines)

    non_bulked = [times[(0, 0, True)], times[(1, 1, True)], times[(15, 15, False)]]
    half_bulked = [times[(0, 15, True)], times[(15, 0, True)]]
    fastest_non_bulked_time = min(non_bulked)
    slowest_half_bulked_time = max(half_bulked)
    fastest_half_bulked_time = min(half_bulked)
    fully_bulked_time = times[(15, 15, True)]

    print(times_str)
    # Non-bulked times[0,0,True], times[1,1,True] and times[15,15,False] should be about the same,
    # slower than both half-bulked times[0,15,True] and times[15,0,True]
    assert slowest_half_bulked_time < fastest_non_bulked_time, \
        'A half-bulked exec time is slower than the non-bulked time by {} secs! {}' \
        .format(slowest_half_bulked_time - fastest_non_bulked_time, times_str)
    # The fully bulked times[15,15,True] should be faster than both half-bulked runs
    assert fully_bulked_time < fastest_half_bulked_time, \
        'The fully-bulked exec time is slower than a half-bulked time by {} secs! {}' \
        .format(fully_bulked_time - fastest_half_bulked_time, times_str)
2019-10-15 15:56:43 -07:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2019-10-15 15:56:43 -07:00
def test_allclose_function_gpu():
    """Invoke allclose_function() with the contexts mx.cpu() and mx.gpu(0)."""
    contexts = [mx.cpu(), mx.gpu(0)]
    allclose_function(contexts)
2018-05-15 20:39:02 +02:00
def test_context_num_gpus():
    """mx.context.num_gpus() must report a positive GPU count."""
    # Test that num_gpus reports at least one GPU, as the test is run on a GPU host.
    gpu_count = mx.context.num_gpus()
    assert gpu_count > 0
2018-05-12 22:48:34 -07:00
2019-05-23 07:17:38 +08:00
def math_log(shape, dtype, check_value):
    """Apply mx.nd.log to uniform random input of the given shape and dtype.

    When check_value is true, the result is compared against mx.nd.log
    evaluated on a copy of the input placed on mx.cpu().
    """
    host_values = np.random.rand(*tuple(shape))
    x = mx.nd.array(host_values, dtype=dtype)
    y = mx.nd.log(data=x)
    if not check_value:
        return
    cpu_x = x.as_in_context(mx.cpu())
    cpu_y = mx.nd.log(data=cpu_x)
    assert_almost_equal(y.asnumpy(), cpu_y.asnumpy())
def math_erf(shape, dtype, check_value):
    """Apply mx.nd.erf to uniform random input of the given shape and dtype.

    When check_value is true, the result is compared against mx.nd.erf
    evaluated on a copy of the input placed on mx.cpu().
    """
    host_values = np.random.rand(*tuple(shape))
    x = mx.nd.array(host_values, dtype=dtype)
    y = mx.nd.erf(data=x)
    if not check_value:
        return
    cpu_x = x.as_in_context(mx.cpu())
    cpu_y = mx.nd.erf(data=cpu_x)
    assert_almost_equal(y.asnumpy(), cpu_y.asnumpy())
def math_square(shape, dtype, check_value):
    """Apply mx.nd.square to uniform random input of the given shape and dtype.

    When check_value is true, the result is compared against mx.nd.square
    evaluated on a copy of the input placed on mx.cpu().
    """
    host_values = np.random.rand(*tuple(shape))
    x = mx.nd.array(host_values, dtype=dtype)
    y = mx.nd.square(data=x)
    if not check_value:
        return
    cpu_x = x.as_in_context(mx.cpu())
    cpu_y = mx.nd.square(data=cpu_x)
    assert_almost_equal(y.asnumpy(), cpu_y.asnumpy())
def run_math(op, shape, dtype="float32", check_value=True):
    """Run the math check selected by ``op`` ten times.

    ``op`` may be 'log', 'erf' or 'square'; any other value results in no
    check being run.
    """
    # Resolve the checker once; the op name does not change between runs.
    if op == 'log':
        checker = math_log
    elif op == 'erf':
        checker = math_erf
    elif op == 'square':
        checker = math_square
    else:
        return

    run_num = 10
    for _ in range(run_num):
        checker(shape=shape, dtype=dtype, check_value=check_value)
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2019-05-23 07:17:38 +08:00
def test_math():
    """Run the log, erf and square checks for several shapes in float32 and float64."""
    check_value = True
    shape_lst = [[1000], [100, 1000], [10, 100, 100], [10, 100, 100, 100]]
    # Shape varies slowest and op fastest.
    cases = [(op, shape, dtype)
             for shape in shape_lst
             for dtype in ("float32", "float64")
             for op in ('log', 'erf', 'square')]
    for op, shape, dtype in cases:
        run_math(op, shape, dtype, check_value=check_value)
2019-08-26 07:37:39 +08:00
@with_seed ( )
2020-05-04 16:44:27 -07:00
@pytest.mark.serial
2019-08-26 07:37:39 +08:00
def test_arange_like_dtype():
    """Every output of contrib.arange_like must have the dtype of the input variable.

    Checked for float16, float32 and float64 on mx.gpu(0).
    """
    for dtype in (np.float16, np.float32, np.float64):
        x = mx.sym.Variable('x', dtype=dtype)
        y = mx.sym.reshape(x, shape=(0, 0, -1))
        z = mx.sym.contrib.arange_like(y, axis=-1)

        mod = z._simple_bind(ctx=mx.gpu(0), x=(3, 4, 5, 6), grad_req='null')
        first_arg = mod.arg_arrays[0]
        first_arg[:] = np.random.normal(size=first_arg.shape).astype(dtype)
        outputs = mod.forward(is_train=False)
        for result in outputs:
            assert result.dtype == dtype
2019-09-23 10:13:42 +08:00
2020-08-28 14:53:11 -07:00
def test_fp16_spmm():
    """Compare mxsps.dot with mx.nd.dot for a float16 CSR input.

    The sparse input has a single non-zero value, 2.0, at row 150 and
    column 100000; the weight is a random (100001, 151) float16 array.
    """
    coo = sps.coo_matrix(([2.0], ([150], [100000])))
    inp = mxsps.csr_matrix(coo.tocsr())
    inp = inp.astype('float16', copy=False)
    weight = mx.nd.random.randn(100001, 151).astype('float16', copy=False)

    out = mxsps.dot(inp, weight)
    out_np = mx.nd.dot(inp, weight)
    assert_almost_equal(out.asnumpy(), out_np, rtol=1e-3, atol=1e-5)