/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ /*! * \file fully_conn_perf.cc * \brief Sample for running C++ performance tests on a single operator. This method is also * useful for profiling with vtune or gprof, avoiding the "noise" of python and executor * \author Chris Olivier */ #include #include #include "../../src/operator/nn/fully_connected-inl.h" #include "../include/test_op_runner.h" #include "../include/test_core_op.h" using namespace mxnet; typedef std::vector > kwargs_t; const kwargs_t basic_fullyconn_args = {{"num_hidden", "250"}, {"no_bias", "true"}}; /*! * \brief Generic bidirectional sanity test */ TEST(FULLY_CONNECTED, ExecuteBidirectionalFullyConnected) { mxnet::TShape shape1({5, 5}); mxnet::TShape shape2({250, 5}); kwargs_t kwargs = basic_fullyconn_args; test::op::CoreOperatorRunner runner; runner.set_verbose(true); kwargs = test::op::CoreOpExecutor::ArgsWithOpName( kwargs, "FullyConnected", "_backward_FullyConnected"); runner.RunBidirectional(false, {shape1, shape2}, kwargs, 1); } /*! * \brief Timing test for CPU */ TEST(FULLY_CONNECTED, FullyConnectedTimingCPU) { kwargs_t kwargs = basic_fullyconn_args; mxnet::TShape shape1({10, 10, 10, 10}); mxnet::TShape shape2({250, 1000}); test::op::CoreOperatorRunner runner; kwargs = test::op::CoreOpExecutor::ArgsWithOpName( kwargs, "FullyConnected", "_backward_FullyConnected"); runner.RunBidirectional(false, {shape1, shape2}, kwargs, 1); std::vector shapes; if (test::performance_run) { shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { {1, 1, 28, 28}, {50, 3, 18, 32}, }; } for (const mxnet::TShape& shape : shapes) { mxnet::TShape shape2({250, static_cast(shape.ProdShape(1, shape.ndim()))}); kwargs = test::op::CoreOpExecutor::ArgsWithOpName( kwargs, "FullyConnected", "_backward_FullyConnected"); runner.TimingTest("Fully connected CPU", false, false, kwargs, 2, 10, {shape, shape2}, false); } } #if MXNET_USE_CUDA == 1 /*! * \brief Timing test for GPU */ TEST(FULLY_CONNECTED, FullyConnectedTimingGPU) { kwargs_t kwargs = basic_fullyconn_args; mxnet::TShape shape1({10, 10, 10, 10}); mxnet::TShape shape2({250, 1000}); test::op::CoreOperatorRunner runner; kwargs = test::op::CoreOpExecutor::ArgsWithOpName( kwargs, "FullyConnected", "_backward_FullyConnected"); runner.RunBidirectional(false, {shape1, shape2}, kwargs, 1); std::vector shapes; if (test::performance_run) { shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { {1, 1, 28, 28}, {50, 3, 18, 32}, }; } for (const mxnet::TShape& shape : shapes) { mxnet::TShape shape2({250, static_cast(shape.ProdShape(1, shape.ndim()))}); kwargs = test::op::CoreOpExecutor::ArgsWithOpName( kwargs, "FullyConnected", "_backward_FullyConnected"); runner.TimingTest("Fully connected GPU", true, false, kwargs, 2, 10, {shape, shape2}, false); } } #endif // MXNET_USE_CUDA == 1