/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
 * Example: mlp_csv
 * Description:
 * The following example demonstrates how to use CSVIter. It builds an
 * mlp (multi-layer perceptron) model and trains it on MNIST data that is
 * stored in CSV format.
 */
#include <cctype>
#include <cerrno>
#include <chrono>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include "utils.h"
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;
/*
 * Builds the mlp symbol for the given hidden units configuration.
 *
 * Layer i is a FullyConnected operator with hidden_units[i] outputs whose
 * weight and bias are the variables "w<i>" and "b<i>". The first layer reads
 * the "data" variable, every later layer reads the previous layer's output.
 * Every layer except the last one is followed by a ReLU activation; the last
 * layer's output is passed, together with the "label" variable, to
 * SoftmaxOutput.
 *
 * hidden_units: number of units of each layer, in order. Must not be empty.
 * Returns the SoftmaxOutput symbol.
 * Throws std::invalid_argument if hidden_units is empty (there would be no
 * layer output to feed into SoftmaxOutput).
 */
Symbol mlp(const std::vector<int>& hidden_units) {
  if (hidden_units.empty()) {
    throw std::invalid_argument("mlp: hidden_units must contain at least one layer");
  }

  // `current` is the input of the layer being built; it starts as the data variable.
  Symbol current = Symbol::Variable("data");
  const auto label = Symbol::Variable("label");

  const size_t last = hidden_units.size() - 1;
  for (size_t i = 0; i <= last; ++i) {
    const std::string suffix = std::to_string(i);
    Symbol weight = Symbol::Variable("w" + suffix);
    Symbol bias = Symbol::Variable("b" + suffix);

    Symbol fc = FullyConnected(current, weight, bias, hidden_units[i]);

    // The last layer stays linear because SoftmaxOutput is applied to it below.
    current = (i == last) ? fc : Activation(fc, ActivationActType::kRelu);
  }
  return SoftmaxOutput(current, label);
}
/*
 * Converts the input string of hidden unit counts into a vector of integers.
 *
 * The counts are base-10 integers separated by whitespace, e.g. "128 64 64".
 * Leading and trailing whitespace is ignored.
 *
 * hidden_units_string: the whitespace separated list of unit counts.
 * Returns the parsed counts in order. Returns an empty vector when the string
 * contains no number at all, or when any token is not a number, is not
 * positive, or does not fit into an int, so that the caller can report a
 * format error.
 */
std::vector<int> getLayers(const std::string& hidden_units_string) {
  std::vector<int> hidden_units;
  const char* cursor = hidden_units_string.c_str();

  while (true) {
    // Skip separators explicitly so that trailing whitespace ends the parse
    // instead of being handed to strtol as an empty token.
    while (std::isspace(static_cast<unsigned char>(*cursor))) {
      ++cursor;
    }
    if (*cursor == '\0') {
      break;
    }

    char* token_end = nullptr;
    errno = 0;
    const long num_unit = std::strtol(cursor, &token_end, 10);

    // token_end == cursor means strtol consumed nothing; without this check
    // the cursor would never advance and the loop would not terminate.
    const bool no_progress = (token_end == cursor);
    const bool out_of_range = (errno == ERANGE) || num_unit > INT_MAX;
    if (no_progress || out_of_range || num_unit < 1) {
      return std::vector<int>();
    }

    hidden_units.push_back(static_cast<int>(num_unit));
    cursor = token_end;
  }
  return hidden_units;
}
/*
 * Prints the command line usage of the example to standard output.
 * Each entry of the table below is written as one line followed by std::endl.
 */
void printUsage() {
  static const char* const kUsageLines[] = {
    "Usage:",
    "mlp_csv --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 "
    "--batch_size 100 --hidden_units \"128 64 64\" --gpu",
    "The example uses mnist data in CSV format. The MNIST data in CSV format assumes "
    "the column 0 to be label and the rest 784 column to be data.",
    "By default, the example uses 'cpu' context. If '--gpu' is specified, "
    "program uses 'gpu' context.",
  };

  for (const char* line : kUsageLines) {
    std::cout << line << std::endl;
  }
}
int main ( int argc , char * * argv ) {
const int image_size = 28 ;
const int num_mnist_features = image_size * image_size ;
int batch_size = 100 ;
int max_epoch = 10 ;
const float learning_rate = 0.1 ;
const float weight_decay = 1e-2 ;
bool isGpu = false ;
std : : string training_set ;
std : : string test_set ;
std : : string hidden_units_string ;
int index = 1 ;
while ( index < argc ) {
if ( strcmp ( " --train " , argv [ index ] ) = = 0 ) {
index + + ;
training_set = argv [ index ] ;
} else if ( strcmp ( " --test " , argv [ index ] ) = = 0 ) {
index + + ;
test_set = argv [ index ] ;
} else if ( strcmp ( " --epochs " , argv [ index ] ) = = 0 ) {
index + + ;
2020-02-09 02:50:49 +01:00
max_epoch = strtol ( argv [ index ] , nullptr , 10 ) ;
2018-10-08 13:45:59 -07:00
} else if ( strcmp ( " --batch_size " , argv [ index ] ) = = 0 ) {
index + + ;
2020-02-09 02:50:49 +01:00
batch_size = strtol ( argv [ index ] , nullptr , 10 ) ;
2018-10-08 13:45:59 -07:00
} else if ( strcmp ( " --hidden_units " , argv [ index ] ) = = 0 ) {
index + + ;
hidden_units_string = argv [ index ] ;
} else if ( strcmp ( " --gpu " , argv [ index ] ) = = 0 ) {
isGpu = true ;
index + + ;
} else if ( strcmp ( " --help " , argv [ index ] ) = = 0 ) {
printUsage ( ) ;
return 0 ;
}
index + + ;
}
if ( training_set . empty ( ) | | test_set . empty ( ) | | hidden_units_string . empty ( ) ) {
std : : cout < < " ERROR: The mandatory arguments such as path to training and test data or "
< < " number of hidden units for mlp are not specified. " < < std : : endl < < std : : endl ;
printUsage ( ) ;
return 1 ;
}
std : : vector < int > hidden_units = getLayers ( hidden_units_string ) ;
if ( hidden_units . empty ( ) ) {
std : : cout < < " ERROR: Number of hidden units are not provided in correct format. "
< < " The numbers need to be separated by ' '. " < < std : : endl < < std : : endl ;
printUsage ( ) ;
return 1 ;
}
/*
* The MNIST data in CSV format has 785 columns.
* The first column is "Label" and rest of the columns contain data.
* The mnist_train.csv has 60000 records and mnist_test.csv has
* 10000 records.
*/
auto train_iter = MXDataIter ( " CSVIter " )
. SetParam ( " data_csv " , training_set )
. SetParam ( " data_shape " , Shape ( num_mnist_features + 1 , 1 ) )
. SetParam ( " batch_size " , batch_size )
. SetParam ( " flat " , 1 )
. SetParam ( " shuffle " , 0 )
. CreateDataIter ( ) ;
auto val_iter = MXDataIter ( " CSVIter " )
. SetParam ( " data_csv " , test_set )
. SetParam ( " data_shape " , Shape ( num_mnist_features + 1 , 1 ) )
. SetParam ( " batch_size " , batch_size )
. SetParam ( " flat " , 1 )
. SetParam ( " shuffle " , 0 )
. CreateDataIter ( ) ;
2019-04-02 16:23:54 -07:00
TRY
2018-10-08 13:45:59 -07:00
auto net = mlp ( hidden_units ) ;
Context ctx = Context : : cpu ( ) ;
if ( isGpu ) {
ctx = Context : : gpu ( ) ;
}
std : : map < std : : string , NDArray > args ;
args [ " data " ] = NDArray ( Shape ( batch_size , num_mnist_features ) , ctx ) ;
args [ " label " ] = NDArray ( Shape ( batch_size ) , ctx ) ;
// Let MXNet infer shapes other parameters such as weights
net . InferArgsMap ( ctx , & args , args ) ;
// Initialize all parameters with uniform distribution U(-0.01, 0.01)
auto initializer = Uniform ( 0.01 ) ;
for ( auto & arg : args ) {
// arg.first is parameter name, and arg.second is the value
initializer ( arg . first , & arg . second ) ;
}
// Create sgd optimiz er
Optimizer * opt = OptimizerRegistry : : Find ( " sgd " ) ;
opt - > SetParam ( " rescale_grad " , 1.0 / batch_size )
- > SetParam ( " lr " , learning_rate )
- > SetParam ( " wd " , weight_decay ) ;
// Create executor by binding parameters to the model
auto * exec = net . SimpleBind ( ctx , args ) ;
auto arg_names = net . ListArguments ( ) ;
// Start training
for ( int iter = 0 ; iter < max_epoch ; + + iter ) {
int samples = 0 ;
train_iter . Reset ( ) ;
auto tic = std : : chrono : : system_clock : : now ( ) ;
while ( train_iter . Next ( ) ) {
samples + = batch_size ;
auto data_batch = train_iter . GetDataBatch ( ) ;
/*
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
* Need to reshape this data so that label column can be extracted from this data.
*/
NDArray reshapedData = data_batch . data . Reshape ( Shape ( ( num_mnist_features + 1 ) ,
batch_size ) ) ;
/*
* Extract the label data by slicing the first column of the data and
* copy it to "label" arg.
*/
reshapedData . Slice ( 0 , 1 ) . Reshape ( Shape ( batch_size ) ) . CopyTo ( & args [ " label " ] ) ;
/*
* Extract the feature data by slicing the columns 1 to 785 of the data and
* copy it to "data" arg.
*/
reshapedData . Slice ( 1 , ( num_mnist_features + 1 ) ) . Reshape ( Shape ( batch_size ,
num_mnist_features ) )
. CopyTo ( & args [ " data " ] ) ;
exec - > Forward ( true ) ;
// Compute gradients
exec - > Backward ( ) ;
// Update parameters
for ( size_t i = 0 ; i < arg_names . size ( ) ; + + i ) {
if ( arg_names [ i ] = = " data " | | arg_names [ i ] = = " label " ) continue ;
opt - > Update ( i , exec - > arg_arrays [ i ] , exec - > grad_arrays [ i ] ) ;
}
}
auto toc = std : : chrono : : system_clock : : now ( ) ;
Accuracy acc ;
val_iter . Reset ( ) ;
while ( val_iter . Next ( ) ) {
auto data_batch = val_iter . GetDataBatch ( ) ;
/*
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
* Need to reshape this data so that label column can be extracted from this data.
*/
NDArray reshapedData = data_batch . data . Reshape ( Shape ( ( num_mnist_features + 1 ) ,
batch_size ) ) ;
/*
* Extract the label data by slicing the first column of the data and
* copy it to "label" arg.
*/
NDArray labelData = reshapedData . Slice ( 0 , 1 ) . Reshape ( Shape ( batch_size ) ) ;
labelData . CopyTo ( & args [ " label " ] ) ;
/*
* Extract the feature data by slicing the columns 1 to 785 of the data and
* copy it to "data" arg.
*/
reshapedData . Slice ( 1 , ( num_mnist_features + 1 ) ) . Reshape ( Shape ( batch_size ,
num_mnist_features ) )
. CopyTo ( & args [ " data " ] ) ;
// Forward pass is enough as no gradient is needed when evaluating
exec - > Forward ( false ) ;
acc . Update ( labelData , exec - > outputs [ 0 ] ) ;
}
float duration = std : : chrono : : duration_cast < std : : chrono : : milliseconds >
( toc - tic ) . count ( ) / 1000.0 ;
LG < < " Epoch[ " < < iter < < " ] " < < samples / duration < < " samples/sec Accuracy: "
< < acc . Get ( ) ;
}
delete exec ;
2019-03-03 12:37:11 +08:00
delete opt ;
2018-10-08 13:45:59 -07:00
MXNotifyShutdown ( ) ;
2019-04-02 16:23:54 -07:00
CATCH
2018-10-08 13:45:59 -07:00
return 0 ;
}