/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
 * \brief Example: train a small multi-layer perceptron on hand-made
 *        synthetic data using the mxnet-cpp API.
 */
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include <memory>

#include "mxnet-cpp/MxNetCpp.h"
#include "utils.h"

using namespace mxnet::cpp;
/*
* In this example,
* we make by hand some data in 10 classes with some pattern
* and try to use MLP to recognize the pattern.
*/
void OutputAccuracy ( mx_float * pred , mx_float * target ) {
int right = 0 ;
for ( int i = 0 ; i < 128 ; + + i ) {
float mx_p = pred [ i * 10 + 0 ] ;
float p_y = 0 ;
for ( int j = 0 ; j < 10 ; + + j ) {
if ( pred [ i * 10 + j ] > mx_p ) {
mx_p = pred [ i * 10 + j ] ;
p_y = j ;
}
}
if ( p_y = = target [ i ] ) right + + ;
}
2018-05-09 17:33:49 -07:00
std : : cout < < " Accuracy: " < < right / 128.0 < < std : : endl ;
2017-03-22 11:55:51 +08:00
}
2019-02-14 09:55:09 -08:00
void MLP ( int max_epoch ) {
2017-03-22 11:55:51 +08:00
auto sym_x = Symbol : : Variable ( " X " ) ;
auto sym_label = Symbol : : Variable ( " label " ) ;
const int nLayers = 2 ;
2018-05-09 17:33:49 -07:00
std : : vector < int > layerSizes ( { 512 , 10 } ) ;
std : : vector < Symbol > weights ( nLayers ) ;
std : : vector < Symbol > biases ( nLayers ) ;
std : : vector < Symbol > outputs ( nLayers ) ;
2017-03-22 11:55:51 +08:00
2018-02-10 14:20:33 -08:00
Symbol null_sym ;
2017-03-22 11:55:51 +08:00
for ( int i = 0 ; i < nLayers ; i + + ) {
2018-05-09 17:33:49 -07:00
std : : string istr = std : : to_string ( i ) ;
weights [ i ] = Symbol : : Variable ( std : : string ( " w " ) + istr ) ;
biases [ i ] = Symbol : : Variable ( std : : string ( " b " ) + istr ) ;
Symbol fc = FullyConnected ( std : : string ( " fc " ) + istr ,
2017-03-22 11:55:51 +08:00
i = = 0 ? sym_x : outputs [ i - 1 ] ,
weights [ i ] , biases [ i ] , layerSizes [ i ] ) ;
2018-05-09 17:33:49 -07:00
outputs [ i ] = LeakyReLU ( std : : string ( " act " ) + istr , fc , null_sym , LeakyReLUActType : : kLeaky ) ;
2017-03-22 11:55:51 +08:00
}
auto sym_out = SoftmaxOutput ( " softmax " , outputs [ nLayers - 1 ] , sym_label ) ;
Context ctx_dev ( DeviceType : : kCPU , 0 ) ;
NDArray array_x ( Shape ( 128 , 28 ) , ctx_dev , false ) ;
NDArray array_y ( Shape ( 128 ) , ctx_dev , false ) ;
mx_float * aptr_x = new mx_float [ 128 * 28 ] ;
mx_float * aptr_y = new mx_float [ 128 ] ;
// we make the data by hand, in 10 classes, with some pattern
for ( int i = 0 ; i < 128 ; i + + ) {
for ( int j = 0 ; j < 28 ; j + + ) {
aptr_x [ i * 28 + j ] = i % 10 * 1.0f ;
}
aptr_y [ i ] = i % 10 ;
}
array_x . SyncCopyFromCPU ( aptr_x , 128 * 28 ) ;
array_x . WaitToRead ( ) ;
array_y . SyncCopyFromCPU ( aptr_y , 128 ) ;
array_y . WaitToRead ( ) ;
// init the parameters
NDArray array_w_1 ( Shape ( 512 , 28 ) , ctx_dev , false ) ;
NDArray array_b_1 ( Shape ( 512 ) , ctx_dev , false ) ;
NDArray array_w_2 ( Shape ( 10 , 512 ) , ctx_dev , false ) ;
NDArray array_b_2 ( Shape ( 10 ) , ctx_dev , false ) ;
// the parameters should be initialized in some kind of distribution,
// so it learns fast
// but here just give a const value by hand
array_w_1 = 0.5f ;
array_b_1 = 0.0f ;
array_w_2 = 0.5f ;
array_b_2 = 0.0f ;
// the grads
NDArray array_w_1_g ( Shape ( 512 , 28 ) , ctx_dev , false ) ;
NDArray array_b_1_g ( Shape ( 512 ) , ctx_dev , false ) ;
NDArray array_w_2_g ( Shape ( 10 , 512 ) , ctx_dev , false ) ;
NDArray array_b_2_g ( Shape ( 10 ) , ctx_dev , false ) ;
// Bind the symolic network with the ndarray
// all the input args
std : : vector < NDArray > in_args ;
in_args . push_back ( array_x ) ;
in_args . push_back ( array_w_1 ) ;
in_args . push_back ( array_b_1 ) ;
in_args . push_back ( array_w_2 ) ;
in_args . push_back ( array_b_2 ) ;
in_args . push_back ( array_y ) ;
// all the grads
std : : vector < NDArray > arg_grad_store ;
arg_grad_store . push_back ( NDArray ( ) ) ; // we don't need the grad of the input
arg_grad_store . push_back ( array_w_1_g ) ;
arg_grad_store . push_back ( array_b_1_g ) ;
arg_grad_store . push_back ( array_w_2_g ) ;
arg_grad_store . push_back ( array_b_2_g ) ;
arg_grad_store . push_back (
NDArray ( ) ) ; // neither do we need the grad of the loss
// how to handle the grad
std : : vector < OpReqType > grad_req_type ;
grad_req_type . push_back ( kNullOp ) ;
grad_req_type . push_back ( kWriteTo ) ;
grad_req_type . push_back ( kWriteTo ) ;
grad_req_type . push_back ( kWriteTo ) ;
grad_req_type . push_back ( kWriteTo ) ;
grad_req_type . push_back ( kNullOp ) ;
std : : vector < NDArray > aux_states ;
2018-05-09 17:33:49 -07:00
std : : cout < < " make the Executor " < < std : : endl ;
2017-03-22 11:55:51 +08:00
Executor * exe = new Executor ( sym_out , ctx_dev , in_args , arg_grad_store ,
grad_req_type , aux_states ) ;
2018-05-09 17:33:49 -07:00
std : : cout < < " Training " < < std : : endl ;
2017-03-22 11:55:51 +08:00
mx_float learning_rate = 0.0001 ;
2018-12-07 19:57:54 -08:00
for ( int epoch_num = 0 ; epoch_num < max_epoch ; + + epoch_num ) {
2017-03-22 11:55:51 +08:00
exe - > Forward ( true ) ;
2018-12-07 19:57:54 -08:00
// print accuracy every 100 epoch
if ( epoch_num % 100 = = 0 ) {
std : : cout < < " epoch " < < epoch_num < < std : : endl ;
2017-03-22 11:55:51 +08:00
std : : vector < NDArray > & out = exe - > outputs ;
float * cptr = new float [ 128 * 10 ] ;
out [ 0 ] . SyncCopyToCPU ( cptr , 128 * 10 ) ;
NDArray : : WaitAll ( ) ;
OutputAccuracy ( cptr , aptr_y ) ;
delete [ ] cptr ;
}
// update the parameters
exe - > Backward ( ) ;
for ( int i = 1 ; i < 5 ; + + i ) {
in_args [ i ] - = arg_grad_store [ i ] * learning_rate ;
}
NDArray : : WaitAll ( ) ;
}
delete exe ;
delete [ ] aptr_x ;
delete [ ] aptr_y ;
}
int main ( int argc , char * * argv ) {
2020-02-09 02:50:49 +01:00
int max_epoch = argc > 1 ? strtol ( argv [ 1 ] , nullptr , 10 ) : 15000 ;
2019-04-02 16:23:54 -07:00
TRY
2019-02-14 09:55:09 -08:00
MLP ( max_epoch ) ;
2017-03-22 11:55:51 +08:00
MXNotifyShutdown ( ) ;
2019-04-02 16:23:54 -07:00
CATCH
2017-03-22 11:55:51 +08:00
return 0 ;
}