|
|
/*
|
||
|
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||
|
|
* or more contributor license agreements. See the NOTICE file
|
||
|
|
* distributed with this work for additional information
|
||
|
|
* regarding copyright ownership. The ASF licenses this file
|
||
|
|
* to you under the Apache License, Version 2.0 (the
|
||
|
|
* "License"); you may not use this file except in compliance
|
||
|
|
* with the License. You may obtain a copy of the License at
|
||
|
|
*
|
||
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
*
|
||
|
|
* Unless required by applicable law or agreed to in writing,
|
||
|
|
* software distributed under the License is distributed on an
|
||
|
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||
|
|
* KIND, either express or implied. See the License for the
|
||
|
|
* specific language governing permissions and limitations
|
||
|
|
* under the License.
|
||
|
|
*/
|
||
|
|
|
||
|
|
/*
 * Example: mlp_csv
 * Description:
 * The following example demonstrates how to use CSVIter. It creates an
 * mlp (multi-layer perceptron) model and trains it on the MNIST data, which
 * is provided in CSV format.
 */
|
||
|
|
#include <cctype>
#include <cerrno>
#include <chrono>
#include <climits>
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "utils.h"
#include "mxnet-cpp/MxNetCpp.h"
|
||
|
|
|
||
|
|
using namespace mxnet::cpp;
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Implementing the mlp symbol with given hidden units configuration.
|
||
|
|
*/
|
||
|
|
Symbol mlp(const std::vector<int> &hidden_units) {
|
||
|
|
auto data = Symbol::Variable("data");
|
||
|
|
auto label = Symbol::Variable("label");
|
||
|
|
|
||
|
|
std::vector<Symbol> weights(hidden_units.size());
|
||
|
|
std::vector<Symbol> biases(hidden_units.size());
|
||
|
|
std::vector<Symbol> outputs(hidden_units.size());
|
||
|
|
|
||
|
|
for (size_t i = 0; i < hidden_units.size(); ++i) {
|
||
|
|
weights[i] = Symbol::Variable("w" + std::to_string(i));
|
||
|
|
biases[i] = Symbol::Variable("b" + std::to_string(i));
|
||
|
|
Symbol fc = FullyConnected(
|
||
|
|
i == 0? data : outputs[i-1], // data
|
||
|
|
weights[i],
|
||
|
|
biases[i],
|
||
|
|
hidden_units[i]);
|
||
|
|
outputs[i] = i == hidden_units.size()-1 ? fc : Activation(fc, ActivationActType::kRelu);
|
||
|
|
}
|
||
|
|
return SoftmaxOutput(outputs.back(), label);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
 * Convert the input string of number of hidden units into the vector of integers.
 *
 * The string is expected to hold base-10 integers separated by whitespace,
 * e.g. "128 64 64". Leading and trailing whitespace is ignored.
 *
 * Returns the parsed values in order. An empty vector is returned when the string holds
 * no number at all or is malformed: a token that is not a number, a number followed by a
 * character other than whitespace, a value that is not positive, or a value that does not
 * fit into an int. Callers can therefore use empty() as the error indicator.
 */
std::vector<int> getLayers(const std::string& hidden_units_string) {
  std::vector<int> hidden_units;
  const char *cursor = hidden_units_string.c_str();

  while (true) {
    // Skip the separators in front of the next token.
    while (std::isspace(static_cast<unsigned char>(*cursor))) {
      ++cursor;
    }
    if (*cursor == '\0') {
      break;
    }

    char *token_end = nullptr;
    errno = 0;
    const long value = std::strtol(cursor, &token_end, 10);

    // strtol leaves token_end == cursor when nothing was converted. Without this check
    // the loop would never advance on input such as "128,64".
    const bool converted = (token_end != cursor);
    const bool terminated = converted &&
        (*token_end == '\0' || std::isspace(static_cast<unsigned char>(*token_end)));
    if (!terminated || errno == ERANGE || value <= 0 || value > INT_MAX) {
      return std::vector<int>();
    }

    hidden_units.push_back(static_cast<int>(value));
    cursor = token_end;
  }
  return hidden_units;
}
|
||
|
|
|
||
|
|
/*
 * Print the command-line usage of the example to standard output.
 * The text is written in one statement and the stream is flushed once at the end.
 */
void printUsage() {
  std::cout << "Usage:" << '\n'
            << "mlp_csv --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 "
            << "--batch_size 100 --hidden_units \"128 64 64\" --gpu" << '\n'
            << "The example uses mnist data in CSV format. The MNIST data in CSV format assumes "
            << "the column 0 to be label and the rest 784 column to be data." << '\n'
            << "By default, the example uses 'cpu' context. If '--gpu' is specified, "
            << "program uses 'gpu' context." << std::endl;
}
|
||
|
|
|
||
|
|
int main(int argc, char** argv) {
|
||
|
|
const int image_size = 28;
|
||
|
|
const int num_mnist_features = image_size * image_size;
|
||
|
|
int batch_size = 100;
|
||
|
|
int max_epoch = 10;
|
||
|
|
const float learning_rate = 0.1;
|
||
|
|
const float weight_decay = 1e-2;
|
||
|
|
bool isGpu = false;
|
||
|
|
|
||
|
|
std::string training_set;
|
||
|
|
std::string test_set;
|
||
|
|
std::string hidden_units_string;
|
||
|
|
int index = 1;
|
||
|
|
while (index < argc) {
|
||
|
|
if (strcmp("--train", argv[index]) == 0) {
|
||
|
|
index++;
|
||
|
|
training_set = argv[index];
|
||
|
|
} else if (strcmp("--test", argv[index]) == 0) {
|
||
|
|
index++;
|
||
|
|
test_set = argv[index];
|
||
|
|
} else if (strcmp("--epochs", argv[index]) == 0) {
|
||
|
|
index++;
|
||
|
|
max_epoch = strtol(argv[index], nullptr, 10);
|
||
|
|
} else if (strcmp("--batch_size", argv[index]) == 0) {
|
||
|
|
index++;
|
||
|
|
batch_size = strtol(argv[index], nullptr, 10);
|
||
|
|
} else if (strcmp("--hidden_units", argv[index]) == 0) {
|
||
|
|
index++;
|
||
|
|
hidden_units_string = argv[index];
|
||
|
|
} else if (strcmp("--gpu", argv[index]) == 0) {
|
||
|
|
isGpu = true;
|
||
|
|
index++;
|
||
|
|
} else if (strcmp("--help", argv[index]) == 0) {
|
||
|
|
printUsage();
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
index++;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (training_set.empty() || test_set.empty() || hidden_units_string.empty()) {
|
||
|
|
std::cout << "ERROR: The mandatory arguments such as path to training and test data or "
|
||
|
|
<< "number of hidden units for mlp are not specified." << std::endl << std::endl;
|
||
|
|
printUsage();
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
|
||
|
|
std::vector<int> hidden_units = getLayers(hidden_units_string);
|
||
|
|
|
||
|
|
if (hidden_units.empty()) {
|
||
|
|
std::cout << "ERROR: Number of hidden units are not provided in correct format."
|
||
|
|
<< "The numbers need to be separated by ' '." << std::endl << std::endl;
|
||
|
|
printUsage();
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* The MNIST data in CSV format has 785 columns.
|
||
|
|
* The first column is "Label" and rest of the columns contain data.
|
||
|
|
* The mnist_train.csv has 60000 records and mnist_test.csv has
|
||
|
|
* 10000 records.
|
||
|
|
*/
|
||
|
|
auto train_iter = MXDataIter("CSVIter")
|
||
|
|
.SetParam("data_csv", training_set)
|
||
|
|
.SetParam("data_shape", Shape(num_mnist_features + 1, 1))
|
||
|
|
.SetParam("batch_size", batch_size)
|
||
|
|
.SetParam("flat", 1)
|
||
|
|
.SetParam("shuffle", 0)
|
||
|
|
.CreateDataIter();
|
||
|
|
|
||
|
|
auto val_iter = MXDataIter("CSVIter")
|
||
|
|
.SetParam("data_csv", test_set)
|
||
|
|
.SetParam("data_shape", Shape(num_mnist_features + 1, 1))
|
||
|
|
.SetParam("batch_size", batch_size)
|
||
|
|
.SetParam("flat", 1)
|
||
|
|
.SetParam("shuffle", 0)
|
||
|
|
.CreateDataIter();
|
||
|
|
|
||
|
|
TRY
|
||
|
|
auto net = mlp(hidden_units);
|
||
|
|
|
||
|
|
Context ctx = Context::cpu();
|
||
|
|
if (isGpu) {
|
||
|
|
ctx = Context::gpu();
|
||
|
|
}
|
||
|
|
|
||
|
|
std::map<std::string, NDArray> args;
|
||
|
|
args["data"] = NDArray(Shape(batch_size, num_mnist_features), ctx);
|
||
|
|
args["label"] = NDArray(Shape(batch_size), ctx);
|
||
|
|
// Let MXNet infer shapes other parameters such as weights
|
||
|
|
net.InferArgsMap(ctx, &args, args);
|
||
|
|
|
||
|
|
// Initialize all parameters with uniform distribution U(-0.01, 0.01)
|
||
|
|
auto initializer = Uniform(0.01);
|
||
|
|
for (auto& arg : args) {
|
||
|
|
// arg.first is parameter name, and arg.second is the value
|
||
|
|
initializer(arg.first, &arg.second);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Create sgd optimiz er
|
||
|
|
Optimizer* opt = OptimizerRegistry::Find("sgd");
|
||
|
|
opt->SetParam("rescale_grad", 1.0/batch_size)
|
||
|
|
->SetParam("lr", learning_rate)
|
||
|
|
->SetParam("wd", weight_decay);
|
||
|
|
|
||
|
|
// Create executor by binding parameters to the model
|
||
|
|
auto *exec = net.SimpleBind(ctx, args);
|
||
|
|
auto arg_names = net.ListArguments();
|
||
|
|
|
||
|
|
// Start training
|
||
|
|
for (int iter = 0; iter < max_epoch; ++iter) {
|
||
|
|
int samples = 0;
|
||
|
|
train_iter.Reset();
|
||
|
|
|
||
|
|
auto tic = std::chrono::system_clock::now();
|
||
|
|
while (train_iter.Next()) {
|
||
|
|
samples += batch_size;
|
||
|
|
auto data_batch = train_iter.GetDataBatch();
|
||
|
|
|
||
|
|
/*
|
||
|
|
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
|
||
|
|
* Need to reshape this data so that label column can be extracted from this data.
|
||
|
|
*/
|
||
|
|
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),
|
||
|
|
batch_size));
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Extract the label data by slicing the first column of the data and
|
||
|
|
* copy it to "label" arg.
|
||
|
|
*/
|
||
|
|
reshapedData.Slice(0, 1).Reshape(Shape(batch_size)).CopyTo(&args["label"]);
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Extract the feature data by slicing the columns 1 to 785 of the data and
|
||
|
|
* copy it to "data" arg.
|
||
|
|
*/
|
||
|
|
reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,
|
||
|
|
num_mnist_features))
|
||
|
|
.CopyTo(&args["data"]);
|
||
|
|
|
||
|
|
exec->Forward(true);
|
||
|
|
|
||
|
|
// Compute gradients
|
||
|
|
exec->Backward();
|
||
|
|
// Update parameters
|
||
|
|
for (size_t i = 0; i < arg_names.size(); ++i) {
|
||
|
|
if (arg_names[i] == "data" || arg_names[i] == "label") continue;
|
||
|
|
opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
auto toc = std::chrono::system_clock::now();
|
||
|
|
|
||
|
|
Accuracy acc;
|
||
|
|
val_iter.Reset();
|
||
|
|
while (val_iter.Next()) {
|
||
|
|
auto data_batch = val_iter.GetDataBatch();
|
||
|
|
|
||
|
|
/*
|
||
|
|
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
|
||
|
|
* Need to reshape this data so that label column can be extracted from this data.
|
||
|
|
*/
|
||
|
|
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),
|
||
|
|
batch_size));
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Extract the label data by slicing the first column of the data and
|
||
|
|
* copy it to "label" arg.
|
||
|
|
*/
|
||
|
|
NDArray labelData = reshapedData.Slice(0, 1).Reshape(Shape(batch_size));
|
||
|
|
labelData.CopyTo(&args["label"]);
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Extract the feature data by slicing the columns 1 to 785 of the data and
|
||
|
|
* copy it to "data" arg.
|
||
|
|
*/
|
||
|
|
reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,
|
||
|
|
num_mnist_features))
|
||
|
|
.CopyTo(&args["data"]);
|
||
|
|
|
||
|
|
// Forward pass is enough as no gradient is needed when evaluating
|
||
|
|
exec->Forward(false);
|
||
|
|
acc.Update(labelData, exec->outputs[0]);
|
||
|
|
}
|
||
|
|
float duration = std::chrono::duration_cast<std::chrono::milliseconds>
|
||
|
|
(toc - tic).count() / 1000.0;
|
||
|
|
LG << "Epoch[" << iter << "] " << samples/duration << " samples/sec Accuracy: "
|
||
|
|
<< acc.Get();
|
||
|
|
}
|
||
|
|
|
||
|
|
delete exec;
|
||
|
|
delete opt;
|
||
|
|
MXNotifyShutdown();
|
||
|
|
CATCH
|
||
|
|
return 0;
|
||
|
|
}
|