Blame: cpp-package/example/alexnet.cpp - apache/mxnet

apache / mxnet UNCLAIMED

Lightweight, Portable, Flexible Distributed/Mobile Deep Learning with Dynamic, Mutation-aware Dataflow Dep Scheduler; for Python, R, Julia, Scala, Go, Javascript and more

0 0 0 C++

Normal View History Raw

Add more license files (#7429) * Add more licenses * Revert "Add more licenses" This reverts commit 8395a84d21a0cebaf909ec277b7b5d6feffa1412. * Add license files 2017-08-11 14:12:47 -07:00			`/*`
			`* Licensed to the Apache Software Foundation (ASF) under one`
			`* or more contributor license agreements. See the NOTICE file`
			`* distributed with this work for additional information`
			`* regarding copyright ownership. The ASF licenses this file`
			`* to you under the Apache License, Version 2.0 (the`
			`* "License"); you may not use this file except in compliance`
			`* with the License. You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing,`
			`* software distributed under the License is distributed on an`
			`* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY`
			`* KIND, either express or implied. See the License for the`
			`* specific language governing permissions and limitations`
			`* under the License.`
			`*/`

Integrate cpp package (#5251) * Copy mxnet.cpp to cpp-package sub directory * Add missing type name declaration for nnvm::Tuple<dmlc::optional<int>> * Add trigger to update operation wrapper in cpp package when building mxnet * Add test to travis scripts * Disable op generation on windows 2017-03-22 11:55:51 +08:00			`/*!`
			`*/`
			`#include <iostream>`
			`#include <map>`
			`#include <string>`
			`#include "mxnet-cpp/MxNetCpp.h"`
* [cpp-package] fix for issue #7725 (#8551) * remove some unused include (MxNetCpp.h already included op.h) * use same mnist data path avoiding duplication 2017-11-07 02:13:07 +08:00
Integrate cpp package (#5251) * Copy mxnet.cpp to cpp-package sub directory * Add missing type name declaration for nnvm::Tuple<dmlc::optional<int>> * Add trigger to update operation wrapper in cpp package when building mxnet * Add test to travis scripts * Disable op generation on windows 2017-03-22 11:55:51 +08:00
			`using namespace std;`
			`using namespace mxnet::cpp;`

			`Symbol AlexnetSymbol(int num_classes) {`
			`auto input_data = Symbol::Variable("data");`
			`auto target_label = Symbol::Variable("label");`
			`/stage 1/`
			`auto conv1 = Operator("Convolution")`
			`.SetParam("kernel", Shape(11, 11))`
			`.SetParam("num_filter", 96)`
			`.SetParam("stride", Shape(4, 4))`
			`.SetParam("dilate", Shape(1, 1))`
			`.SetParam("pad", Shape(0, 0))`
			`.SetParam("num_group", 1)`
			`.SetParam("workspace", 512)`
			`.SetParam("no_bias", false)`
			`.SetInput("data", input_data)`
			`.CreateSymbol("conv1");`
			`auto relu1 = Operator("Activation")`
			`.SetParam("act_type", "relu") /relu,sigmoid,softrelu,tanh /`
			`.SetInput("data", conv1)`
			`.CreateSymbol("relu1");`
			`auto pool1 = Operator("Pooling")`
			`.SetParam("kernel", Shape(3, 3))`
			`.SetParam("pool_type", "max") /avg,max,sum /`
			`.SetParam("global_pool", false)`
			`.SetParam("stride", Shape(2, 2))`
			`.SetParam("pad", Shape(0, 0))`
			`.SetInput("data", relu1)`
			`.CreateSymbol("pool1");`
			`auto lrn1 = Operator("LRN")`
			`.SetParam("nsize", 5)`
			`.SetParam("alpha", 0.0001)`
			`.SetParam("beta", 0.75)`
			`.SetParam("knorm", 1)`
			`.SetInput("data", pool1)`
			`.CreateSymbol("lrn1");`
			`/stage 2/`
			`auto conv2 = Operator("Convolution")`
			`.SetParam("kernel", Shape(5, 5))`
			`.SetParam("num_filter", 256)`
			`.SetParam("stride", Shape(1, 1))`
			`.SetParam("dilate", Shape(1, 1))`
			`.SetParam("pad", Shape(2, 2))`
			`.SetParam("num_group", 1)`
			`.SetParam("workspace", 512)`
			`.SetParam("no_bias", false)`
			`.SetInput("data", lrn1)`
			`.CreateSymbol("conv2");`
			`auto relu2 = Operator("Activation")`
			`.SetParam("act_type", "relu") /relu,sigmoid,softrelu,tanh /`
			`.SetInput("data", conv2)`
			`.CreateSymbol("relu2");`
			`auto pool2 = Operator("Pooling")`
			`.SetParam("kernel", Shape(3, 3))`
			`.SetParam("pool_type", "max") /avg,max,sum /`
			`.SetParam("global_pool", false)`
			`.SetParam("stride", Shape(2, 2))`
			`.SetParam("pad", Shape(0, 0))`
			`.SetInput("data", relu2)`
			`.CreateSymbol("pool2");`
			`auto lrn2 = Operator("LRN")`
			`.SetParam("nsize", 5)`
			`.SetParam("alpha", 0.0001)`
			`.SetParam("beta", 0.75)`
			`.SetParam("knorm", 1)`
			`.SetInput("data", pool2)`
			`.CreateSymbol("lrn2");`
			`/stage 3/`
			`auto conv3 = Operator("Convolution")`
			`.SetParam("kernel", Shape(3, 3))`
			`.SetParam("num_filter", 384)`
			`.SetParam("stride", Shape(1, 1))`
			`.SetParam("dilate", Shape(1, 1))`
			`.SetParam("pad", Shape(1, 1))`
			`.SetParam("num_group", 1)`
			`.SetParam("workspace", 512)`
			`.SetParam("no_bias", false)`
			`.SetInput("data", lrn2)`
			`.CreateSymbol("conv3");`
			`auto relu3 = Operator("Activation")`
			`.SetParam("act_type", "relu") /relu,sigmoid,softrelu,tanh /`
			`.SetInput("data", conv3)`
			`.CreateSymbol("relu3");`
			`auto conv4 = Operator("Convolution")`
			`.SetParam("kernel", Shape(3, 3))`
			`.SetParam("num_filter", 384)`
			`.SetParam("stride", Shape(1, 1))`
			`.SetParam("dilate", Shape(1, 1))`
			`.SetParam("pad", Shape(1, 1))`
			`.SetParam("num_group", 1)`
			`.SetParam("workspace", 512)`
			`.SetParam("no_bias", false)`
			`.SetInput("data", relu3)`
			`.CreateSymbol("conv4");`
			`auto relu4 = Operator("Activation")`
			`.SetParam("act_type", "relu") /relu,sigmoid,softrelu,tanh /`
			`.SetInput("data", conv4)`
			`.CreateSymbol("relu4");`
			`auto conv5 = Operator("Convolution")`
			`.SetParam("kernel", Shape(3, 3))`
			`.SetParam("num_filter", 256)`
			`.SetParam("stride", Shape(1, 1))`
			`.SetParam("dilate", Shape(1, 1))`
			`.SetParam("pad", Shape(1, 1))`
			`.SetParam("num_group", 1)`
			`.SetParam("workspace", 512)`
			`.SetParam("no_bias", false)`
			`.SetInput("data", relu4)`
			`.CreateSymbol("conv5");`
			`auto relu5 = Operator("Activation")`
			`.SetParam("act_type", "relu")`
			`.SetInput("data", conv5)`
			`.CreateSymbol("relu5");`
			`auto pool3 = Operator("Pooling")`
			`.SetParam("kernel", Shape(3, 3))`
			`.SetParam("pool_type", "max")`
			`.SetParam("global_pool", false)`
			`.SetParam("stride", Shape(2, 2))`
			`.SetParam("pad", Shape(0, 0))`
			`.SetInput("data", relu5)`
			`.CreateSymbol("pool3");`
			`/stage4/`
			`auto flatten =`
			`Operator("Flatten").SetInput("data", pool3).CreateSymbol("flatten");`
			`auto fc1 = Operator("FullyConnected")`
			`.SetParam("num_hidden", 4096)`
			`.SetParam("no_bias", false)`
			`.SetInput("data", flatten)`
			`.CreateSymbol("fc1");`
			`auto relu6 = Operator("Activation")`
			`.SetParam("act_type", "relu")`
			`.SetInput("data", fc1)`
			`.CreateSymbol("relu6");`
			`auto dropout1 = Operator("Dropout")`
			`.SetParam("p", 0.5)`
			`.SetInput("data", relu6)`
			`.CreateSymbol("dropout1");`
			`/stage5/`
			`auto fc2 = Operator("FullyConnected")`
			`.SetParam("num_hidden", 4096)`
			`.SetParam("no_bias", false)`
			`.SetInput("data", dropout1)`
			`.CreateSymbol("fc2");`
			`auto relu7 = Operator("Activation")`
			`.SetParam("act_type", "relu")`
			`.SetInput("data", fc2)`
			`.CreateSymbol("relu7");`
			`auto dropout2 = Operator("Dropout")`
			`.SetParam("p", 0.5)`
			`.SetInput("data", relu7)`
			`.CreateSymbol("dropout2");`
			`/stage6/`
			`auto fc3 = Operator("FullyConnected")`
			`.SetParam("num_hidden", num_classes)`
			`.SetParam("no_bias", false)`
			`.SetInput("data", dropout2)`
			`.CreateSymbol("fc3");`
			`auto softmax = Operator("SoftmaxOutput")`
			`.SetParam("grad_scale", 1)`
			`.SetParam("ignore_label", -1)`
			`.SetParam("multi_output", false)`
			`.SetParam("use_ignore", false)`
			`.SetParam("normalization", "null") /batch,null,valid /`
			`.SetInput("data", fc3)`
			`.SetInput("label", target_label)`
			`.CreateSymbol("softmax");`
			`return softmax;`
			`}`

			`int main(int argc, char const *argv[]) {`
			`/basic config/`
			`int batch_size = 256;`
			`int max_epo = 100;`
			`float learning_rate = 1e-4;`
			`float weight_decay = 1e-4;`

			`/context and net symbol/`
			`auto ctx = Context::gpu();`
			`auto Net = AlexnetSymbol(10);`

			`/args_map and aux_map is used for parameters' saving/`
			`map<string, NDArray> args_map;`
			`map<string, NDArray> aux_map;`

			`/we should tell mxnet the shape of data and label/`
			`args_map["data"] = NDArray(Shape(batch_size, 3, 256, 256), ctx);`
			`args_map["label"] = NDArray(Shape(batch_size), ctx);`

			`/with data and label, executor can be generated automatically/`
			`auto *exec = Net.SimpleBind(ctx, args_map);`
[cpp-package] add lr scheduler (#6885) * add lr scheduler * Update lr_scheduler.h * Update mlp_gpu.cpp * Update test_score.cpp * update optimizer.hpp 2017-08-04 04:18:07 +08:00			`auto arg_names = Net.ListArguments();`
Integrate cpp package (#5251) * Copy mxnet.cpp to cpp-package sub directory * Add missing type name declaration for nnvm::Tuple<dmlc::optional<int>> * Add trigger to update operation wrapper in cpp package when building mxnet * Add test to travis scripts * Disable op generation on windows 2017-03-22 11:55:51 +08:00			`aux_map = exec->aux_dict();`
			`args_map = exec->arg_dict();`

			`/if fine tune from some pre-trained model, we should load the parameters/`
			`// NDArray::Load("./model/alex_params_3", nullptr, &args_map);`
			`/else, we should use initializer Xavier to init the params/`
			`Xavier xavier = Xavier(Xavier::gaussian, Xavier::in, 2.34);`
			`for (auto &arg : args_map) {`
			`/*be careful here, the arg's name must has some specific ends or starts for`
			`* initializer to call*/`
			`xavier(arg.first, &arg.second);`
			`}`
			`/print out to check the shape of the net/`
			`for (const auto &s : Net.ListArguments()) {`
			`LG << s;`
			`const auto &k = args_map[s].GetShape();`
			`for (const auto &i : k) {`
			`cout << i << " ";`
			`}`
			`cout << endl;`
			`}`

			`/*these binary files should be generated using im2rc tools, which can be found`
			`* in mxnet/bin*/`
			`auto train_iter = MXDataIter("ImageRecordIter")`
			`.SetParam("path_imglist", "./data/train_rec.lst")`
			`.SetParam("path_imgrec", "./data/train_rec.bin")`
			`.SetParam("data_shape", Shape(3, 256, 256))`
			`.SetParam("batch_size", batch_size)`
			`.SetParam("shuffle", 1)`
			`.CreateDataIter();`
			`auto val_iter = MXDataIter("ImageRecordIter")`
			`.SetParam("path_imglist", "./data/val_rec.lst")`
			`.SetParam("path_imgrec", "./data/val_rec.bin")`
			`.SetParam("data_shape", Shape(3, 256, 256))`
			`.SetParam("batch_size", batch_size)`
			`.CreateDataIter();`

			`Optimizer* opt = OptimizerRegistry::Find("ccsgd");`
			`opt->SetParam("momentum", 0.9)`
			`->SetParam("rescale_grad", 1.0 / batch_size)`
[cpp-package] add lr scheduler (#6885) * add lr scheduler * Update lr_scheduler.h * Update mlp_gpu.cpp * Update test_score.cpp * update optimizer.hpp 2017-08-04 04:18:07 +08:00			`->SetParam("clip_gradient", 10)`
			`->SetParam("lr", learning_rate)`
			`->SetParam("wd", weight_decay);`
Integrate cpp package (#5251) * Copy mxnet.cpp to cpp-package sub directory * Add missing type name declaration for nnvm::Tuple<dmlc::optional<int>> * Add trigger to update operation wrapper in cpp package when building mxnet * Add test to travis scripts * Disable op generation on windows 2017-03-22 11:55:51 +08:00
			`Accuracy acu_train, acu_val;`
			`LogLoss logloss_val;`
			`for (int iter = 0; iter < max_epo; ++iter) {`
			`LG << "Train Epoch: " << iter;`
			`/reset the metric every epoch/`
			`acu_train.Reset();`
			`/reset the data iter every epoch/`
			`train_iter.Reset();`
			`while (train_iter.Next()) {`
			`auto batch = train_iter.GetDataBatch();`
			`LG << train_iter.GetDataBatch().index.size();`
			`/use copyto to feed new data and label to the executor/`
			`batch.data.CopyTo(&args_map["data"]);`
			`batch.label.CopyTo(&args_map["label"]);`
			`exec->Forward(true);`
			`exec->Backward();`
[cpp-package] add lr scheduler (#6885) * add lr scheduler * Update lr_scheduler.h * Update mlp_gpu.cpp * Update test_score.cpp * update optimizer.hpp 2017-08-04 04:18:07 +08:00			`for (size_t i = 0; i < arg_names.size(); ++i) {`
			`if (arg_names[i] == "data" \|\| arg_names[i] == "label") continue;`
			`opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);`
			`}`

Integrate cpp package (#5251) * Copy mxnet.cpp to cpp-package sub directory * Add missing type name declaration for nnvm::Tuple<dmlc::optional<int>> * Add trigger to update operation wrapper in cpp package when building mxnet * Add test to travis scripts * Disable op generation on windows 2017-03-22 11:55:51 +08:00			`NDArray::WaitAll();`
			`acu_train.Update(batch.label, exec->outputs[0]);`
			`}`
			`LG << "ITER: " << iter << " Train Accuracy: " << acu_train.Get();`

			`LG << "Val Epoch: " << iter;`
			`acu_val.Reset();`
			`val_iter.Reset();`
			`logloss_val.Reset();`
			`while (val_iter.Next()) {`
			`auto batch = val_iter.GetDataBatch();`
			`LG << val_iter.GetDataBatch().index.size();`
			`batch.data.CopyTo(&args_map["data"]);`
			`batch.label.CopyTo(&args_map["label"]);`
			`exec->Forward(false);`
			`NDArray::WaitAll();`
			`acu_val.Update(batch.label, exec->outputs[0]);`
			`logloss_val.Update(batch.label, exec->outputs[0]);`
			`}`
			`LG << "ITER: " << iter << " Val Accuracy: " << acu_val.Get();`
			`LG << "ITER: " << iter << " Val LogLoss: " << logloss_val.Get();`

			`/save the parameters/`
			`stringstream ss;`
			`ss << iter;`
			`string iter_str;`
			`ss >> iter_str;`
			`string save_path_param = "./model/alex_param_" + iter_str;`
			`auto save_args = args_map;`
			`/we do not want to save the data and label/`
			`save_args.erase(save_args.find("data"));`
			`save_args.erase(save_args.find("label"));`
			`/*the alexnet does not get any aux array, so we do not need to save`
			`* aux_map*/`
			`LG << "ITER: " << iter << " Saving to..." << save_path_param;`
			`NDArray::Save(save_path_param, save_args);`
			`}`
			`/don't foget to release the executor/`
			`delete exec;`
			`MXNotifyShutdown();`
			`return 0;`
			`}`