cv::Ptr<cv::ml::ANN_MLP> mlp = cv::ml::ANN_MLP::create();
int inputLayerSize = imagesData[0].total(); // 28*28 = 784
int hiddenLayerSize = 100;
int outputLayerSize = 10;
cv::Mat layersSize = cv::Mat(3, 1, CV_16U);
layersSize.row(0) = cv::Scalar(trainingData.cols);
layersSize.row(1) = cv::Scalar(hiddenLayerSize);
layersSize.row(2) = cv::Scalar(labelData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(cv::ml::ANN_MLP::SIGMOID_SYM);
cv::TermCriteria termCrit(cv::TermCriteria::MAX_ITER + cv::TermCriteria::EPS, 10000, 0.001);
mlp->setTermCriteria(termCrit);
mlp->setTrainMethod(cv::ml::ANN_MLP::BACKPROP, 0.0001);
mlp->train(trainingData, cv::ml::ROW_SAMPLE, labelData);
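As a sanity check, I also print the layer sizes the model reports and run one prediction; this is just a sketch, and testSample is a hypothetical 1 x 784 CV_32F row laid out like a row of trainingData:
// Confirm the layer sizes actually stored in the trained model.
std::cout << "layer sizes:\n" << mlp->getLayerSizes() << std::endl;
// Run a single prediction; response ends up as a 1 x 10 CV_32F row.
cv::Mat response;
mlp->predict(testSample, response);
// The predicted digit is the column with the largest output value.
cv::Point maxLoc;
cv::minMaxLoc(response, nullptr, nullptr, nullptr, &maxLoc);
std::cout << "predicted digit: " << maxLoc.x << std::endl;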
After training, I check the weight matrices:
std::cout << "\nweights[0]:\n" << mlp->getWeights( 0 ).size() << std::endl;
std::cout << "\nweights[1]:\n" << mlp->getWeights( 1 ).size() << std::endl;
std::cout << "\nweights[2]:\n" << mlp->getWeights( 2 ).size() << std::endl;
std::cout << "\nweights[3]:\n" << mlp->getWeights( 3 ).size() << std::endl;
The output is the following:
weights[0]: [1568 x 1]
weights[1]: [100 x 785]
weights[2]: [10 x 101]
weights[3]: [20 x 1]
I am pretty sure my inputLayerSize is 784 and my outputLayerSize is 10, so I do not understand why the weight matrices have these shapes; it seems very odd to me.
Does anyone know what is going on under the hood? Is this an internal implementation detail of OpenCV's ANN_MLP (e.g., related to how scaling and bias terms are stored), and where can I find this documented?
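For reference, the four getWeights() calls above can also be written as a loop; I only iterate over the indices I actually tried (0 through 3), because I do not know how many weight matrices ANN_MLP stores internally, which is part of my question:
// Print the size of each weight matrix I queried; note that cv::Size
// prints as [cols x rows], so [100 x 785] means 785 rows and 100 columns.
for (int i = 0; i <= 3; ++i)
{
    cv::Mat w = mlp->getWeights(i);
    std::cout << "weights[" << i << "]: " << w.size()
              << " (rows=" << w.rows << ", cols=" << w.cols << ")" << std::endl;
}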