// meta-digi/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/image-classification/files/tflite/wrapper_tfl.hpp
//
// 386 lines
// 11 KiB
// C++

/*
* wrapper_tfl.hpp
*
* Author: Vincent Abriou <vincent.abriou@st.com> for STMicroelectronics.
*
* Copyright (c) 2020 STMicroelectronics. All rights reserved.
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
*
* http://www.opensource.org/licenses/BSD-3-Clause
*
*
*
* Inspired by:
* https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/examples/label_image
* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
#ifndef WRAPPER_TFL_HPP_
#define WRAPPER_TFL_HPP_
#include <sys/time.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <queue>
#include <string>
#include <vector>

#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/optional_debug_tools.h"
#ifdef EDGETPU
#include "tflite/public/edgetpu.h"
#endif
#include "tensorflow/lite/delegates/external/external_delegate.h"
#include "tensorflow/lite/interpreter.h"
#ifdef VSI_OP
#include "VX/vsi_npu_custom_op.h"
#endif
// Minimal logging shim: the argument (INFO, ERROR, FATAL, ...) is discarded and
// every use expands to std::cerr. In particular LOG(FATAL) only prints; it does
// not terminate the program, so callers must exit/return explicitly.
#define LOG(x) std::cerr
namespace wrapper_tfl {
/*
 * Convert a timeval into milliseconds.
 *
 * The computation is done in floating point so that the sub-millisecond part
 * of tv_usec is kept and tv_sec * 1000 cannot overflow an integer type.
 * Declared inline because this header may be included by several
 * translation units.
 *
 * t: time value as filled in by gettimeofday().
 * Returns the same instant expressed in milliseconds.
 */
inline double get_ms(struct timeval t)
{
    return static_cast<double>(t.tv_sec) * 1000.0 +
           static_cast<double>(t.tv_usec) / 1000.0;
}
/*
 * Settings read by Tfl_Wrapper::Initialize().
 *
 * Every field has a default so that a default-constructed Config never
 * exposes indeterminate values.
 */
struct Config {
    bool verbose = false;                // print extra details during inference
    float input_mean = 127.5f;           // subtracted from each pixel (float models)
    float input_std = 127.5f;            // divisor applied after the mean (float models)
    int number_of_threads = 2;           // -1 leaves the interpreter thread count untouched
    int number_of_results = 5;           // how many top results RunInference() reports
    std::string model_name;              // path of the .tflite model file
    std::string labels_file_name;        // path of the labels file (not read by this header)
    bool edgetpu = false;                // run through the Edge TPU
    bool accel = false;                  // use the external delegate (VSI_OP builds)
    std::string external_delegate_path;  // shared library implementing the external delegate
};
/*
 * Result of one classification run.
 *
 * accuracy[i] / index[i] describe the i-th best class: index is the position
 * in the model output, accuracy the associated score. At most 10 results can
 * be stored. All fields start at zero so that slots which are not filled in
 * hold a defined value.
 */
struct Label_Results {
    float accuracy[10] = {};
    int index[10] = {};
    float inference_time = 0.0f;  // duration of the last inference, in milliseconds
};
class Tfl_Wrapper {
private:
// Taking a reference to the (const) model data avoids lifetime-related issues
// and complexity with the TFL_Model's existence.
#ifdef EDGETPU
std::shared_ptr<edgetpu::EdgeTpuContext> m_edgetpu_ctx;
#endif
std::unique_ptr<tflite::FlatBufferModel> m_model;
std::unique_ptr<tflite::Interpreter> m_interpreter;
bool m_verbose;
bool m_inputFloating;
bool m_allow_fp16;
float m_inputMean;
float m_inputStd;
float m_inferenceTime;
int m_numberOfThreads;
int m_numberOfResults;
bool m_edgetpu;
bool m_accel;
bool m_npu;
const char * m_external_delegate_path;
std::string m_vxdelegate;
public:
Tfl_Wrapper() {}
void Initialize(Config* conf)
{
m_inputFloating = false;
m_allow_fp16 = false;
m_inferenceTime = 0;
m_verbose = conf->verbose;
m_inputMean = conf->input_mean;
m_inputStd = conf->input_std;
m_numberOfThreads = conf->number_of_threads;
m_numberOfResults = conf->number_of_results;
m_edgetpu = conf->edgetpu;
if (m_edgetpu) {
/* Check if the Edge TPU is connected */
int status = system("lsusb -d 1a6e:");
status &= system("lsusb -d 18d1:");
if (status) {
std::cout << "ERROR: Edge TPU not connected.\n";
exit(-1);
}
/* Load EDGEPTU */
#ifdef EDGETPU
m_edgetpu_ctx = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
#endif
}
m_inputFloating = false;
m_allow_fp16 = false;
m_inferenceTime = 0;
m_verbose = conf->verbose;
m_inputMean = conf->input_mean;
m_inputStd = conf->input_std;
m_numberOfThreads = conf->number_of_threads;
m_numberOfResults = conf->number_of_results;
m_accel = conf->accel;
m_external_delegate_path = conf->external_delegate_path.c_str();
m_vxdelegate = "libvx_delegate";
m_npu = false;
/* Check which delegate is used */
std::size_t found = conf->external_delegate_path.find(m_vxdelegate);
if (found!=std::string::npos) {
/* vx_delegate found */
m_npu = true;
}
if (!conf->model_name.c_str()) {
LOG(ERROR) << "no model file name\n";
exit(-1);
}
std::unique_ptr<tflite::FlatBufferModel> model;
std::unique_ptr<tflite::Interpreter> interpreter;
model = tflite::FlatBufferModel::BuildFromFile(conf->model_name.c_str());
if (!model) {
LOG(FATAL) << "\nFailed to mmap model " << conf->model_name << "\n";
exit(-1);
}
LOG(INFO) << "Loaded model " << conf->model_name << "\n";
model->error_reporter();
tflite::ops::builtin::BuiltinOpResolver resolver;
if(m_edgetpu){
#ifdef EDGETPU
resolver.AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
#endif
}
#ifdef VSI_OP
if(m_accel && m_npu) {
resolver.AddCustom(kNbgCustomOp, tflite::ops::custom::Register_VSI_NPU_PRECOMPILED());
}
#endif
tflite::InterpreterBuilder(*model, resolver)(&interpreter);
if (!interpreter) {
LOG(FATAL) << "Failed to construct interpreter\n";
exit(-1);
}
#ifdef VSI_OP
if(m_accel) {
const char * delegate_path = m_external_delegate_path;
auto ext_delegate_option = TfLiteExternalDelegateOptionsDefault(delegate_path);
ext_delegate_option.insert(&ext_delegate_option, "cache_file_path", "/usr/local/demo-ai/image-classification/models/mobilenet/mobilenet_v3_large_100_224_quant.nb");
ext_delegate_option.insert(&ext_delegate_option, "allowed_cache_mode", "true");
auto ext_delegate_ptr = TfLiteExternalDelegateCreate(&ext_delegate_option);
interpreter->ModifyGraphWithDelegate(ext_delegate_ptr);
}
#endif
int input = interpreter->inputs()[0];
if (interpreter->tensor(input)->type == kTfLiteFloat32) {
m_inputFloating = true;
LOG(INFO) << "Floating point Tensorflow Lite Model\n";
}
if(m_edgetpu){
#ifdef EDGETPU
interpreter->SetExternalContext(kTfLiteEdgeTpuContext, m_edgetpu_ctx.get());
#endif
} else {
interpreter->SetAllowFp16PrecisionForFp32(m_allow_fp16);
}
if (m_numberOfThreads != -1) {
interpreter->SetNumThreads(m_numberOfThreads);
}
m_interpreter = std::move(interpreter);
m_model = std::move(model);
}
void DisplaySettings()
{
LOG(INFO) << "input_floating " << m_inputFloating << "\n";
LOG(INFO) << "allow_fp16 " << m_allow_fp16 << "\n";
LOG(INFO) << "input_mean " << m_inputMean << "\n";
LOG(INFO) << "input_std " << m_inputStd << "\n";
LOG(INFO) << "number_of_threads " << m_numberOfThreads << "\n";
LOG(INFO) << "number_of_results " << m_numberOfResults << "\n";
LOG(INFO) << "edgetpu " << m_edgetpu << "\n";
}
void DisplayModelInformation()
{
LOG(INFO) << "tensors size: " << m_interpreter->tensors_size() << "\n";
LOG(INFO) << "nodes size: " << m_interpreter->nodes_size() << "\n";
LOG(INFO) << "inputs: " << m_interpreter->inputs().size() << "\n";
LOG(INFO) << "input(0) name: " << m_interpreter->GetInputName(0) << "\n";
int t_size = m_interpreter->tensors_size();
for (int i = 0; i < t_size; i++) {
if (m_interpreter->tensor(i)->name)
LOG(INFO) << i << ": " << m_interpreter->tensor(i)->name << ", "
<< m_interpreter->tensor(i)->bytes << ", "
<< m_interpreter->tensor(i)->type << ", "
<< m_interpreter->tensor(i)->params.scale << ", "
<< m_interpreter->tensor(i)->params.zero_point << "\n";
}
}
bool IsModelQuantized()
{
return !m_inputFloating;
}
int GetInputWidth()
{
int input = m_interpreter->inputs()[0];
TfLiteIntArray* input_dims = m_interpreter->tensor(input)->dims;
return input_dims->data[2];
}
int GetInputHeight()
{
int input = m_interpreter->inputs()[0];
TfLiteIntArray* input_dims = m_interpreter->tensor(input)->dims;
return input_dims->data[1];
}
int GetInputChannels()
{
int input = m_interpreter->inputs()[0];
TfLiteIntArray* input_dims = m_interpreter->tensor(input)->dims;
return input_dims->data[3];
}
unsigned int GetNumberOfInputs()
{
const std::vector<int> inputs = m_interpreter->inputs();
return inputs.size();
}
unsigned int GetNumberOfOutputs()
{
const std::vector<int> outputs = m_interpreter->outputs();
return outputs.size();
}
unsigned int GetOutputSize(int index)
{
int output = m_interpreter->outputs()[index];
TfLiteIntArray* output_dims = m_interpreter->tensor(output)->dims;
// assume output dims to be something like (1, 1, ... ,size)
return output_dims->data[output_dims->size - 1];
}
void RunInference(uint8_t* img, Label_Results* results)
{
if (m_inputFloating)
RunInference<float>(img, results);
else
RunInference<uint8_t>(img, results);
}
template <class T>
void RunInference(uint8_t* img, Label_Results* results)
{
int input_height = GetInputHeight();
int input_width = GetInputWidth();
int input_channels = GetInputChannels();
auto sizeInBytes = input_height * input_width * input_channels;
int input = m_interpreter->inputs()[0];
if (m_verbose) {
LOG(INFO) << "input: " << input << "\n";
LOG(INFO) << "number of inputs: " << GetNumberOfInputs() << "\n";
LOG(INFO) << "number of outputs: " << GetNumberOfOutputs() << "\n";
}
if (m_interpreter->AllocateTensors() != kTfLiteOk) {
LOG(FATAL) << "Failed to allocate tensors!";
}
if (m_verbose)
tflite::PrintInterpreterState(m_interpreter.get());
auto in = m_interpreter->typed_tensor<T>(input);
if (m_inputFloating) {
for (int i = 0; i < sizeInBytes; i++)
in[i] = (img[i] - m_inputMean) / m_inputStd;
} else {
for (int i = 0; i < sizeInBytes; i++)
in[i] = img[i];
}
struct timeval start_time, stop_time;
gettimeofday(&start_time, nullptr);
if (m_interpreter->Invoke() != kTfLiteOk) {
LOG(FATAL) << "Failed to invoke tflite!\n";
}
gettimeofday(&stop_time, nullptr);
m_inferenceTime = (get_ms(stop_time) - get_ms(start_time));
/* Get results */
T* output = m_interpreter->typed_output_tensor<T>(0);
auto output_size = GetOutputSize(0);
for (int i = 0; i < m_numberOfResults; i++) {
results->index[i] = std::distance(&output[0], std::max_element(&output[0], &output[output_size]));
if (m_inputFloating)
results->accuracy[i] = output[results->index[i]];
else
results->accuracy[i] = output[results->index[i]] / 255.0;
output[results->index[i]] = 0;
}
results->inference_time = m_inferenceTime;
}
// Takes a file name, and loads a list of labels from it, one per line, and
// returns a vector of the strings. It pads with empty strings so the length
// of the result is a multiple of 16, because our model expects that.
TfLiteStatus ReadLabelsFile(const std::string& file_name,
std::vector<std::string>* result,
size_t* found_label_count)
{
std::ifstream file(file_name);
if (!file) {
LOG(FATAL) << "Labels file " << file_name << " not found\n";
return kTfLiteError;
}
result->clear();
std::string line;
while (std::getline(file, line)) {
result->push_back(line);
}
*found_label_count = result->size();
const int padding = 16;
while (result->size() % padding) {
result->emplace_back();
}
return kTfLiteOk;
}
};
} // namespace wrapper_tfl
#endif // WRAPPER_TFL_HPP_