meta-digi/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/image-classification/files/tflite/wrapper_tfl.hpp

/*
 * wrapper_tfl.hpp
 *
 * Author: Vincent Abriou <vincent.abriou@st.com> for STMicroelectronics.
 *
 * Copyright (c) 2020 STMicroelectronics. All rights reserved.
 *
 * This software component is licensed by ST under BSD 3-Clause license,
 * the "License"; You may not use this file except in compliance with the
 * License. You may obtain a copy of the License at:
 *
 *     http://www.opensource.org/licenses/BSD-3-Clause
 *
 *
 *
 * Inspired by:
 * https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/examples/label_image
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 */

#ifndef WRAPPER_TFL_HPP_
#define WRAPPER_TFL_HPP_

#include <sys/time.h>

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <queue>
#include <string>
#include <vector>

#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/optional_debug_tools.h"

#ifdef EDGETPU
#include "tflite/public/edgetpu.h"
#endif

#include "tensorflow/lite/delegates/external/external_delegate.h"
#include "tensorflow/lite/interpreter.h"

#ifdef VSI_OP
#include "VX/vsi_npu_custom_op.h"
#endif

#define LOG(x) std::cerr

namespace wrapper_tfl {

	/**
	 * Convert a timeval into milliseconds.
	 *
	 * The computation is carried out in double precision: this keeps the
	 * sub-millisecond part of tv_usec and avoids overflowing the integer
	 * product tv_sec * 1000 on platforms where tv_sec is 32 bits wide.
	 *
	 * Declared inline because this header defines the function and may be
	 * included from several translation units.
	 *
	 * @param t  Time value, e.g. as filled in by gettimeofday().
	 * @return   The same instant expressed in milliseconds.
	 */
	inline double get_ms(struct timeval t)
	{
		return static_cast<double>(t.tv_sec) * 1000.0 +
		       static_cast<double>(t.tv_usec) / 1000.0;
	}

	/**
	 * Settings consumed by Tfl_Wrapper::Initialize().
	 *
	 * Every field has a default so that a default-constructed Config never
	 * exposes indeterminate values.
	 */
	struct Config {
		// Enables the extra diagnostics printed while running an inference.
		bool verbose = false;
		// Normalisation applied to float inputs: (pixel - input_mean) / input_std.
		float input_mean = 127.5f;
		float input_std = 127.5f;
		// Thread count handed to the interpreter; -1 keeps the interpreter default.
		int number_of_threads = 2;
		// Number of best-scoring classes reported per inference
		// (Label_Results has room for 10 entries).
		int number_of_results = 5;
		// Path of the .tflite model file to load.
		std::string model_name;
		// Not read by the wrapper in this header; presumably the path handed
		// to ReadLabelsFile() by the application - TODO confirm.
		std::string labels_file_name;
		// Request execution on an Edge TPU (effective only when built with EDGETPU).
		bool edgetpu = false;
		// Request the external delegate (effective only when built with VSI_OP).
		bool accel = false;
		// Shared library of the external delegate. A path containing
		// "libvx_delegate" is treated as the NPU delegate.
		std::string external_delegate_path;
	};

	/**
	 * Result of one inference.
	 *
	 * RunInference() fills only the first number_of_results entries of the
	 * two arrays, best score first. All fields are zero-initialised so that
	 * the entries it does not write hold a defined value.
	 */
	struct Label_Results {
		// Score of each reported class (quantized outputs are divided by 255).
		float accuracy[10] = {};
		// Position of each reported class in the output tensor.
		int index[10] = {};
		// Duration of the interpreter invocation, in milliseconds.
		float inference_time = 0.0f;
	};

	class Tfl_Wrapper {
	private:
		// The wrapper owns both the model and the interpreter built from it.
		// m_model is declared before m_interpreter, so the interpreter is
		// destroyed first and never outlives the model data it refers to.
#ifdef EDGETPU
		std::shared_ptr<edgetpu::EdgeTpuContext> m_edgetpu_ctx;
#endif

		std::unique_ptr<tflite::FlatBufferModel> m_model;
		std::unique_ptr<tflite::Interpreter>     m_interpreter;
		bool                                     m_verbose;
		bool                                     m_inputFloating;
		bool                                     m_allow_fp16;
		float                                    m_inputMean;
		float                                    m_inputStd;
		float                                    m_inferenceTime;
		int                                      m_numberOfThreads;
		int                                      m_numberOfResults;
		bool                                     m_edgetpu;
		bool                                     m_accel;
		bool                                     m_npu;
		const char *                             m_external_delegate_path;
		std::string                              m_vxdelegate;

	public:
		// Default constructor: performs no setup. The scalar members are left
		// unset and m_model / m_interpreter stay empty until Initialize() has
		// run, so Initialize() must be called before any other method.
		Tfl_Wrapper() {}

		/**
		 * Configure the wrapper from @p conf, load the model file and build the
		 * TensorFlow Lite interpreter.
		 *
		 * Unrecoverable problems (null configuration, Edge TPU requested but
		 * not detected, empty or unreadable model file, model without input,
		 * interpreter construction failure) are reported and end the process
		 * with exit(-1). A failure to create or apply the external delegate is
		 * reported but does not stop the initialisation.
		 *
		 * @param conf  Settings to apply; must not be null. The wrapper keeps a
		 *              raw pointer into conf->external_delegate_path
		 *              (m_external_delegate_path), which is only valid while
		 *              @p conf is alive and that string is not modified.
		 */
		void Initialize(Config* conf)
		{
			if (!conf) {
				LOG(ERROR) << "no configuration provided\n";
				exit(-1);
			}

			m_inputFloating		= false;
			m_allow_fp16		= false;
			m_inferenceTime		= 0;
			m_verbose		= conf->verbose;
			m_inputMean		= conf->input_mean;
			m_inputStd		= conf->input_std;
			m_numberOfThreads	= conf->number_of_threads;
			m_numberOfResults	= conf->number_of_results;
			m_edgetpu		= conf->edgetpu;
			m_accel			= conf->accel;
			/* Aliases the string owned by conf, see the @param note above */
			m_external_delegate_path = conf->external_delegate_path.c_str();
			m_vxdelegate		= "libvx_delegate";

			/* The NPU path is selected when the delegate library is the vx delegate */
			m_npu = conf->external_delegate_path.find(m_vxdelegate) != std::string::npos;

			if (m_edgetpu) {
				/* Check if the Edge TPU is connected. The device is looked
				 * up under two USB vendor IDs; it is considered absent only
				 * when both lookups fail. The statuses are compared
				 * individually because AND-ing two different non-zero wait
				 * statuses bitwise can produce 0. */
				const int status_1a6e = system("lsusb -d 1a6e:");
				const int status_18d1 = system("lsusb -d 18d1:");
				if (status_1a6e != 0 && status_18d1 != 0) {
					std::cout << "ERROR: Edge TPU not connected.\n";
					exit(-1);
				}
				/* Load EDGETPU */
#ifdef EDGETPU
				m_edgetpu_ctx = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
#endif
			}

			/* c_str() never returns null, so emptiness is what must be tested */
			if (conf->model_name.empty()) {
				LOG(ERROR) << "no model file name\n";
				exit(-1);
			}

			std::unique_ptr<tflite::FlatBufferModel> model =
				tflite::FlatBufferModel::BuildFromFile(conf->model_name.c_str());
			if (!model) {
				LOG(FATAL) << "\nFailed to mmap model " << conf->model_name << "\n";
				exit(-1);
			}
			LOG(INFO) << "Loaded model " << conf->model_name << "\n";

			tflite::ops::builtin::BuiltinOpResolver resolver;

			if (m_edgetpu) {
#ifdef EDGETPU
				resolver.AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
#endif
			}

#ifdef VSI_OP
			if (m_accel && m_npu) {
				resolver.AddCustom(kNbgCustomOp, tflite::ops::custom::Register_VSI_NPU_PRECOMPILED());
			}
#endif

			std::unique_ptr<tflite::Interpreter> interpreter;
			tflite::InterpreterBuilder(*model, resolver)(&interpreter);
			if (!interpreter) {
				LOG(FATAL) << "Failed to construct interpreter\n";
				exit(-1);
			}

#ifdef VSI_OP
			if (m_accel) {
				auto ext_delegate_option =
					TfLiteExternalDelegateOptionsDefault(conf->external_delegate_path.c_str());
				/* NOTE(review): the cache file path is hard-coded to one
				 * specific model and is used whatever conf->model_name is. */
				ext_delegate_option.insert(&ext_delegate_option, "cache_file_path", "/usr/local/demo-ai/image-classification/models/mobilenet/mobilenet_v3_large_100_224_quant.nb");
				ext_delegate_option.insert(&ext_delegate_option, "allowed_cache_mode", "true");
				/* NOTE(review): the delegate is never released with
				 * TfLiteExternalDelegateDelete(); it has to stay alive as
				 * long as the interpreter uses it and the class has no
				 * member to hold it. */
				auto ext_delegate_ptr = TfLiteExternalDelegateCreate(&ext_delegate_option);
				if (!ext_delegate_ptr) {
					LOG(ERROR) << "Failed to create external delegate "
						   << conf->external_delegate_path << "\n";
				} else if (interpreter->ModifyGraphWithDelegate(ext_delegate_ptr) != kTfLiteOk) {
					LOG(ERROR) << "Failed to apply external delegate "
						   << conf->external_delegate_path << "\n";
				}
			}
#endif

			if (interpreter->inputs().empty()) {
				LOG(FATAL) << "Model has no input tensor\n";
				exit(-1);
			}

			const int input = interpreter->inputs()[0];
			if (interpreter->tensor(input)->type == kTfLiteFloat32) {
				m_inputFloating = true;
				LOG(INFO) << "Floating point Tensorflow Lite Model\n";
			}

			if (m_edgetpu) {
#ifdef EDGETPU
				interpreter->SetExternalContext(kTfLiteEdgeTpuContext, m_edgetpu_ctx.get());
#endif
			} else {
				interpreter->SetAllowFp16PrecisionForFp32(m_allow_fp16);
			}

			/* -1 means "keep the interpreter's own default" */
			if (m_numberOfThreads != -1) {
				interpreter->SetNumThreads(m_numberOfThreads);
			}

			m_interpreter = std::move(interpreter);
			m_model = std::move(model);
		}

		/**
		 * Print the settings currently held by the wrapper on the log stream,
		 * one "name value" pair per line.
		 */
		void DisplaySettings()
		{
			auto& out = LOG(INFO);
			out << "input_floating    " << m_inputFloating << "\n"
			    << "allow_fp16        " << m_allow_fp16 << "\n"
			    << "input_mean        " << m_inputMean << "\n"
			    << "input_std         " << m_inputStd << "\n"
			    << "number_of_threads " << m_numberOfThreads << "\n"
			    << "number_of_results " << m_numberOfResults << "\n"
			    << "edgetpu           " << m_edgetpu << "\n";
		}

		/**
		 * Print a summary of the loaded model on the log stream: tensor and
		 * node counts, number of inputs, name of the first input, then one
		 * line per named tensor with its index, name, byte size, type, and
		 * quantization scale and zero point.
		 */
		void DisplayModelInformation()
		{
			auto& out = LOG(INFO);
			out << "tensors size: " << m_interpreter->tensors_size() << "\n";
			out << "nodes size: " << m_interpreter->nodes_size() << "\n";
			out << "inputs: " << m_interpreter->inputs().size() << "\n";
			out << "input(0) name: " << m_interpreter->GetInputName(0) << "\n";

			const int tensor_count = m_interpreter->tensors_size();
			int idx = 0;
			while (idx < tensor_count) {
				const auto* tensor = m_interpreter->tensor(idx);
				// Tensors without a name are left out of the listing.
				if (tensor->name) {
					out << idx << ": " << tensor->name << ", "
					    << tensor->bytes << ", "
					    << tensor->type << ", "
					    << tensor->params.scale << ", "
					    << tensor->params.zero_point << "\n";
				}
				++idx;
			}
		}

		/**
		 * @return true when the first input tensor of the model was not
		 *         detected as float32 by Initialize(), false otherwise.
		 */
		bool IsModelQuantized()
		{
			const bool quantized = (m_inputFloating == false);
			return quantized;
		}

		int GetInputWidth()
		{
			int input = m_interpreter->inputs()[0];
			TfLiteIntArray* input_dims = m_interpreter->tensor(input)->dims;
			return input_dims->data[2];
		}

		int GetInputHeight()
		{
			int input = m_interpreter->inputs()[0];
			TfLiteIntArray* input_dims = m_interpreter->tensor(input)->dims;
			return input_dims->data[1];
		}

		int GetInputChannels()
		{
			int input = m_interpreter->inputs()[0];
			TfLiteIntArray* input_dims = m_interpreter->tensor(input)->dims;
			return input_dims->data[3];
		}

		/**
		 * @return Number of input tensors declared by the model.
		 */
		unsigned int GetNumberOfInputs()
		{
			// Read the size through the returned container directly instead
			// of copying the whole index vector into a local first.
			return static_cast<unsigned int>(m_interpreter->inputs().size());
		}

		/**
		 * @return Number of output tensors declared by the model.
		 */
		unsigned int GetNumberOfOutputs()
		{
			// Read the size through the returned container directly instead
			// of copying the whole index vector into a local first.
			return static_cast<unsigned int>(m_interpreter->outputs().size());
		}

		unsigned int GetOutputSize(int index)
		{
			int output = m_interpreter->outputs()[index];
			TfLiteIntArray* output_dims = m_interpreter->tensor(output)->dims;
			// assume output dims to be something like (1, 1, ... ,size)
			return output_dims->data[output_dims->size - 1];
		}

		/**
		 * Run one inference on @p img, selecting the element type from the
		 * model: uint8_t when the input is not float32, float otherwise.
		 *
		 * @param img      Input image bytes.
		 * @param results  Receives the best classes and the inference time.
		 */
		void RunInference(uint8_t* img, Label_Results* results)
		{
			if (!m_inputFloating) {
				RunInference<uint8_t>(img, results);
				return;
			}
			RunInference<float>(img, results);
		}

		template <class T>
		void RunInference(uint8_t* img, Label_Results* results)
		{
			int input_height = GetInputHeight();
			int input_width = GetInputWidth();
			int input_channels = GetInputChannels();
			auto sizeInBytes = input_height * input_width * input_channels;

			int input = m_interpreter->inputs()[0];
			if (m_verbose) {
				LOG(INFO) << "input: " << input << "\n";
				LOG(INFO) << "number of inputs: " << GetNumberOfInputs() << "\n";
				LOG(INFO) << "number of outputs: " << GetNumberOfOutputs() << "\n";
			}

			if (m_interpreter->AllocateTensors() != kTfLiteOk) {
				LOG(FATAL) << "Failed to allocate tensors!";
			}

			if (m_verbose)
				tflite::PrintInterpreterState(m_interpreter.get());

			auto in = m_interpreter->typed_tensor<T>(input);
			if (m_inputFloating) {
				for (int i = 0; i < sizeInBytes; i++)
					in[i] = (img[i] - m_inputMean) / m_inputStd;
			} else {
				for (int i = 0; i < sizeInBytes; i++)
					in[i] = img[i];
			}

			struct timeval start_time, stop_time;
			gettimeofday(&start_time, nullptr);
			if (m_interpreter->Invoke() != kTfLiteOk) {
				LOG(FATAL) << "Failed to invoke tflite!\n";
			}

			gettimeofday(&stop_time, nullptr);
			m_inferenceTime = (get_ms(stop_time) - get_ms(start_time));

			/* Get results */
			T* output = m_interpreter->typed_output_tensor<T>(0);
			auto output_size = GetOutputSize(0);
			for (int i = 0; i < m_numberOfResults; i++) {
				results->index[i] = std::distance(&output[0], std::max_element(&output[0], &output[output_size]));
				if (m_inputFloating)
					results->accuracy[i] = output[results->index[i]];
				else
					results->accuracy[i] = output[results->index[i]] / 255.0;

				output[results->index[i]] = 0;
			}
			results->inference_time = m_inferenceTime;
		}

		// Loads the labels stored one per line in the given file.
		//
		// On success the vector pointed to by result holds the labels followed
		// by as many empty strings as needed to make its length a multiple of
		// 16, and *found_label_count holds the number of labels actually read
		// (padding excluded). Returns kTfLiteError, after logging, when the
		// file cannot be opened; the output arguments are untouched then.
		TfLiteStatus ReadLabelsFile(const std::string& file_name,
					    std::vector<std::string>* result,
					    size_t* found_label_count)
		{
			std::ifstream labels(file_name);
			if (labels.fail()) {
				LOG(FATAL) << "Labels file " << file_name << " not found\n";
				return kTfLiteError;
			}

			result->clear();
			for (std::string entry; std::getline(labels, entry);)
				result->push_back(entry);
			*found_label_count = result->size();

			// Round the length up to the next multiple of 16 with empty labels.
			const size_t block = 16;
			const size_t remainder = result->size() % block;
			if (remainder != 0)
				result->resize(result->size() + (block - remainder));

			return kTfLiteOk;
		}
	};

}  // namespace wrapper_tfl

#endif  // WRAPPER_TFL_HPP_