stm32mpu-ai: add external delegate library to use NPU

Until now, the NPU demos were not actually using the NPU, only the CPU. To enable NPU usage, the external delegate library 'tflite-vx-delegate' must be added to the system. This is done by making 'tensorflow-lite' depend on it at runtime for the CCMP2 platform; the delegate in turn requires the 'tim-vx' library as a build dependency. Both the 'tflite-vx-delegate' and 'tim-vx' recipes have been copied verbatim from the ST Beta branch, at commit (SHA-1) 68686850c75061f1c7c4e756a313a41ca810f6ae. Signed-off-by: David Escalona <david.escalona@digi.com>
2024-10-01 11:00:15 +02:00 · 2024-10-01 11:00:15 +02:00 · 1de1fae447
parent d9ef21b3d0
commit 1de1fae447
6 changed files with 350 additions and 0 deletions
--- a/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tensorflow-lite/tensorflow-lite_git.bbappend
+++ b/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tensorflow-lite/tensorflow-lite_git.bbappend
@ -45,3 +45,6 @@ do_compile() {

 	setuptools3_do_compile
 }
+
+# Require the external NPU delegate.
+RDEPENDS:${PN}:append:stm32mp25common = " tflite-vx-delegate "
--- a/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tflite-vx-delegate/tflite-vx-delegate/0001-tflite-vx-delegate-fix-to-support-tflite-2-11.patch
+++ b/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tflite-vx-delegate/tflite-vx-delegate/0001-tflite-vx-delegate-fix-to-support-tflite-2-11.patch
@ -0,0 +1,111 @@
+commit 7ac938965051de91c493b75a8825b007e1f52599
+Author: Feiyue Chen <Feiyue.Chen@verisilicon.com>
+Date:   Thu Jul 6 10:41:35 2023 +0000
+
+    Fixed bugs for kernel test building
+
+diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt
+index 073b29c4860..67486fa183b 100644
+--- a/tensorflow/lite/CMakeLists.txt
+++ b/tensorflow/lite/CMakeLists.txt
+@@ -209,6 +209,8 @@ list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*tflite_with_xnnpack\\.cc$")
+ # Exclude Flex related files.
+ list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*with_selected_ops\\.cc$")
+
+list(FILTER TFLITE_SRCS EXCLUDE REGEX "tensorflow_profiler_logger\\.cc$")
+
+ if(_TFLITE_ENABLE_MMAP)
+   list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*mmap_allocation_disabled\\.cc$")
+ else()
+diff --git a/tensorflow/lite/kernels/CMakeLists.txt b/tensorflow/lite/kernels/CMakeLists.txt
+index f5e22ee97da..d0da2680e7b 100644
+--- a/tensorflow/lite/kernels/CMakeLists.txt
+++ b/tensorflow/lite/kernels/CMakeLists.txt
+@@ -63,16 +63,16 @@ build_flatbuffers(
+ set(DELEGATE_PROVIDERS_SUPP
+   ${TFLITE_SOURCE_DIR}/nnapi/sl/SupportLibrary.cc
+   ${TFLITE_SOURCE_DIR}/tools/delegates/delegate_provider.cc
+-  ${TFLITE_SOURCE_DIR}/tools/evaluation/utils.cc
+  # ${TFLITE_SOURCE_DIR}/tools/evaluation/utils.cc
+ )
+
+ set(DELEGATE_PROVIDERS
+   ${DELEGATE_PROVIDERS_SUPP}
+   ${TFLITE_SOURCE_DIR}/tools/delegates/default_execution_provider.cc
+   # List of delegates referenced as options in the tensorflow/lite/CMakeLists.txt
+-  ${TFLITE_SOURCE_DIR}/tools/delegates/gpu_delegate_provider.cc
+  # ${TFLITE_SOURCE_DIR}/tools/delegates/gpu_delegate_provider.cc
+   ${TFLITE_SOURCE_DIR}/tools/delegates/nnapi_delegate_provider.cc
+-  ${TFLITE_SOURCE_DIR}/tools/delegates/xnnpack_delegate_provider.cc
+  # ${TFLITE_SOURCE_DIR}/tools/delegates/xnnpack_delegate_provider.cc
+ )
+
+ if(TFLITE_ENABLE_EXTERNAL_DELEGATE)
+@@ -92,9 +92,9 @@ set(TEST_FRAMEWORK_SRC
+   ${TFLITE_SOURCE_DIR}/tools/tool_params.cc
+   ${TFLITE_SOURCE_DIR}/tools/versioning/op_version.cc
+   ${TFLITE_SOURCE_DIR}/tools/versioning/op_signature.cc
+-  ${TF_SOURCE_DIR}/core/platform/default/env_time.cc
+-  ${TF_SOURCE_DIR}/core/platform/default/logging.cc
+-  ${TF_SOURCE_DIR}/core/platform/default/mutex.cc
+  ${TF_SOURCE_DIR}/tsl/platform/default/env_time.cc
+  ${TF_SOURCE_DIR}/tsl/platform/default/logging.cc
+  ${TF_SOURCE_DIR}/tsl/platform/default/mutex.cc
+   internal/test_util.cc
+   acceleration_test_util.cc
+   acceleration_test_util_internal.cc
+@@ -154,7 +154,8 @@ macro(add_kernel_test TEST_SRC TEST_LIB)
+     set(DELEGATE_TEST "${TEST_NAME}_delegate")
+     add_test(
+       NAME ${DELEGATE_TEST}
+-      COMMAND cmake -DTEST_EXECUTABLE=$<TARGET_FILE:${TEST_NAME}> -P run-tests.cmake
+      # COMMAND cmake -DTEST_EXECUTABLE=$<TARGET_FILE:${TEST_NAME}> -P run-tests.cmake
+      COMMAND cmake -DTEST_EXECUTABLE=$<TARGET_FILE:${TEST_NAME}> -P ${TFLITE_SOURCE_DIR}/tools/cmake/test_utils/run-tests.cmake
+     )
+     set_tests_properties(${DELEGATE_TEST} PROPERTIES LABELS "delegate")
+   endif()
+diff --git a/tensorflow/lite/kernels/test_main.cc b/tensorflow/lite/kernels/test_main.cc
+index 1887533399b..6e3958b77dc 100644
+--- a/tensorflow/lite/kernels/test_main.cc
+++ b/tensorflow/lite/kernels/test_main.cc
+@@ -16,7 +16,6 @@ limitations under the License.
+ #include <vector>
+
+ #include <gtest/gtest.h>
+-#include "benchmark/benchmark.h"  // from @com_google_benchmark
+ #include "tensorflow/lite/kernels/test_delegate_providers.h"
+ #include "tensorflow/lite/kernels/test_util.h"
+ #include "tensorflow/lite/testing/util.h"
+@@ -51,7 +50,6 @@ int main(int argc, char** argv) {
+   ::tflite::LogToStderr();
+   if (InitKernelTest(&argc, argv)) {
+     ::testing::InitGoogleTest(&argc, argv);
+-    benchmark::RunSpecifiedBenchmarks();
+     return RUN_ALL_TESTS();
+   } else {
+     return EXIT_FAILURE;
+diff --git a/tensorflow/lite/kernels/unidirectional_sequence_lstm_test.cc b/tensorflow/lite/kernels/unidirectional_sequence_lstm_test.cc
+index cf3fd3a031a..e96f4e3f357 100644
+--- a/tensorflow/lite/kernels/unidirectional_sequence_lstm_test.cc
+++ b/tensorflow/lite/kernels/unidirectional_sequence_lstm_test.cc
+@@ -18,7 +18,6 @@ limitations under the License.
+
+ #include <gmock/gmock.h>
+ #include <gtest/gtest.h>
+-#include "benchmark/benchmark.h"  // from @com_google_benchmark
+ #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
+ #include "tensorflow/lite/kernels/test_util.h"
+ #include "tensorflow/lite/kernels/unidirectional_sequence_lstm_test_util.h"
+diff --git a/tensorflow/lite/tools/cmake/modules/flatbuffers.cmake b/tensorflow/lite/tools/cmake/modules/flatbuffers.cmake
+index 54b413f69ac..d7a2f8ce0f2 100644
+--- a/tensorflow/lite/tools/cmake/modules/flatbuffers.cmake
+++ b/tensorflow/lite/tools/cmake/modules/flatbuffers.cmake
+@@ -23,7 +23,7 @@ OverridableFetchContent_Declare(
+   flatbuffers
+   GIT_REPOSITORY https://github.com/google/flatbuffers
+   # Sync with tensorflow/third_party/flatbuffers/workspace.bzl
+-  GIT_TAG v2.0.6
+  GIT_TAG v2.0.8
+   GIT_SHALLOW TRUE
+   GIT_PROGRESS TRUE
+   SOURCE_DIR "${CMAKE_BINARY_DIR}/flatbuffers"
--- a/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tflite-vx-delegate/tflite-vx-delegate_git.bb
+++ b/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tflite-vx-delegate/tflite-vx-delegate_git.bb
@ -0,0 +1,71 @@
+# Copyright 2020-2021 STMicroelectronics
+DESCRIPTION = "Verisilicon TFLite VX Delegate for STM32 Devices"
+LICENSE = "MIT"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=7d6260e4f3f6f85de05af9c8f87e6fb5"
+
+SRCBRANCH_vx = "main"
+SRCREV_vx = "a975b7ed7a5797812d3c5bdb7310ee92164d0a35"
+
+SRCBRANCH_tf = "r2.11"
+SRCREV_tf = "5d37bd0350f0144632629c1aa2ebaef6ca76300b"
+
+SRC_URI =  "git://github.com/VeriSilicon/tflite-vx-delegate.git;branch=${SRCBRANCH_vx};name=vx;destsuffix=git_vx/;protocol=https \
+            git://github.com/tensorflow/tensorflow;branch=${SRCBRANCH_tf};name=tf;destsuffix=git_tf/;protocol=https "
+SRC_URI += "file://0001-tflite-vx-delegate-fix-to-support-tflite-2-11.patch;patchdir=${WORKDIR}/git_tf"
+
+PV = "2.11.0+git${SRCREV_vx}"
+S = "${WORKDIR}/git_vx"
+COMPATIBLE_MACHINE = "stm32mp25common"
+
+inherit cmake
+DEPENDS += "tim-vx patchelf-native"
+
+python () {
+    # Split PV ("<base>+git<rev>") into the helper variables used when naming
+    # the installed library:
+    #   PVB   - everything before the first "+" (e.g. "2.11.0")
+    #   MAJOR - first dot-separated field of PVB (e.g. "2")
+    pv_base = d.getVar('PV').split("+")[0]
+    d.setVar('MAJOR', pv_base.split(".")[0])
+    d.setVar('PVB', pv_base)
+}
+
+do_configure[network] = "1"
+
+do_configure:prepend() {
+    # Drop FC from the environment before the configure step runs.
+    unset FC
+
+    # Re-export any configured proxy under both its lower-case and upper-case
+    # name.
+    # NOTE(review): presumably needed because configure is allowed network
+    # access for this recipe - confirm.
+    if [ -n "${https_proxy}" ]; then
+        export https_proxy=${https_proxy}
+        export HTTPS_PROXY=${https_proxy}
+    fi
+    if [ -n "${http_proxy}" ]; then
+        export http_proxy=${http_proxy}
+        export HTTP_PROXY=${http_proxy}
+    fi
+}
+
+EXTRA_OECMAKE += " -DFETCHCONTENT_SOURCE_DIR_TENSORFLOW=${WORKDIR}/git_tf \
+                   -DTIM_VX_INSTALL=${STAGING_DIR_TARGET}/usr \
+                   -DTFLITE_ENABLE_XNNPACK=OFF \
+                   -DTFLITE_ENABLE_EXTERNAL_DELEGATE=ON \
+                   -DFETCHCONTENT_FULLY_DISCONNECTED=OFF \
+"
+
+do_install() {
+    # Install libvx_delegate.so into libdir under its full base version (PVB),
+    # force its SONAME to the unversioned name, then add the .so.<MAJOR> and
+    # plain .so symlinks pointing at the versioned file.
+    install -d ${D}${libdir}
+    install -d ${D}${includedir}/VX
+    install -m 0755 ${B}/libvx_delegate.so ${D}${libdir}/libvx_delegate.so.${PVB}
+    patchelf --set-soname libvx_delegate.so ${D}${libdir}/libvx_delegate.so.${PVB}
+    ln -sf libvx_delegate.so.${PVB} ${D}${libdir}/libvx_delegate.so.${MAJOR}
+    ln -sf libvx_delegate.so.${PVB} ${D}${libdir}/libvx_delegate.so
+
+    # Install custom static lib and its header. Both are taken from explicit
+    # locations (${B} / ${S}) rather than the current directory, and both are
+    # plain data files, so neither gets the executable bit.
+    install -m 0644 ${B}/libvx_custom_op.a ${D}${libdir}/libvx_custom_op.a
+    install -m 0644 ${S}/vsi_npu_custom_op.h ${D}${includedir}/VX/vsi_npu_custom_op.h
+}
+
+FILES:${PN} += " ${libdir}/libvx_delegate.so.${MAJOR} \
+                 ${libdir}/libvx_delegate.so.${PVB} \
+"
--- a/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tim-vx/tim-vx/0001-tim-vx-tests-disable-AVG_ANDROID-tests.patch
+++ b/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tim-vx/tim-vx/0001-tim-vx-tests-disable-AVG_ANDROID-tests.patch
@ -0,0 +1,59 @@
+From 23cdc22779175fea6d73848a98e85702d99051c9 Mon Sep 17 00:00:00 2001
+From: Maxence GUILHIN <maxence.guilhin@st.com>
+Date: Mon, 4 Dec 2023 11:19:21 +0100
+Subject: [PATCH 1/1] tim-vx-tests : disable AVG_ANDROID tests which fails with
+ tensor HDL OFF
+
+Signed-off-by: Maxence GUILHIN <maxence.guilhin@st.com>
+---
+ src/tim/transform/average_pool_layout_infer_test.cc | 2 +-
+ src/tim/vx/ops/avg_pool_test.cc                     | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/src/tim/transform/average_pool_layout_infer_test.cc b/src/tim/transform/average_pool_layout_infer_test.cc
+index 0ff8115..07048f3 100644
+--- a/src/tim/transform/average_pool_layout_infer_test.cc
+++ b/src/tim/transform/average_pool_layout_infer_test.cc
+@@ -4,7 +4,7 @@
+ #include "tim/transform/layout_inference.h"
+ 
+ #include "gtest/gtest.h"
+-TEST(AVG_ANDROID, layout_infer_) {
+TEST(AVG_ANDROID, DISABLED_layout_infer_) {
+     auto ctx = tim::vx::Context::Create();
+     auto graph = ctx->CreateGraph();
+ 
+diff --git a/src/tim/vx/ops/avg_pool_test.cc b/src/tim/vx/ops/avg_pool_test.cc
+index 3a9aed8..3f3f17d 100644
+--- a/src/tim/vx/ops/avg_pool_test.cc
+++ b/src/tim/vx/ops/avg_pool_test.cc
+@@ -463,7 +463,7 @@ TEST(AVG, shape_60_52_3_5_fp32_kernel_35_stride_5) {
+     ArraysMatch(golden, output,1e-4f);
+ }
+ 
+-TEST(AVG_ANDROID, shape_60_52_3_5_fp32_kernel_35_stride_5) {
+TEST(AVG_ANDROID, DISABLED_shape_60_52_3_5_fp32_kernel_35_stride_5) {
+     auto ctx = tim::vx::Context::Create();
+     auto graph = ctx->CreateGraph();
+ 
+@@ -529,7 +529,7 @@ TEST(AVG_ANDROID, shape_60_52_3_5_fp32_kernel_35_stride_5) {
+     ArraysMatch(golden, output, 1e-5f);
+ }
+ 
+-TEST(AVG_ANDROID, shape_60_52_3_5_fp32_kernel_50_stride_5) {
+TEST(AVG_ANDROID, DISABLED_shape_60_52_3_5_fp32_kernel_50_stride_5) {
+     auto ctx = tim::vx::Context::Create();
+     auto graph = ctx->CreateGraph();
+ 
+@@ -578,7 +578,7 @@ TEST(AVG_ANDROID, shape_60_52_3_5_fp32_kernel_50_stride_5) {
+     ArraysMatch(golden, output, 1e-5f);
+ }
+ 
+-TEST(AVG_ANDROID, shape_60_52_3_5_uint8_kernel_35_stride_5) {
+TEST(AVG_ANDROID, DISABLED_shape_60_52_3_5_uint8_kernel_35_stride_5) {
+     auto ctx = tim::vx::Context::Create();
+     auto graph = ctx->CreateGraph();
+ 
+-- 
+2.25.1
+
--- a/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tim-vx/tim-vx_git.bb
+++ b/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-frameworks/tim-vx/tim-vx_git.bb
@ -0,0 +1,105 @@
+DESCRIPTION = "TIM-VX is a software integration module provided by VeriSilicon to facilitate \
+deployment of Neural-Networks on OpenVX enabled ML accelerators. It serves as the backend \
+binding for runtime frameworks such as Android NN, Tensorflow-Lite, MLIR, TVM and more."
+SUMMARY = "Tensor Interface Module for OpenVX"
+HOMEPAGE = "https://github.com/VeriSilicon/TIM-VX"
+LICENSE = "MIT"
+
+LIC_FILES_CHKSUM = "file://LICENSE;md5=d72cd187d764d96d91db827cb65b48a7"
+
+SRCBRANCH_tim_vx = "main"
+SRCREV_tim_vx = "33f3a4f176ff9c407479eaf6be78c52bb3c7a939"
+SRC_URI ="git://github.com/VeriSilicon/TIM-VX.git;branch=${SRCBRANCH_tim_vx};name=tim_vx;destsuffix=tim_vx_git/;protocol=https"
+SRC_URI += " file://0001-tim-vx-tests-disable-AVG_ANDROID-tests.patch"
+
+
+SRCBRANCH_googletest = "main"
+SRCREV_googletest = "eab0e7e289db13eabfc246809b0284dac02a369d"
+SRC_URI +="git://github.com/google/googletest;branch=${SRCBRANCH_googletest};name=googletest;destsuffix=googletest/;protocol=https "
+
+
+PV = "1.1.57+git${SRCREV_tim_vx}"
+PV_googletest = "1.14.0"
+
+S = "${WORKDIR}/tim_vx_git"
+
+# Only compatible with stm32mp25
+COMPATIBLE_MACHINE = "stm32mp25common"
+
+python () {
+    # Publish two helper variables derived from PV:
+    #   PVB   - PV up to (not including) the first "+" (here "1.1.57")
+    #   MAJOR - leading dot-separated component of PVB (here "1")
+    base_version, _, _ = d.getVar('PV').partition("+")
+    major_version, _, _ = base_version.partition(".")
+    d.setVar('MAJOR', major_version)
+    d.setVar('PVB', base_version)
+}
+
+inherit cmake
+DEPENDS += " patchelf-native \
+	     gcnano-driver-stm32mp \
+	     gcnano-userland \
+             gtest \
+             googletest \
+	"
+
+EXTRA_OECMAKE =  " \
+    -DCONFIG=YOCTO \
+    -DCMAKE_SYSROOT=${RECIPE_SYSROOT} \
+    -DTIM_VX_ENABLE_TEST=ON \
+    -DCMAKE_SKIP_RPATH=TRUE \
+    -DFETCHCONTENT_FULLY_DISCONNECTED=OFF \
+    -DTIM_VX_USE_EXTERNAL_OVXLIB=ON \
+    -DTIM_VX_DBG_ENABLE_TENSOR_HNDL=OFF \
+    -DOVXLIB_INC=${S}/src/tim/vx/internal/include/ \
+    -DOVXLIB_LIB=${STAGING_LIBDIR}/libovxlib.so \
+    -DFETCHCONTENT_SOURCE_DIR_GOOGLETEST=${WORKDIR}/googletest \
+"
+do_configure[network] = "1"
+
+do_install() {
+    # Create the destination directories: the library dir, a versioned
+    # directory for the unit-test binary, and /home/weston.
+    install -d ${D}${libdir}
+    install -d ${D}/usr/local/bin/${PN}-${PVB}
+    install -d ${D}/home/weston
+
+    # Install libtim-vx.so under its full base version (PVB), force its SONAME
+    # to the unversioned name, then add the .so.<MAJOR> and plain .so symlinks.
+    install -m 0755 ${B}/src/tim/libtim-vx.so ${D}${libdir}/libtim-vx.so.${PVB}
+    patchelf --set-soname libtim-vx.so ${D}${libdir}/libtim-vx.so.${PVB}
+
+    ln -sf libtim-vx.so.${PVB} ${D}${libdir}/libtim-vx.so.${MAJOR}
+    ln -sf libtim-vx.so.${PVB} ${D}${libdir}/libtim-vx.so
+
+    # Install other libraries for benchmark (gtest/gmock from the build tree)
+    # together with the TIM-VX unit-test binary.
+    install -m 0755 ${B}/lib/libgtest_main.so ${D}${libdir}/libgtest_main.so.${PV_googletest}
+    install -m 0755 ${B}/lib/libgtest.so      ${D}${libdir}/libgtest.so.${PV_googletest}
+    install -m 0755 ${B}/lib/libgmock_main.so ${D}${libdir}/libgmock_main.so
+    install -m 0755 ${B}/lib/libgmock.so      ${D}${libdir}/libgmock.so
+    install -m 0755 ${B}/src/tim/unit_test    ${D}/usr/local/bin/${PN}-${PVB}/TIM-VX_test
+
+    # Include: the public tim/ header tree goes to includedir.
+    install -d ${D}${includedir}
+    cp -r ${S}/include/tim ${D}${includedir}
+
+    # cl_viv_vx_ext.h is a single regular file, so install it with an explicit
+    # mode instead of a recursive copy.
+    # NOTE(review): presumably the test binary needs this header at run time,
+    # hence the copies next to it and in /home/weston - confirm.
+    install -m 0644 ${STAGING_INCDIR}/CL/cl_viv_vx_ext.h ${D}/usr/local/bin/${PN}-${PVB}/cl_viv_vx_ext.h
+    install -m 0644 ${STAGING_INCDIR}/CL/cl_viv_vx_ext.h ${D}/home/weston/cl_viv_vx_ext.h
+}
+
+PACKAGES =+ "${PN}-tools"
+FILES_SOLIBSDEV = ""
+
+FILES:${PN}-tools = "   /usr/local/bin/${PN}-${PVB}/TIM-VX_test \
+			/usr/local/bin/${PN}-${PVB}/cl_viv_vx_ext.h \
+			/home/weston/cl_viv_vx_ext.h \
+			${libdir}/libgtest_main.so.${PV_googletest} \
+			${libdir}/libgtest.so.${PV_googletest} \
+			${libdir}/libgmock_main.so \
+			${libdir}/libgmock.so \
+"
+
+FILES:${PN} += " ${libdir}/libtim-vx.so.${MAJOR} \
+                 ${libdir}/libtim-vx.so.${PVB}   \
+                 ${libdir}/libtim-vx.so \
+"
+
+INSANE_SKIP:${PN} += " dev-so "
--- a/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-st/packagegroups/packagegroup-x-linux-ai.bbappend
+++ b/meta-digi-dey/dynamic-layers/stm32mpu-ai/recipes-st/packagegroups/packagegroup-x-linux-ai.bbappend
@ -32,4 +32,5 @@ RDEPENDS:packagegroup-x-linux-ai-tflite:append:ccmp25 = " \
    tflite-object-detection-python \
    tflite-pose-estimation-python \
    tflite-semantic-segmentation-python \
+    tim-vx \
 "