x-linux-ai: recipes-samples: improvements to face-recognition

Add full support to the face-recognition demo so that it can be integrated
into the webkit demo.

Improvements are:
* support for USB camera
* use 640x480 resolution so it fits in our COG demo

https://onedigi.atlassian.net/browse/DEL-10133

Signed-off-by: Isaac Hermida <isaac.hermida@digi.com>
This commit is contained in:
Isaac Hermida 2026-05-14 12:27:44 +02:00
parent 2bbaa723b0
commit 2845b72ce9
3 changed files with 436 additions and 0 deletions

View File

@ -0,0 +1,228 @@
From: Isaac Hermida <isaac.hermida@digi.com>
Date: Thu, 14 May 2026 12:00:00 +0200
Subject: [PATCH] face-recognition: add V4L2SRC camera support
Add a V4L2SRC capture pipeline so USB cameras can be used on MP2x
platforms when config_board_npu.sh selects CAMERA_SRC=V4L2SRC.
Signed-off-by: Isaac Hermida <isaac.hermida@digi.com>
---
stai-mpu/stai_mpu_face_recognition.cc | 184 +++++++++++++++++++++++++-
1 file changed, 181 insertions(+), 3 deletions(-)
diff --git a/stai-mpu/stai_mpu_face_recognition.cc b/stai-mpu/stai_mpu_face_recognition.cc
index 945ace0..756139a 100644
--- a/stai-mpu/stai_mpu_face_recognition.cc
+++ b/stai-mpu/stai_mpu_face_recognition.cc
@@ -2110,8 +2110,11 @@ static GstFlowReturn gst_new_sample_fr_cb(GstElement *sink, CustomData *data)
}
#endif
- int width = data->window_width;
- int height = data->window_height;
+ GstCaps* caps = gst_sample_get_caps(sample);
+ GstStructure* structure = gst_caps_get_structure(caps, 0);
+ int width, height;
+ gst_structure_get_int(structure, "width", &width);
+ gst_structure_get_int(structure, "height", &height);
int channels = 3;
cv::Mat frame(height, width, CV_8UC3, info.data);
cv::Mat cropped_frame;
@@ -2380,6 +2383,171 @@ static int gst_dual_pipeline_camera_creation(CustomData *data)
}
/**
+ * Build the V4L2 (USB) camera pipeline: v4l2src -> videorate -> tee -> {display sink, appsink1 (gst_new_sample_fr_cb), appsink2 (gst_new_sample_cb)}.
+ * Returns 0 on success, -1 if an element could not be created, -2 on a link failure.
+ */
+static int gst_pipeline_camera_creation(CustomData *data)
+{
+ GstElement *pipeline, *v4l2src, *tee, *queue0, *queue1, *queue2;
+ GstElement *framerate, *scale0, *scale1, *convert0, *convert1;
+ GstElement *convert2, *dispsink, *appsink1, *appsink2, *fpsmeasure1;
+ GstBus *bus;
+
+ /* Create the top-level pipeline and publish it through data->pipeline */
+ pipeline = gst_pipeline_new("Face recognition live USB camera");
+ data->pipeline = pipeline;
+
+ /* Instantiate every element; failures are checked in one place below */
+ v4l2src = gst_element_factory_make("v4l2src", "source");
+ tee = gst_element_factory_make("tee", "frame-tee");
+ queue0 = gst_element_factory_make("queue", "queue0");
+ queue1 = gst_element_factory_make("queue", "queue1");
+ queue2 = gst_element_factory_make("queue", "queue2");
+ convert0 = gst_element_factory_make("videoconvert", "convert0");
+ convert1 = gst_element_factory_make("videoconvert", "convert1");
+ convert2 = gst_element_factory_make("videoconvert", "convert2");
+ scale0 = gst_element_factory_make("videoscale", "videoscale0");
+ scale1 = gst_element_factory_make("videoscale", "videoscale1");
+ dispsink = gst_element_factory_make_full("gtkwaylandsink",
+ "name", "gtkwsink",
+ "drm-device", NULL, NULL);
+ appsink1 = gst_element_factory_make("appsink", "app-sink");
+ appsink2 = gst_element_factory_make("appsink", "app-sink2");
+ framerate = gst_element_factory_make("videorate", "video-rate");
+ fpsmeasure1 = gst_element_factory_make("fpsdisplaysink", "fps-measure1");
+
+ if (!pipeline || !v4l2src || !tee || !queue0 || !queue1 || !queue2 ||
+ !convert0 || !convert1 || !convert2 || !scale0 || !scale1 ||
+ !dispsink || !appsink1 || !appsink2 || !framerate ||
+ !fpsmeasure1) {
+ g_printerr("Not all elements could be created. Exiting.\n");
+ return -1; /* checked before the caps are built so no GstCaps is leaked here */
+ }
+
+ GstCaps *caps_src = gst_caps_new_simple("video/x-raw",
+ "width", G_TYPE_INT,
+ data->frame_width,
+ "height", G_TYPE_INT,
+ data->frame_height,
+ "framerate", GST_TYPE_FRACTION,
+ std::stoi(camera_fps_str), 1,
+ NULL);
+
+ GstCaps *caps_preview = gst_caps_new_simple("video/x-raw",
+ "format", G_TYPE_STRING,
+ "RGB16",
+ "width", G_TYPE_INT,
+ data->frame_width,
+ "height", G_TYPE_INT,
+ data->frame_height,
+ NULL);
+
+ GstCaps *caps_fr = gst_caps_new_simple("video/x-raw",
+ "format", G_TYPE_STRING, "RGB",
+ "width", G_TYPE_INT,
+ data->frame_width,
+ "height", G_TYPE_INT,
+ data->frame_height,
+ NULL);
+
+ GstCaps *caps_nn = gst_caps_new_simple("video/x-raw",
+ "format", G_TYPE_STRING, "RGB",
+ "width", G_TYPE_INT,
+ data->nn_input_width,
+ "height", G_TYPE_INT,
+ data->nn_input_height,
+ NULL);
+
+ std::string video_device = "/dev/" + data->camera_info.video_device;
+ g_object_set(G_OBJECT(v4l2src), "device", video_device.c_str(), NULL);
+
+ /* Each branch queue is limited to one buffer and set leaky (2 = downstream) */
+ g_object_set(G_OBJECT(queue0), "max-size-buffers", 1,
+ "leaky", 2 /* downstream */, NULL);
+ g_object_set(G_OBJECT(queue1), "max-size-buffers", 1,
+ "leaky", 2 /* downstream */, NULL);
+ g_object_set(G_OBJECT(queue2), "max-size-buffers", 1,
+ "leaky", 2 /* downstream */, NULL);
+
+ /* Configure fpsdisplaysink: dispsink is its video-sink, no text overlay, fps reported by signal */
+ g_object_set(fpsmeasure1, "signal-fps-measurements", TRUE,
+ "fps-update-interval", 2000, "text-overlay", FALSE,
+ "video-sink", dispsink, NULL);
+ g_signal_connect(fpsmeasure1, "fps-measurements",
+ G_CALLBACK(gst_fps_measure_display_cb), NULL);
+
+ /* Configure the appsinks: emit new-sample, sync off, max-buffers=1 with drop enabled */
+ g_object_set(appsink1, "emit-signals", TRUE, "sync", FALSE,
+ "max-buffers", 1, "drop", TRUE, NULL);
+ g_signal_connect(appsink1, "new-sample",
+ G_CALLBACK(gst_new_sample_fr_cb), data);
+
+ g_object_set(appsink2, "emit-signals", TRUE, "sync", FALSE,
+ "max-buffers", 1, "drop", TRUE, NULL);
+ g_signal_connect(appsink2, "new-sample",
+ G_CALLBACK(gst_new_sample_cb), data);
+
+ /* Build the pipeline. NOTE(review): dispsink is also fpsmeasure1's video-sink (set above) - confirm it must be added here too */
+ gst_bin_add_many(GST_BIN(pipeline), v4l2src, framerate, tee, queue0,
+ queue1, queue2, convert0, convert1, convert2, scale0,
+ scale1, dispsink, fpsmeasure1, appsink1, appsink2,
+ NULL);
+
+ if (!gst_element_link_filtered(framerate, tee, caps_src)) { /* videorate -> tee, constrained to caps_src */
+ g_error("Failed to link elements (0)");
+ return -2; /* presumably not reached: g_error() is documented as fatal - TODO confirm */
+ }
+ if (!gst_element_link_many(v4l2src, framerate, NULL)) {
+ g_error("Failed to link elements (1)");
+ return -2;
+ }
+ if (!gst_element_link_many(tee, queue1, convert1, NULL)) { /* display branch */
+ g_error("Failed to link elements (2)");
+ return -2;
+ }
+ if (!gst_element_link_filtered(convert1, fpsmeasure1, caps_preview)) {
+ g_error("Failed to link elements (3)");
+ return -2;
+ }
+ if (!gst_element_link_many(tee, queue0, convert0, scale0, NULL)) { /* appsink1 branch (caps_fr) */
+ g_error("Failed to link elements (4)");
+ return -2;
+ }
+ if (!gst_element_link_filtered(scale0, appsink1, caps_fr)) {
+ g_error("Failed to link elements (5)");
+ return -2;
+ }
+ if (!gst_element_link_many(tee, queue2, convert2, scale1, NULL)) { /* appsink2 branch (caps_nn) */
+ g_error("Failed to link elements (6)");
+ return -2;
+ }
+ if (!gst_element_link_filtered(scale1, appsink2, caps_nn)) {
+ g_error("Failed to link elements (7)");
+ return -2;
+ }
+
+ gst_caps_unref(caps_src);
+ gst_caps_unref(caps_preview);
+ gst_caps_unref(caps_fr);
+ gst_caps_unref(caps_nn);
+
+ /* Have the bus emit a signal per received message and hook the handlers.
+ * NOTE(review): "message::eos" is routed to gst_bus_error_cb - confirm this is intended */
+ bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline));
+ gst_bus_add_signal_watch(bus);
+ g_signal_connect(G_OBJECT(bus), "message::error",
+ (GCallback)gst_bus_error_cb, data);
+ g_signal_connect(G_OBJECT(bus), "message::eos",
+ (GCallback)gst_bus_error_cb, data);
+ g_signal_connect(G_OBJECT(bus), "message::application",
+ (GCallback)gst_application_cb, data);
+ g_signal_connect(G_OBJECT(bus), "message::state-changed",
+ (GCallback)gst_state_changed_cb, data);
+ gst_object_unref(bus);
+ return 0;
+}
+
+/**
* This function display the help when -h or --help is passed as parameter.
*/
static void print_help(int argc, char** argv)
@@ -2643,7 +2811,10 @@ int main(int argc, char *argv[])
g_print("no camera connected \n");
exit(1);
}
- // data.camera_info = setup_camera(nn_input_width,nn_input_height);
+ if(camera_src_str == "V4L2SRC"){
+ data.camera_info = setup_camera(nn_input_width,
+ nn_input_height);
+ }
} else {
data.preview_enabled = false;
/* Check if directory is empty */
@@ -2673,8 +2844,12 @@ int main(int argc, char *argv[])
ret = gst_dual_pipeline_camera_creation(&data);
if(ret)
exit(1);
+ } else if(camera_src_str == "V4L2SRC"){
+ ret = gst_pipeline_camera_creation(&data);
+ if(ret)
+ exit(1);
} else {
- g_print("Camera source used not supported use LIBCAMERA \n");
+ g_print("Camera source used not supported use LIBCAMERA or V4L2SRC \n");
}
}

View File

@ -0,0 +1,206 @@
From: Isaac Hermida <isaac.hermida@digi.com>
Date: Thu, 14 May 2026 17:00:00 +0200
Subject: [PATCH] face-recognition: set camera preview to 640x480
The face recognition sample lets the camera preview expand with the
display resolution. This makes the camera demo larger than the other
NPU demos, which keep their camera preview at 640x480.
Set the video widget and its overlay drawing area to 640x480, and keep
them centered in the existing maximized windows.
Signed-off-by: Isaac Hermida <isaac.hermida@digi.com>
---
stai-mpu/stai_mpu_face_recognition.cc | 70 ++++++++++++++++++++++++----------
1 file changed, 51 insertions(+), 19 deletions(-)
diff --git a/stai-mpu/stai_mpu_face_recognition.cc b/stai-mpu/stai_mpu_face_recognition.cc
index 756139a..e9a8360 100644
--- a/stai-mpu/stai_mpu_face_recognition.cc
+++ b/stai-mpu/stai_mpu_face_recognition.cc
@@ -48,6 +48,9 @@
#define FACE_IDENTITY_CLASSES 512
#define MAX_HISTORY_THUMBNAILS 11 /* for 720p display */
+#define CAMERA_PREVIEW_WIDTH 640
+#define CAMERA_PREVIEW_HEIGHT 480
+
#include "stai_mpu_wrapper.hpp"
#include "blazeface_pp.hpp"
#include "facenet_pp.hpp"
@@ -1126,6 +1129,17 @@ static gboolean gui_press_event_cb(GtkWidget *widget,
CustomData *data)
{
if (event->button == GDK_BUTTON_PRIMARY) {
+ /*
+ * In camera preview mode, click events are relative to the
+ * overlay drawing widget. In still image mode, drawing is
+ * translated by data->offset, so keep the legacy adjustment.
+ */
+ int click_x = event->x;
+ if (!data->preview_enabled)
+ click_x -= data->offset;
+
+ int click_y = event->y;
+
/* event occurs on one of the thumbnail face */
unsigned int nb_registered_faces = data->registered_faces.size();
if (nb_registered_faces != 0) {
@@ -1134,10 +1149,10 @@ static gboolean gui_press_event_cb(GtkWidget *widget,
data->nb_history_thumbnails);
std::vector<Position> thumb_pos = data->history_thumb_position[nb_thumbnails - 1];
for (unsigned int i = 0 ; i < thumb_pos.size() ; i++) {
- if ((event->x - data->offset > thumb_pos[i].x) &&
- (event->x - data->offset< thumb_pos[i].x + thumb_pos[i].width) &&
- (event->y > thumb_pos[i].y) &&
- (event->y < thumb_pos[i].y + thumb_pos[i].height)) {
+ if ((click_x > thumb_pos[i].x) &&
+ (click_x < thumb_pos[i].x + thumb_pos[i].width) &&
+ (click_y > thumb_pos[i].y) &&
+ (click_y < thumb_pos[i].y + thumb_pos[i].height)) {
gui_delete_registered_face(i, data);
if (!data->preview_enabled) {
data->new_inference = true;
@@ -1151,10 +1166,10 @@ static gboolean gui_press_event_cb(GtkWidget *widget,
* not been registered */
for (unsigned int i = 0 ; i < data->screen_face_position.size() ; i++) {
if (!data->screen_face_position[i].registered &&
- (event->x - data->offset > data->screen_face_position[i].pos.x) &&
- (event->x - data->offset < data->screen_face_position[i].pos.x + data->screen_face_position[i].pos.width) &&
- (event->y > data->screen_face_position[i].pos.y) &&
- (event->y < data->screen_face_position[i].pos.y + data->screen_face_position[i].pos.height)) {
+ (click_x > data->screen_face_position[i].pos.x) &&
+ (click_x < data->screen_face_position[i].pos.x + data->screen_face_position[i].pos.width) &&
+ (click_y > data->screen_face_position[i].pos.y) &&
+ (click_y < data->screen_face_position[i].pos.y + data->screen_face_position[i].pos.height)) {
gui_register_new_face(i, data);
goto end;
}
@@ -1194,18 +1209,17 @@ static void gui_compute_history_thumbnail_position(CustomData *data)
{
if(data->preview_enabled){
- float ratio = (float)data->frame_width/(float)data->frame_height;
- float width_preview = (ratio*(float)(data->widget_draw_height));
- data->face_banner.width = width_preview;
+ data->face_banner.width = data->widget_draw_ov_width;
data->face_banner.height = data->ui_face_thumb_size + data->ui_face_thumb_spacing ;
data->face_banner.x = 0;
- data->face_banner.y = data->widget_draw_height - data->face_banner.height;
+ data->face_banner.y = data->widget_draw_ov_height -
+ data->face_banner.height;
} else {
data->face_banner.width = data->frame_disp_pos.width;
data->face_banner.height = data->ui_face_thumb_size + data->ui_face_thumb_spacing;
data->face_banner.x = 0;
data->face_banner.y = data->frame_disp_pos.height - data->face_banner.height;
}
- data->nb_history_thumbnails = (data->widget_draw_width - data->ui_face_thumb_spacing)
+ data->nb_history_thumbnails = (data->face_banner.width - data->ui_face_thumb_spacing)
/ (data->ui_face_thumb_size + data->ui_face_thumb_spacing);
/* Clip to the MAX_HISTORY_THUMBNAILS value if needed */
@@ -1254,20 +1265,15 @@ static void gui_draw_face_positions(cairo_t *cr,
new_screen_face_position.registered = false;
if (data->preview_enabled){
- /* Get drawing area informations */
- float ratio = (float)data->frame_width/(float)data->frame_height;
- float width_preview = (ratio*(float)(data->widget_draw_height));
/* compute rectangle position and dimensions */
- /* Offset to draw on the preview */
- data->offset = ((data->widget_draw_ov_width - (int)width_preview)/2);
- new_screen_face_position.pos.x = width_preview *
- data->detected_faces[i].bbox.top_left.x + data->offset;
- new_screen_face_position.pos.y = data->frame_disp_pos.height *
+ new_screen_face_position.pos.x = data->widget_draw_ov_width *
+ data->detected_faces[i].bbox.top_left.x;
+ new_screen_face_position.pos.y = data->widget_draw_ov_height *
data->detected_faces[i].bbox.top_left.y;
- new_screen_face_position.pos.width = width_preview *
+ new_screen_face_position.pos.width = data->widget_draw_ov_width *
(data->detected_faces[i].bbox.bot_right.x -
data->detected_faces[i].bbox.top_left.x);
- new_screen_face_position.pos.height = data->frame_disp_pos.height *
+ new_screen_face_position.pos.height = data->widget_draw_ov_height *
(data->detected_faces[i].bbox.bot_right.y -
data->detected_faces[i].bbox.top_left.y);
} else {
@@ -1380,7 +1393,7 @@ static void gui_set_ui_parameters(CustomData *data)
data->ui_face_thumb_spacing = 20;
data->ui_box_line_width = 2.0;
data->ui_thumb_box_line_width = 3.0;
- data->keyboard_config = "window_1080p";
+ data->keyboard_config = "window_720p";
} else {
/* Default UI parameter */
g_print("Display config fallback \n");
@@ -1492,5 +1505,5 @@ static gboolean gui_draw_overlay_cb(GtkWidget *widget,
0);
} else {
- data->offset = ((data->widget_draw_ov_width - (int)width_preview)/2);
+ data->offset = 0;
}
@@ -1504,11 +1511,8 @@ static gboolean gui_draw_overlay_cb(GtkWidget *widget,
/* Draw a black transparent banner to display the registered faces */
cairo_set_source_rgba(cr, 0.0, 0.0, 0.0, 0.60);
- if (!data->preview_enabled){
- cairo_rectangle(cr,data->face_banner.x, data->face_banner.y,data->face_banner.width, data->face_banner.height);
- } else {
- cairo_rectangle(cr,data->face_banner.x + data->offset, data->face_banner.y,data->face_banner.width, data->face_banner.height);
- }
+ cairo_rectangle(cr, data->face_banner.x, data->face_banner.y,
+ data->face_banner.width, data->face_banner.height); /* face_banner.width is set per mode (preview vs still picture) */
cairo_fill_preserve(cr);
cairo_stroke(cr);
@@ -1644,6 +1652,8 @@ static void gui_create_overlay(CustomData *data)
/* Create the drawing area to draw text on it using cairo */
drawing_area = gtk_drawing_area_new();
gtk_widget_set_app_paintable(drawing_area, TRUE);
+ gtk_widget_set_size_request(drawing_area, CAMERA_PREVIEW_WIDTH,
+ CAMERA_PREVIEW_HEIGHT);
gtk_widget_add_events(drawing_area, GDK_BUTTON_PRESS_MASK);
g_signal_connect(G_OBJECT(drawing_area), "draw",G_CALLBACK(gui_draw_overlay_cb), data);
g_signal_connect(G_OBJECT(drawing_area), "button-press-event",G_CALLBACK(gui_press_event_cb), data);
@@ -1696,7 +1701,7 @@ static void gui_create_overlay(CustomData *data)
gtk_widget_set_name(drawing_box, "gui_overlay_draw");
if (data->preview_enabled){
/* Camera preview use case */
- gtk_box_pack_start(GTK_BOX(drawing_box), drawing_area, TRUE, TRUE, 0);
+ gtk_box_set_center_widget(GTK_BOX(drawing_box), drawing_area);
} else {
/* Still picture use case */
gtk_box_pack_start(GTK_BOX(drawing_box), still_pict_draw, TRUE, TRUE, 0);
@@ -1828,6 +1834,8 @@ static void gui_create_main(CustomData *data)
g_assert (!g_strcmp0 (G_OBJECT_TYPE_NAME (sink), "GstGtkWaylandSink"));
g_object_get (sink, "widget", &data->video, NULL);
gtk_widget_set_app_paintable(GTK_WIDGET(data->video), TRUE);
+ gtk_widget_set_size_request(GTK_WIDGET(data->video),
+ CAMERA_PREVIEW_WIDTH, CAMERA_PREVIEW_HEIGHT);
data->info_inf_time_main = gtk_label_new(NULL);
gtk_label_set_justify(GTK_LABEL(data->info_inf_time_main),GTK_JUSTIFY_CENTER);
@@ -1871,7 +1879,7 @@ static void gui_create_main(CustomData *data)
gtk_widget_set_name(video_box, "gui_main_video");
if (data->preview_enabled){
/* Camera preview use case */
- gtk_box_pack_start(GTK_BOX(video_box), GTK_WIDGET(data->video), TRUE, TRUE, 0);
+ gtk_box_set_center_widget(GTK_BOX(video_box), GTK_WIDGET(data->video));
} else {
/* Still picture use case */
gtk_box_pack_start(GTK_BOX(video_box), still_pict_draw, TRUE, TRUE, 0);
@@ -2424,9 +2435,9 @@ static int gst_pipeline_camera_creation(CustomData *data)
"format", G_TYPE_STRING,
"RGB16",
"width", G_TYPE_INT,
- data->frame_width,
+ CAMERA_PREVIEW_WIDTH,
"height", G_TYPE_INT,
- data->frame_height,
+ CAMERA_PREVIEW_HEIGHT,
NULL);
GstCaps *caps_fr = gst_caps_new_simple("video/x-raw",

View File

@ -5,6 +5,8 @@ FILESEXTRAPATHS:prepend := "${THISDIR}/../common:${THISDIR}/files:"
SRC_URI += " \
file://scripts/launch_npu_demo.sh \
file://patches/0001-face-recognition-remove-weston-user-check-from-launc.patch \
file://patches/0002-face-recognition-add-V4L2SRC-camera-support.patch \
file://patches/0003-face-recognition-set-camera-preview-to-640x480.patch \
"
do_install:append () {