From 2845b72ce981dc5c2be28b413d0ca83b5ea87070 Mon Sep 17 00:00:00 2001 From: Isaac Hermida Date: Thu, 14 May 2026 12:27:44 +0200 Subject: [PATCH] x-linux-ai: recipes-samples: improvements to face-recognition Add full support to face-recognition demo so it can be integrated in the webkit demo. Improvements are: * support for USB camera * use 640x480 resolution so it fits in our COG demo https://onedigi.atlassian.net/browse/DEL-10133 Signed-off-by: Isaac Hermida --- ...cognition-add-V4L2SRC-camera-support.patch | 228 ++++++++++++++++++ ...nition-set-camera-preview-to-640x480.patch | 206 ++++++++++++++++ ...stai-mpu-face-recognition-cpp_6.%.bbappend | 2 + 3 files changed, 436 insertions(+) create mode 100644 meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/files/patches/0002-face-recognition-add-V4L2SRC-camera-support.patch create mode 100644 meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/files/patches/0003-face-recognition-set-camera-preview-to-640x480.patch diff --git a/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/files/patches/0002-face-recognition-add-V4L2SRC-camera-support.patch b/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/files/patches/0002-face-recognition-add-V4L2SRC-camera-support.patch new file mode 100644 index 000000000..da6f5963f --- /dev/null +++ b/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/files/patches/0002-face-recognition-add-V4L2SRC-camera-support.patch @@ -0,0 +1,228 @@ +From: Isaac Hermida +Date: Thu, 14 May 2026 12:00:00 +0200 +Subject: [PATCH] face-recognition: add V4L2SRC camera support + +Add a V4L2SRC capture pipeline so USB cameras can be used on MP2x +platforms when config_board_npu.sh selects CAMERA_SRC=V4L2SRC. 
+ +Signed-off-by: Isaac Hermida +--- + stai-mpu/stai_mpu_face_recognition.cc | 184 +++++++++++++++++++++++++- + 1 file changed, 181 insertions(+), 3 deletions(-) + +diff --git a/stai-mpu/stai_mpu_face_recognition.cc b/stai-mpu/stai_mpu_face_recognition.cc +index 945ace0..756139a 100644 +--- a/stai-mpu/stai_mpu_face_recognition.cc ++++ b/stai-mpu/stai_mpu_face_recognition.cc +@@ -2110,8 +2110,11 @@ static GstFlowReturn gst_new_sample_fr_cb(GstElement *sink, CustomData *data) + } + #endif + +- int width = data->window_width; +- int height = data->window_height; ++ GstCaps* caps = gst_sample_get_caps(sample); ++ GstStructure* structure = gst_caps_get_structure(caps, 0); ++ int width = data->window_width, height = data->window_height; /* kept as fallback when the caps carry no width/height */ ++ gst_structure_get_int(structure, "width", &width); ++ gst_structure_get_int(structure, "height", &height); + int channels = 3; + cv::Mat frame(height, width, CV_8UC3, info.data); + cv::Mat cropped_frame; +@@ -2380,6 +2383,171 @@ static int gst_dual_pipeline_camera_creation(CustomData *data) + } + + /** ++ * Construct the Gstreamer pipeline used to stream USB camera frames and run ++ * NN model inference. 
++ */ ++static int gst_pipeline_camera_creation(CustomData *data) ++{ ++ GstElement *pipeline, *v4l2src, *tee, *queue0, *queue1, *queue2; ++ GstElement *framerate, *scale0, *scale1, *convert0, *convert1; ++ GstElement *convert2, *dispsink, *appsink1, *appsink2, *fpsmeasure1; ++ GstBus *bus; ++ ++ /* Create the pipeline */ ++ pipeline = gst_pipeline_new("Face recognition live USB camera"); ++ data->pipeline = pipeline; ++ ++ /* Create gstreamer elements */ ++ v4l2src = gst_element_factory_make("v4l2src", "source"); ++ tee = gst_element_factory_make("tee", "frame-tee"); ++ queue0 = gst_element_factory_make("queue", "queue0"); ++ queue1 = gst_element_factory_make("queue", "queue1"); ++ queue2 = gst_element_factory_make("queue", "queue2"); ++ convert0 = gst_element_factory_make("videoconvert", "convert0"); ++ convert1 = gst_element_factory_make("videoconvert", "convert1"); ++ convert2 = gst_element_factory_make("videoconvert", "convert2"); ++ scale0 = gst_element_factory_make("videoscale", "videoscale0"); ++ scale1 = gst_element_factory_make("videoscale", "videoscale1"); ++ dispsink = gst_element_factory_make_full("gtkwaylandsink", ++ "name", "gtkwsink", ++ "drm-device", NULL, NULL); ++ appsink1 = gst_element_factory_make("appsink", "app-sink"); ++ appsink2 = gst_element_factory_make("appsink", "app-sink2"); ++ framerate = gst_element_factory_make("videorate", "video-rate"); ++ fpsmeasure1 = gst_element_factory_make("fpsdisplaysink", "fps-measure1"); ++ ++ GstCaps *caps_src = gst_caps_new_simple("video/x-raw", ++ "width", G_TYPE_INT, ++ data->frame_width, ++ "height", G_TYPE_INT, ++ data->frame_height, ++ "framerate", GST_TYPE_FRACTION, ++ std::stoi(camera_fps_str), 1, ++ NULL); ++ ++ GstCaps *caps_preview = gst_caps_new_simple("video/x-raw", ++ "format", G_TYPE_STRING, ++ "RGB16", ++ "width", G_TYPE_INT, ++ data->frame_width, ++ "height", G_TYPE_INT, ++ data->frame_height, ++ NULL); ++ ++ GstCaps *caps_fr = gst_caps_new_simple("video/x-raw", ++ "format", 
G_TYPE_STRING, "RGB", ++ "width", G_TYPE_INT, ++ data->frame_width, ++ "height", G_TYPE_INT, ++ data->frame_height, ++ NULL); ++ ++ GstCaps *caps_nn = gst_caps_new_simple("video/x-raw", ++ "format", G_TYPE_STRING, "RGB", ++ "width", G_TYPE_INT, ++ data->nn_input_width, ++ "height", G_TYPE_INT, ++ data->nn_input_height, ++ NULL); ++ ++ if (!pipeline || !v4l2src || !tee || !queue0 || !queue1 || !queue2 || ++ !convert0 || !convert1 || !convert2 || !scale0 || !scale1 || ++ !dispsink || !appsink1 || !appsink2 || !framerate || ++ !fpsmeasure1) { ++ g_printerr("Not all elements could be created. Exiting.\n"); ++ return -1; ++ } ++ ++ std::string video_device = "/dev/" + data->camera_info.video_device; ++ g_object_set(G_OBJECT(v4l2src), "device", video_device.c_str(), NULL); ++ ++ /* Configure the queue elements: one buffer each, leaky downstream */ ++ g_object_set(G_OBJECT(queue0), "max-size-buffers", 1, ++ "leaky", 2 /* downstream */, NULL); ++ g_object_set(G_OBJECT(queue1), "max-size-buffers", 1, ++ "leaky", 2 /* downstream */, NULL); ++ g_object_set(G_OBJECT(queue2), "max-size-buffers", 1, ++ "leaky", 2 /* downstream */, NULL); ++ ++ /* Configure fpsdisplaysink: render through dispsink, emit fps-measurements */ ++ g_object_set(fpsmeasure1, "signal-fps-measurements", TRUE, ++ "fps-update-interval", 2000, "text-overlay", FALSE, ++ "video-sink", dispsink, NULL); ++ g_signal_connect(fpsmeasure1, "fps-measurements", ++ G_CALLBACK(gst_fps_measure_display_cb), NULL); ++ ++ /* Configure the appsinks: unsynced, keep a single buffer and drop the rest */ ++ g_object_set(appsink1, "emit-signals", TRUE, "sync", FALSE, ++ "max-buffers", 1, "drop", TRUE, NULL); ++ g_signal_connect(appsink1, "new-sample", ++ G_CALLBACK(gst_new_sample_fr_cb), data); ++ ++ g_object_set(appsink2, "emit-signals", TRUE, "sync", FALSE, ++ "max-buffers", 1, "drop", TRUE, NULL); ++ g_signal_connect(appsink2, "new-sample", ++ G_CALLBACK(gst_new_sample_cb), data); ++ ++ /* Build the pipeline */ ++ 
gst_bin_add_many(GST_BIN(pipeline), v4l2src, framerate, tee, queue0, ++ queue1, queue2, convert0, convert1, convert2, scale0, ++ scale1, dispsink, 
fpsmeasure1, appsink1, appsink2, ++ NULL); ++ ++ if (!gst_element_link_filtered(framerate, tee, caps_src)) { ++ g_error("Failed to link elements (0)"); /* NOTE(review): g_error() is fatal in GLib, so the returns after it are likely never reached */ ++ return -2; ++ } ++ if (!gst_element_link_many(v4l2src, framerate, NULL)) { ++ g_error("Failed to link elements (1)"); ++ return -2; ++ } ++ if (!gst_element_link_many(tee, queue1, convert1, NULL)) { ++ g_error("Failed to link elements (2)"); ++ return -2; ++ } ++ if (!gst_element_link_filtered(convert1, fpsmeasure1, caps_preview)) { ++ g_error("Failed to link elements (3)"); ++ return -2; ++ } ++ if (!gst_element_link_many(tee, queue0, convert0, scale0, NULL)) { ++ g_error("Failed to link elements (4)"); ++ return -2; ++ } ++ if (!gst_element_link_filtered(scale0, appsink1, caps_fr)) { ++ g_error("Failed to link elements (5)"); ++ return -2; ++ } ++ if (!gst_element_link_many(tee, queue2, convert2, scale1, NULL)) { ++ g_error("Failed to link elements (6)"); ++ return -2; ++ } ++ if (!gst_element_link_filtered(scale1, appsink2, caps_nn)) { ++ g_error("Failed to link elements (7)"); ++ return -2; ++ } ++ ++ gst_caps_unref(caps_src); ++ gst_caps_unref(caps_preview); ++ gst_caps_unref(caps_fr); ++ gst_caps_unref(caps_nn); ++ ++ /* Instruct the bus to emit signals for each received message, and ++ * connect to the interesting signals */ ++ bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline)); ++ gst_bus_add_signal_watch(bus); ++ g_signal_connect(G_OBJECT(bus), "message::error", ++ (GCallback)gst_bus_error_cb, data); ++ g_signal_connect(G_OBJECT(bus), "message::eos", ++ (GCallback)gst_bus_error_cb, data); /* NOTE(review): EOS reuses the error callback - confirm intended */ ++ g_signal_connect(G_OBJECT(bus), "message::application", ++ (GCallback)gst_application_cb, data); ++ g_signal_connect(G_OBJECT(bus), "message::state-changed", ++ (GCallback)gst_state_changed_cb, data); ++ gst_object_unref(bus); ++ return 0; ++} ++ ++/** + * This 
function display the help when -h or --help is passed as parameter. 
+ */ + static void print_help(int argc, char** argv) +@@ -2643,7 +2811,10 @@ int main(int argc, char *argv[]) + g_print("no camera connected \n"); + exit(1); + } +- // data.camera_info = setup_camera(nn_input_width,nn_input_height); ++ if(camera_src_str == "V4L2SRC"){ ++ data.camera_info = setup_camera(nn_input_width, ++ nn_input_height); ++ } + } else { + data.preview_enabled = false; + /* Check if directory is empty */ +@@ -2673,8 +2844,12 @@ int main(int argc, char *argv[]) + ret = gst_dual_pipeline_camera_creation(&data); + if(ret) + exit(1); ++ } else if(camera_src_str == "V4L2SRC"){ ++ ret = gst_pipeline_camera_creation(&data); ++ if(ret) ++ exit(1); + } else { +- g_print("Camera source used not supported use LIBCAMERA \n"); ++ g_print("Camera source used not supported use LIBCAMERA or V4L2SRC \n"); + } + + } diff --git a/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/files/patches/0003-face-recognition-set-camera-preview-to-640x480.patch b/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/files/patches/0003-face-recognition-set-camera-preview-to-640x480.patch new file mode 100644 index 000000000..35a011b68 --- /dev/null +++ b/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/files/patches/0003-face-recognition-set-camera-preview-to-640x480.patch @@ -0,0 +1,206 @@ +From: Isaac Hermida +Date: Thu, 14 May 2026 17:00:00 +0200 +Subject: [PATCH] face-recognition: set camera preview to 640x480 + +The face recognition sample lets the camera preview expand with the +display resolution. This makes the camera demo larger than the other +NPU demos, which keep their camera preview at 640x480. + +Set the video widget and its overlay drawing area to 640x480, and keep +them centered in the existing maximized windows. 
+ +Signed-off-by: Isaac Hermida +--- + stai-mpu/stai_mpu_face_recognition.cc | 70 ++++++++++++++++++++++++---------- + 1 file changed, 51 insertions(+), 19 deletions(-) + +diff --git a/stai-mpu/stai_mpu_face_recognition.cc b/stai-mpu/stai_mpu_face_recognition.cc +index 756139a..e9a8360 100644 +--- a/stai-mpu/stai_mpu_face_recognition.cc ++++ b/stai-mpu/stai_mpu_face_recognition.cc +@@ -48,6 +48,9 @@ + #define FACE_IDENTITY_CLASSES 512 + #define MAX_HISTORY_THUMBNAILS 11 /* for 720p display */ + ++#define CAMERA_PREVIEW_WIDTH 640 ++#define CAMERA_PREVIEW_HEIGHT 480 ++ + #include "stai_mpu_wrapper.hpp" + #include "blazeface_pp.hpp" + #include "facenet_pp.hpp" +@@ -1126,6 +1129,17 @@ static gboolean gui_press_event_cb(GtkWidget *widget, + CustomData *data) + { + if (event->button == GDK_BUTTON_PRIMARY) { ++ /* ++ * In camera preview mode, click events are relative to the ++ * overlay drawing widget. In still image mode, drawing is ++ * translated by data->offset, so keep the legacy adjustment. 
++ */ ++ int click_x = event->x; ++ if (!data->preview_enabled) ++ click_x -= data->offset; ++ ++ int click_y = event->y; ++ + /* event occurs on one of the thumbnail face */ + unsigned int nb_registered_faces = data->registered_faces.size(); + if (nb_registered_faces != 0) { +@@ -1134,10 +1149,10 @@ static gboolean gui_press_event_cb(GtkWidget *widget, + data->nb_history_thumbnails); + std::vector thumb_pos = data->history_thumb_position[nb_thumbnails - 1]; + for (unsigned int i = 0 ; i < thumb_pos.size() ; i++) { +- if ((event->x - data->offset > thumb_pos[i].x) && +- (event->x - data->offset< thumb_pos[i].x + thumb_pos[i].width) && +- (event->y > thumb_pos[i].y) && +- (event->y < thumb_pos[i].y + thumb_pos[i].height)) { ++ if ((click_x > thumb_pos[i].x) && ++ (click_x < thumb_pos[i].x + thumb_pos[i].width) && ++ (click_y > thumb_pos[i].y) && ++ (click_y < thumb_pos[i].y + thumb_pos[i].height)) { + gui_delete_registered_face(i, data); + if (!data->preview_enabled) { + data->new_inference = true; +@@ -1151,10 +1166,10 @@ static gboolean gui_press_event_cb(GtkWidget *widget, + * not been registered */ + for (unsigned int i = 0 ; i < data->screen_face_position.size() ; i++) { + if (!data->screen_face_position[i].registered && +- (event->x - data->offset > data->screen_face_position[i].pos.x) && +- (event->x - data->offset < data->screen_face_position[i].pos.x + data->screen_face_position[i].pos.width) && +- (event->y > data->screen_face_position[i].pos.y) && +- (event->y < data->screen_face_position[i].pos.y + data->screen_face_position[i].pos.height)) { ++ (click_x > data->screen_face_position[i].pos.x) && ++ (click_x < data->screen_face_position[i].pos.x + data->screen_face_position[i].pos.width) && ++ (click_y > data->screen_face_position[i].pos.y) && ++ (click_y < data->screen_face_position[i].pos.y + data->screen_face_position[i].pos.height)) { + gui_register_new_face(i, data); + goto end; + } +@@ -1194,18 +1209,17 @@ static void 
gui_compute_history_thumbnail_position(CustomData *data) + { + if(data->preview_enabled){ +- float ratio = (float)data->frame_width/(float)data->frame_height; +- float width_preview = (ratio*(float)(data->widget_draw_height)); +- data->face_banner.width = width_preview; ++ data->face_banner.width = data->widget_draw_ov_width; + data->face_banner.height = data->ui_face_thumb_size + data->ui_face_thumb_spacing ; + data->face_banner.x = 0; +- data->face_banner.y = data->widget_draw_height - data->face_banner.height; ++ data->face_banner.y = data->widget_draw_ov_height - ++ data->face_banner.height; + } else { + data->face_banner.width = data->frame_disp_pos.width; + data->face_banner.height = data->ui_face_thumb_size + data->ui_face_thumb_spacing; + data->face_banner.x = 0; + data->face_banner.y = data->frame_disp_pos.height - data->face_banner.height; + } +- data->nb_history_thumbnails = (data->widget_draw_width - data->ui_face_thumb_spacing) ++ data->nb_history_thumbnails = (data->face_banner.width - data->ui_face_thumb_spacing) + / (data->ui_face_thumb_size + data->ui_face_thumb_spacing); + + /* Clip to the MAX_HISTORY_THUMBNAILS value if needed */ +@@ -1254,20 +1265,15 @@ static void gui_draw_face_positions(cairo_t *cr, + new_screen_face_position.registered = false; + + if (data->preview_enabled){ +- /* Get drawing area informations */ +- float ratio = (float)data->frame_width/(float)data->frame_height; +- float width_preview = (ratio*(float)(data->widget_draw_height)); + /* compute rectangle position and dimensions */ +- /* Offset to draw on the preview */ +- data->offset = ((data->widget_draw_ov_width - (int)width_preview)/2); +- new_screen_face_position.pos.x = width_preview * +- data->detected_faces[i].bbox.top_left.x + data->offset; +- new_screen_face_position.pos.y = data->frame_disp_pos.height * ++ new_screen_face_position.pos.x = data->widget_draw_ov_width * ++ data->detected_faces[i].bbox.top_left.x; ++ new_screen_face_position.pos.y = 
data->widget_draw_ov_height * + data->detected_faces[i].bbox.top_left.y; +- new_screen_face_position.pos.width = width_preview * ++ new_screen_face_position.pos.width = data->widget_draw_ov_width * + (data->detected_faces[i].bbox.bot_right.x - + data->detected_faces[i].bbox.top_left.x); +- new_screen_face_position.pos.height = data->frame_disp_pos.height * ++ new_screen_face_position.pos.height = data->widget_draw_ov_height * + (data->detected_faces[i].bbox.bot_right.y - + data->detected_faces[i].bbox.top_left.y); + } else { +@@ -1380,7 +1393,7 @@ static void gui_set_ui_parameters(CustomData *data) + data->ui_face_thumb_spacing = 20; + data->ui_box_line_width = 2.0; + data->ui_thumb_box_line_width = 3.0; +- data->keyboard_config = "window_1080p"; ++ data->keyboard_config = "window_720p"; + } else { + /* Default UI parameter */ + g_print("Display config fallback \n"); + +@@ -1492,5 +1505,5 @@ static gboolean gui_draw_overlay_cb(GtkWidget *widget, + 0); + } else { +- data->offset = ((data->widget_draw_ov_width - (int)width_preview)/2); ++ data->offset = 0; + } + +@@ -1504,11 +1511,8 @@ static gboolean gui_draw_overlay_cb(GtkWidget *widget, + + /* Draw a black transparent banner to display the registered faces */ + cairo_set_source_rgba(cr, 0.0, 0.0, 0.0, 0.60); +- if (!data->preview_enabled){ +- cairo_rectangle(cr,data->face_banner.x, data->face_banner.y,data->face_banner.width, data->face_banner.height); +- } else { +- cairo_rectangle(cr,data->face_banner.x + data->offset, data->face_banner.y,data->face_banner.width, data->face_banner.height); +- } ++ cairo_rectangle(cr, data->face_banner.x, data->face_banner.y, ++ data->widget_draw_ov_width, data->face_banner.height); + cairo_fill_preserve(cr); + cairo_stroke(cr); + +@@ -1644,6 +1652,8 @@ static void gui_create_overlay(CustomData *data) + /* Create the drawing area to draw text on it using cairo */ + drawing_area = gtk_drawing_area_new(); + gtk_widget_set_app_paintable(drawing_area, TRUE); ++ 
gtk_widget_set_size_request(drawing_area, CAMERA_PREVIEW_WIDTH, ++ CAMERA_PREVIEW_HEIGHT); + gtk_widget_add_events(drawing_area, GDK_BUTTON_PRESS_MASK); + g_signal_connect(G_OBJECT(drawing_area), "draw",G_CALLBACK(gui_draw_overlay_cb), data); + g_signal_connect(G_OBJECT(drawing_area), "button-press-event",G_CALLBACK(gui_press_event_cb), data); +@@ -1696,7 +1701,7 @@ static void gui_create_overlay(CustomData *data) + gtk_widget_set_name(drawing_box, "gui_overlay_draw"); + if (data->preview_enabled){ + /* Camera preview use case */ +- gtk_box_pack_start(GTK_BOX(drawing_box), drawing_area, TRUE, TRUE, 0); ++ gtk_box_set_center_widget(GTK_BOX(drawing_box), drawing_area); + } else { + /* Still picture use case */ + gtk_box_pack_start(GTK_BOX(drawing_box), still_pict_draw, TRUE, TRUE, 0); +@@ -1828,6 +1834,8 @@ static void gui_create_main(CustomData *data) + g_assert (!g_strcmp0 (G_OBJECT_TYPE_NAME (sink), "GstGtkWaylandSink")); + g_object_get (sink, "widget", &data->video, NULL); + gtk_widget_set_app_paintable(GTK_WIDGET(data->video), TRUE); ++ gtk_widget_set_size_request(GTK_WIDGET(data->video), ++ CAMERA_PREVIEW_WIDTH, CAMERA_PREVIEW_HEIGHT); + + data->info_inf_time_main = gtk_label_new(NULL); + gtk_label_set_justify(GTK_LABEL(data->info_inf_time_main),GTK_JUSTIFY_CENTER); +@@ -1871,7 +1879,7 @@ static void gui_create_main(CustomData *data) + gtk_widget_set_name(video_box, "gui_main_video"); + if (data->preview_enabled){ + /* Camera preview use case */ +- gtk_box_pack_start(GTK_BOX(video_box), GTK_WIDGET(data->video), TRUE, TRUE, 0); ++ gtk_box_set_center_widget(GTK_BOX(video_box), GTK_WIDGET(data->video)); + } else { + /* Still picture use case */ + gtk_box_pack_start(GTK_BOX(video_box), still_pict_draw, TRUE, TRUE, 0); +@@ -2424,9 +2435,9 @@ static int gst_pipeline_camera_creation(CustomData *data) + "format", G_TYPE_STRING, + "RGB16", + "width", G_TYPE_INT, +- data->frame_width, ++ CAMERA_PREVIEW_WIDTH, + "height", G_TYPE_INT, +- data->frame_height, ++ 
CAMERA_PREVIEW_HEIGHT, + NULL); + + GstCaps *caps_fr = gst_caps_new_simple("video/x-raw", diff --git a/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/stai-mpu-face-recognition-cpp_6.%.bbappend b/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/stai-mpu-face-recognition-cpp_6.%.bbappend index ac278d2d2..207058cce 100644 --- a/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/stai-mpu-face-recognition-cpp_6.%.bbappend +++ b/meta-digi-dey/dynamic-layers/x-linux-ai/recipes-samples/face-recognition/stai-mpu-face-recognition-cpp_6.%.bbappend @@ -5,6 +5,8 @@ FILESEXTRAPATHS:prepend := "${THISDIR}/../common:${THISDIR}/files:" SRC_URI += " \ file://scripts/launch_npu_demo.sh \ file://patches/0001-face-recognition-remove-weston-user-check-from-launc.patch \ + file://patches/0002-face-recognition-add-V4L2SRC-camera-support.patch \ + file://patches/0003-face-recognition-set-camera-preview-to-640x480.patch \ " do_install:append () {