408 lines
15 KiB
Diff
408 lines
15 KiB
Diff
From: Isaac Hermida <isaac.hermida@digi.com>
|
|
Date: Fri, 15 Sep 2023 17:02:43 +0200
|
|
Subject: [PATCH] Customize EiQ demos
|
|
|
|
Modify the demo scripts to:
|
|
* Always use the NPU
|
|
* Get better performance with USB cameras
|
|
* Add an option to use a higher camera resolution
|
|
* Add an option to show the window in full screen
|
|
* Fix some exceptions raised by the demos
|
|
|
|
Signed-off-by: Isaac Hermida <isaac.hermida@digi.com>
|
|
---
|
|
dms/main.py | 79 +++++++++++++++++++++++++++------------
|
|
face_recognition/main.py | 44 ++++++++++++++++++----
|
|
gesture_detection/main.py | 32 ++++++++++++++--
|
|
object_detection/main.py | 40 ++++++++++++++++----
|
|
4 files changed, 153 insertions(+), 42 deletions(-)
|
|
|
|
diff --git a/dms/main.py b/dms/main.py
|
|
index 6e04dc3..8ba99b2 100644
|
|
--- a/dms/main.py
|
|
+++ b/dms/main.py
|
|
@@ -14,10 +14,16 @@ from eye_landmark import EyeMesher
|
|
from face_landmark import FaceMesher
|
|
from utils import *
|
|
|
|
-MODEL_PATH = pathlib.Path("../models/")
|
|
-DETECT_MODEL = "face_detection_front_128_full_integer_quant.tflite"
|
|
-LANDMARK_MODEL = "face_landmark_192_integer_quant.tflite"
|
|
-EYE_MODEL = "iris_landmark_quant.tflite"
|
|
+WIDTH=640
|
|
+HEIGH=480
|
|
+FLIP=None # None, skip, 0: Flip vertically, 1: Flip horizontally (around the y-axis), -1: Flip both vertically and horizontally
|
|
+FORMAT=0 # None, skip (YUYV, default), 0 MJPG (for usb camera)
|
|
+
|
|
+# Always enforce the Ethos NPU, use the converted vela models
|
|
+MODEL_PATH = pathlib.Path("../vela_models/")
|
|
+DETECT_MODEL = "face_detection_front_128_full_integer_quant_vela.tflite"
|
|
+LANDMARK_MODEL = "face_landmark_192_integer_quant_vela.tflite"
|
|
+EYE_MODEL = "iris_landmark_quant_vela.tflite"
|
|
|
|
# turn on camera
|
|
parser = argparse.ArgumentParser()
|
|
@@ -29,16 +35,31 @@ parser.add_argument(
|
|
parser.add_argument(
|
|
'-d',
|
|
'--delegate',
|
|
- default='',
|
|
+ default='/usr/lib/libethosu_delegate.so',
|
|
help='delegate path')
|
|
+parser.add_argument("-f", "--fullscreen", action="store_true", help='run on full screen mode')
|
|
args = parser.parse_args()
|
|
|
|
if args.input.isdigit():
|
|
cap_input = int(args.input)
|
|
else:
|
|
cap_input = args.input
|
|
+
|
|
+# This pipeline for the OV5640 camera in case the other command fails
|
|
+# cap = cv2.VideoCapture("v4l2src device=%s ! imxvideoconvert_pxp ! video/x-raw,format=RGB16,width=%d,height=%d " \
|
|
+# "! videoconvert ! appsink" % (args.input, WIDTH, HEIGH))
|
|
+
|
|
cap = cv2.VideoCapture(cap_input)
|
|
+cap.set(cv2.CAP_PROP_FRAME_WIDTH, WIDTH)
|
|
+cap.set(cv2.CAP_PROP_FRAME_HEIGHT, HEIGH)
|
|
+
|
|
+if FORMAT == 0:
|
|
+ fourcc = cv2.VideoWriter_fourcc(*'MJPG')
|
|
+ cap.set(cv2.CAP_PROP_FOURCC, fourcc)
|
|
+
|
|
ret, image = cap.read()
|
|
+if FLIP is not None:
|
|
+ image = cv2.flip(image, FLIP)
|
|
if not ret:
|
|
print("Can't read frame from source file ", args.input)
|
|
sys.exit(-1)
|
|
@@ -66,9 +87,9 @@ def draw_face_box(image, bboxes, landmarks, scores):
|
|
label_btmleft = bbox[:2].copy() + 10
|
|
label_btmleft[0] += label_width
|
|
label_btmleft[1] += label_height
|
|
- cv2.rectangle(image, tuple(bbox[:2]), tuple(label_btmleft), color=(255, 0, 0), thickness=cv2.FILLED)
|
|
- cv2.putText(image, score_label, (bbox[0] + 5, label_btmleft[1] - 5),
|
|
- cv2.FONT_HERSHEY_SIMPLEX, fontScale=1.0, color=(255, 255, 255), thickness=2)
|
|
+ #cv2.rectangle(image, tuple(bbox[:2]), tuple(label_btmleft), color=(255, 0, 0), thickness=cv2.FILLED)
|
|
+ #cv2.putText(image, score_label, (bbox[0] + 5, label_btmleft[1] - 5),
|
|
+ #cv2.FONT_HERSHEY_SIMPLEX, fontScale=1.0, color=(255, 255, 255), thickness=2)
|
|
return image
|
|
|
|
# detect single frame
|
|
@@ -111,8 +132,8 @@ def main(image):
|
|
right_eye_img = padded[right_box[0][1]:right_box[1][1], right_box[0][0]:right_box[1][0]]
|
|
left_eye_landmarks, left_iris_landmarks = eye_mesher.inference(left_eye_img)
|
|
right_eye_landmarks, right_iris_landmarks = eye_mesher.inference(right_eye_img)
|
|
- #cv2.rectangle(image_show, left_box[0], left_box[1], color=(255, 0, 0), thickness=2)
|
|
- #cv2.rectangle(image_show, right_box[0], right_box[1], color=(255, 0, 0), thickness=2)
|
|
+ cv2.rectangle(image_show, left_box[0], left_box[1], color=(255, 0, 0), thickness=2)
|
|
+ cv2.rectangle(image_show, right_box[0], right_box[1], color=(255, 0, 0), thickness=2)
|
|
left_eye_ratio = get_eye_ratio(left_eye_landmarks, image_show, left_box[0])
|
|
right_eye_ratio = get_eye_ratio(right_eye_landmarks, image_show, right_box[0])
|
|
|
|
@@ -155,20 +176,32 @@ def main(image):
|
|
|
|
|
|
# endless loop
|
|
+window_name = "EiQ DMS demo"
|
|
while ret:
|
|
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
- # detect single
|
|
- image_show = main(image)
|
|
-
|
|
- # put fps
|
|
- result = cv2.cvtColor(image_show, cv2.COLOR_RGB2BGR)
|
|
-
|
|
- # display the result
|
|
- cv2.imshow('demo', result)
|
|
-
|
|
- ret, image = cap.read()
|
|
- if cv2.waitKey(1) & 0xFF == ord('q'):
|
|
- break
|
|
+ try:
|
|
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
+ # detect single
|
|
+ image_show = main(image)
|
|
+
|
|
+ # put fps
|
|
+ result = cv2.cvtColor(image_show, cv2.COLOR_RGB2BGR)
|
|
+
|
|
+ cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
|
|
+ if args.fullscreen:
|
|
+ cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
|
|
+
|
|
+ # display the result
|
|
+ cv2.imshow(window_name, result)
|
|
+
|
|
+ ret, image = cap.read()
|
|
+ if FLIP is not None:
|
|
+ image = cv2.flip(image, FLIP)
|
|
+ if cv2.waitKey(1) & 0xFF == ord('q'):
|
|
+ break
|
|
+ except Exception as err:
|
|
+ # Ignore exceptions
|
|
+ time.sleep(0.2)
|
|
+ print("Exception catched:%s\n... continuing with test" % repr(err))
|
|
|
|
time.sleep(2)
|
|
cap.release()
|
|
diff --git a/face_recognition/main.py b/face_recognition/main.py
|
|
index acc838e..33ffa71 100644
|
|
--- a/face_recognition/main.py
|
|
+++ b/face_recognition/main.py
|
|
@@ -13,6 +13,11 @@ from face_detection import YoloFace
|
|
from face_recognition import Facenet
|
|
from face_database import FaceDatabase
|
|
|
|
+WIDTH=640
|
|
+HEIGH=480
|
|
+FLIP=None # None, skip, 0: Flip vertically, 1: Flip horizontally (around the y-axis), -1: Flip both vertically and horizontally
|
|
+FORMAT=0 # None, skip (YUYV, default), 0 MJPG (for usb camera)
|
|
+
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument(
|
|
'-i',
|
|
@@ -22,12 +27,14 @@ parser.add_argument(
|
|
parser.add_argument(
|
|
'-d',
|
|
'--delegate',
|
|
- default='',
|
|
+ default='/usr/lib/libethosu_delegate.so',
|
|
help='delegate path')
|
|
+parser.add_argument("-f", "--fullscreen", action="store_true", help='run on full screen mode')
|
|
args = parser.parse_args()
|
|
|
|
-detector = YoloFace("../models/yoloface_int8.tflite", args.delegate)
|
|
-recognizer = Facenet("../models/facenet_512_int_quantized.tflite", args.delegate)
|
|
+# Always enforce the Ethos NPU, use the converted vela models
|
|
+detector = YoloFace("../vela_models/yoloface_int8_vela.tflite", args.delegate)
|
|
+recognizer = Facenet("../vela_models/facenet_512_int_quantized_vela.tflite", args.delegate)
|
|
database = FaceDatabase()
|
|
|
|
def ischar(c):
|
|
@@ -39,7 +46,7 @@ def get_inputs(img, msg):
|
|
cv2.rectangle(img, (0, 0), (img.shape[1], 40), (0, 0, 0), -1)
|
|
cv2.putText(img, msg + inputs, (30, 30),
|
|
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
|
- cv2.imshow('img', img)
|
|
+ cv2.imshow(window_name, img)
|
|
key = cv2.waitKey(20) & 0xFF
|
|
if key == 13 or key == 141:
|
|
break
|
|
@@ -68,13 +75,28 @@ if args.input.isdigit():
|
|
cap_input = int(args.input)
|
|
else:
|
|
cap_input = args.input
|
|
+
|
|
+# This pipeline for the OV5640 camera in case the other command fails
|
|
+# vid = cv2.VideoCapture("v4l2src device=%s ! imxvideoconvert_pxp ! video/x-raw,format=RGB16,width=%d,height=%d " \
|
|
+# "! videoconvert ! appsink" % (args.input, WIDTH, HEIGH))
|
|
vid = cv2.VideoCapture(cap_input)
|
|
+vid.set(cv2.CAP_PROP_FRAME_WIDTH, WIDTH)
|
|
+vid.set(cv2.CAP_PROP_FRAME_HEIGHT, HEIGH)
|
|
+
|
|
+if FORMAT == 0:
|
|
+ fourcc = cv2.VideoWriter_fourcc(*'MJPG')
|
|
+ vid.set(cv2.CAP_PROP_FOURCC, fourcc)
|
|
+
|
|
PADDING = 10
|
|
tips = "Press 'a' to add person, 'd' to delete person, 'p' to print database"
|
|
+
|
|
+window_name = "Face recognition Demo"
|
|
while True:
|
|
embeddings = None
|
|
|
|
ret, img = vid.read()
|
|
+ if FLIP is not None:
|
|
+ img = cv2.flip(img, FLIP)
|
|
if (ret == False):
|
|
break
|
|
boxes = detector.detect(img)
|
|
@@ -97,12 +119,20 @@ while True:
|
|
|
|
cv2.putText(img, tips, (30, 30),
|
|
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
|
|
- cv2.imshow('img', img)
|
|
+
|
|
+ cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
|
|
+ if args.fullscreen:
|
|
+ cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
|
|
+
|
|
+ cv2.imshow(window_name, img)
|
|
key = cv2.waitKey(1) & 0xFF
|
|
if (key == ord('a')):
|
|
msg = "ADD. Please input name:"
|
|
name = get_inputs(img, msg)
|
|
- database.add_name(name, embeddings)
|
|
+ if embeddings:
|
|
+ database.add_name(name, embeddings)
|
|
+ else:
|
|
+ print ("Not a valid face, not adding user to database, ignoring...")
|
|
elif (key == ord('d')):
|
|
msg = "DEL. Please input name:"
|
|
name = get_inputs(img, msg)
|
|
@@ -110,7 +140,7 @@ while True:
|
|
elif (key == ord('p')):
|
|
names = ",".join(database.get_names())
|
|
print_longtext(img, names + " Press any key to continue.")
|
|
- cv2.imshow('img', img)
|
|
+ cv2.imshow(window_name, img)
|
|
while cv2.waitKey(100) & 0xFF == 0xFF:
|
|
pass
|
|
|
|
diff --git a/gesture_detection/main.py b/gesture_detection/main.py
|
|
index da83ce0..15b8597 100644
|
|
--- a/gesture_detection/main.py
|
|
+++ b/gesture_detection/main.py
|
|
@@ -9,8 +9,9 @@ import time
|
|
import argparse
|
|
from hand_tracker import HandTracker
|
|
|
|
-PALM_MODEL_PATH = "../models/palm_detection_builtin_256_integer_quant.tflite"
|
|
-LANDMARK_MODEL_PATH = "../models/hand_landmark_3d_256_integer_quant.tflite"
|
|
+# Always enforce the Ethos NPU, use the converted vela models
|
|
+PALM_MODEL_PATH = "../vela_models/palm_detection_builtin_256_integer_quant_vela.tflite"
|
|
+LANDMARK_MODEL_PATH = "../vela_models/hand_landmark_3d_256_integer_quant_vela.tflite"
|
|
ANCHORS_PATH = "anchors.csv"
|
|
|
|
def draw_landmarks(points, frame):
|
|
@@ -52,15 +53,33 @@ parser.add_argument(
|
|
parser.add_argument(
|
|
'-d',
|
|
'--delegate',
|
|
- default='',
|
|
+ default='/usr/lib/libethosu_delegate.so',
|
|
help='delegate path')
|
|
+parser.add_argument("-f", "--fullscreen", action="store_true", help='run on full screen mode')
|
|
args = parser.parse_args()
|
|
|
|
if args.input.isdigit():
|
|
cap_input = int(args.input)
|
|
else:
|
|
cap_input = args.input
|
|
+
|
|
+WIDTH=640
|
|
+HEIGH=480
|
|
+FLIP=None # None, skip, 0: Flip vertically, 1: Flip horizontally (around the y-axis), -1: Flip both vertically and horizontally
|
|
+FORMAT=0 # None, skip (YUYV, default), 0 MJPG (for usb camera)
|
|
+
|
|
+# This pipeline for the OV5640 camera in case the other command fails
|
|
+# capture = cv2.VideoCapture("v4l2src device=%s ! imxvideoconvert_pxp ! video/x-raw,format=RGB16,width=%d,height=%d " \
|
|
+# "! videoconvert ! appsink" % (args.input, WIDTH, HEIGH))
|
|
+
|
|
capture = cv2.VideoCapture(cap_input)
|
|
+capture.set(cv2.CAP_PROP_FRAME_WIDTH, WIDTH)
|
|
+capture.set(cv2.CAP_PROP_FRAME_HEIGHT, HEIGH)
|
|
+
|
|
+if FORMAT == 0:
|
|
+ fourcc = cv2.VideoWriter_fourcc(*'MJPG')
|
|
+ capture.set(cv2.CAP_PROP_FOURCC, fourcc)
|
|
+
|
|
ret, frame = capture.read()
|
|
if (frame is None):
|
|
print("Can't read frame from source file ", args.input)
|
|
@@ -68,11 +87,16 @@ if (frame is None):
|
|
|
|
detector = HandTracker(PALM_MODEL_PATH, LANDMARK_MODEL_PATH, ANCHORS_PATH, args.delegate, box_shift=0.2, box_enlarge=1.3)
|
|
|
|
+window_name = "Hand Gesture Demo"
|
|
while ret:
|
|
image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
|
points, _ = detector(image)
|
|
draw_landmarks(points, frame)
|
|
- cv2.imshow("hand", frame)
|
|
+
|
|
+ cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
|
|
+ if args.fullscreen:
|
|
+ cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
|
|
+ cv2.imshow(window_name, frame)
|
|
|
|
ret, frame = capture.read()
|
|
if cv2.waitKey(1) & 0xFF == ord('q'):
|
|
diff --git a/object_detection/main.py b/object_detection/main.py
|
|
index 1356111..efa614e 100644
|
|
--- a/object_detection/main.py
|
|
+++ b/object_detection/main.py
|
|
@@ -13,7 +13,13 @@ import argparse
|
|
|
|
from labels import label2string
|
|
|
|
-MODEL_PATH = "../models/ssd_mobilenet_v1_quant.tflite"
|
|
+WIDTH=640
|
|
+HEIGH=480
|
|
+FLIP=None # None, skip, 0: Flip vertically, 1: Flip horizontally (around the y-axis), -1: Flip both vertically and horizontally
|
|
+FORMAT=0 # None, skip (YUYV, default), 0 MJPG (for usb camera)
|
|
+
|
|
+# Always enforce the Ethos NPU, use the converted vela models
|
|
+MODEL_PATH = "../vela_models/ssd_mobilenet_v1_quant_vela.tflite"
|
|
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument(
|
|
@@ -24,21 +30,31 @@ parser.add_argument(
|
|
parser.add_argument(
|
|
'-d',
|
|
'--delegate',
|
|
- default='',
|
|
+ default='/usr/lib/libethosu_delegate.so',
|
|
help='delegate path')
|
|
+parser.add_argument("-f", "--fullscreen", action="store_true", help='run on full screen mode')
|
|
args = parser.parse_args()
|
|
|
|
if args.input.isdigit():
|
|
cap_input = int(args.input)
|
|
else:
|
|
cap_input = args.input
|
|
+
|
|
+# This pipeline for the OV5640 camera in case the other command fails
|
|
+# vid = cv2.VideoCapture("v4l2src device=%s ! imxvideoconvert_pxp ! video/x-raw,format=RGB16,width=%d,height=%d " \
|
|
+# "! videoconvert ! appsink" % (args.input, WIDTH, HEIGH))
|
|
+
|
|
vid = cv2.VideoCapture(cap_input)
|
|
+vid.set(cv2.CAP_PROP_FRAME_WIDTH, WIDTH)
|
|
+vid.set(cv2.CAP_PROP_FRAME_HEIGHT, HEIGH)
|
|
|
|
-if(args.delegate):
|
|
- ext_delegate = [tflite.load_delegate(args.delegate)]
|
|
- interpreter = tflite.Interpreter(model_path=MODEL_PATH, experimental_delegates=ext_delegate)
|
|
-else:
|
|
- interpreter = tflite.Interpreter(model_path=MODEL_PATH)
|
|
+if FORMAT == 0:
|
|
+ fourcc = cv2.VideoWriter_fourcc(*'MJPG')
|
|
+ vid.set(cv2.CAP_PROP_FOURCC, fourcc)
|
|
+
|
|
+# Always enforce the Ethos NPU
|
|
+ext_delegate = [tflite.load_delegate(args.delegate)]
|
|
+interpreter = tflite.Interpreter(model_path=MODEL_PATH, experimental_delegates=ext_delegate)
|
|
interpreter.allocate_tensors()
|
|
|
|
input_details = interpreter.get_input_details()
|
|
@@ -52,10 +68,13 @@ total_fps = 0
|
|
total_time = 0
|
|
|
|
ret, frame = vid.read()
|
|
+if FLIP is not None:
|
|
+ frame = cv2.flip(frame, FLIP)
|
|
if (frame is None):
|
|
print("Can't read frame from source file ", args.input)
|
|
exit(0)
|
|
|
|
+window_name = "Object Detection Demo"
|
|
while ret:
|
|
total_fps += 1
|
|
loop_start = time.time()
|
|
@@ -94,9 +113,14 @@ while ret:
|
|
msg = "FPS:" + str(fps) + " Invoke time:" + str(invoke_time) + "ms"
|
|
cv2.putText(frame, msg, (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 3)
|
|
|
|
- cv2.imshow("image", frame)
|
|
+ cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
|
|
+ if args.fullscreen:
|
|
+ cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
|
|
+ cv2.imshow(window_name, frame)
|
|
|
|
ret, frame = vid.read()
|
|
+ if FLIP is not None:
|
|
+ frame = cv2.flip(frame, FLIP)
|
|
if cv2.waitKey(1) & 0xFF == ord('q'):
|
|
break
|
|
|