From c7ebabfc115055d62b47910cb136ea2abe6b5428 Mon Sep 17 00:00:00 2001 From: Nicolas Dufresne Date: Tue, 31 Oct 2023 16:24:33 -0400 Subject: [PATCH 54/68] v4l2codecs: encoder: Implement zero-copy support In this implementation we switch encoders to always run in memory type V4L2_MEMORY_DMABUF. To allow fallback, we allocate 4 frames using V4L2_MEMORY_MMAP at setup time and detach the dmabuf from the driver. If upstream provides dmabuf, we try importing that, and if that fails, we make a copy and continue. Upstream-Status: Pending --- sys/v4l2codecs/gstv4l2codecallocator.c | 14 +- sys/v4l2codecs/gstv4l2codech264enc.c | 35 +++-- sys/v4l2codecs/gstv4l2codecvp8enc.c | 21 ++- sys/v4l2codecs/gstv4l2encoder.c | 198 ++++++++++++++++++++++--- sys/v4l2codecs/gstv4l2encoder.h | 8 +- 5 files changed, 228 insertions(+), 48 deletions(-) diff --git a/sys/v4l2codecs/gstv4l2codecallocator.c b/sys/v4l2codecs/gstv4l2codecallocator.c index e0fb371..fd77347 100644 --- a/sys/v4l2codecs/gstv4l2codecallocator.c +++ b/sys/v4l2codecs/gstv4l2codecallocator.c @@ -245,7 +245,8 @@ gst_v4l2_codec_encoder_allocator_prepare (GstV4l2CodecAllocator * self) gint ret; guint i; - ret = gst_v4l2_encoder_request_buffers (encoder, direction, self->pool_size); + ret = gst_v4l2_encoder_request_buffers (encoder, direction, self->pool_size, + V4L2_MEMORY_MMAP); if (ret < self->pool_size) { if (ret >= 0) GST_ERROR_OBJECT (self, @@ -261,10 +262,16 @@ gst_v4l2_codec_encoder_allocator_prepare (GstV4l2CodecAllocator * self) g_queue_push_tail (&self->pool, buf); } + if (direction == GST_PAD_SINK) { + gst_v4l2_codec_allocator_detach (self); + gst_v4l2_encoder_request_buffers (encoder, direction, + VIDEO_MAX_FRAME, V4L2_MEMORY_DMABUF); + } + return TRUE; failed: - gst_v4l2_encoder_request_buffers (encoder, direction, 0); + gst_v4l2_encoder_request_buffers (encoder, direction, 0, V4L2_MEMORY_MMAP); return FALSE; } @@ -446,7 +453,8 @@ gst_v4l2_codec_allocator_detach (GstV4l2CodecAllocator * self) 
gst_v4l2_decoder_request_buffers (self->decoder, self->direction, 0); } if (self->encoder) { - gst_v4l2_encoder_request_buffers (self->encoder, self->direction, 0); + gst_v4l2_encoder_request_buffers (self->encoder, self->direction, 0, + V4L2_MEMORY_MMAP); } } GST_OBJECT_UNLOCK (self); diff --git a/sys/v4l2codecs/gstv4l2codech264enc.c b/sys/v4l2codecs/gstv4l2codech264enc.c index 070c51a..e7a6e07 100644 --- a/sys/v4l2codecs/gstv4l2codech264enc.c +++ b/sys/v4l2codecs/gstv4l2codech264enc.c @@ -270,8 +270,16 @@ gst_v4l2_codec_h264_enc_propose_allocation (GstVideoEncoder * encoder, GstQuery * query) { GstV4l2CodecH264Enc *self = GST_V4L2_CODEC_H264_ENC (encoder); + GstV4l2CodecPool *pool = NULL; + gboolean need_pool; - gst_query_add_allocation_pool (query, NULL, self->vinfo.size, 2, 0); + gst_query_parse_allocation (query, NULL, &need_pool); + + if (need_pool) + pool = gst_v4l2_codec_pool_new (self->sink_allocator, &self->vinfo); + + gst_query_add_allocation_pool (query, GST_BUFFER_POOL (pool), + self->vinfo.size, 2, 0); gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL); return GST_VIDEO_ENCODER_CLASS (parent_class)->propose_allocation (encoder, @@ -964,12 +972,6 @@ gst_v4l2_codec_h264_enc_encode_frame (GstH264Encoder * encoder, goto done; } - if (!gst_v4l2_codec_h264_enc_copy_input_buffer (self, frame)) { - GST_ELEMENT_ERROR (self, RESOURCE, NO_SPACE_LEFT, - ("Failed to allocate/copy input buffer."), (NULL)); - goto done; - } - request = gst_v4l2_encoder_alloc_request (self->encoder, frame->system_frame_number, frame->input_buffer, frame->output_buffer); @@ -991,13 +993,22 @@ gst_v4l2_codec_h264_enc_encode_frame (GstH264Encoder * encoder, } if (!gst_v4l2_encoder_request_queue (request, 0)) { - GST_ELEMENT_ERROR (self, RESOURCE, WRITE, - ("Driver did not accept the encode request."), (NULL)); - goto done; + if (!gst_v4l2_codec_h264_enc_copy_input_buffer (self, frame)) { + GST_ELEMENT_ERROR (self, RESOURCE, NO_SPACE_LEFT, + ("Failed to allocate/copy 
input buffer."), (NULL)); + goto done; + } + + gst_v4l2_encoder_request_replace_pic_buf (request, frame->input_buffer); + + if (!gst_v4l2_encoder_request_queue (request, 0)) { + GST_ELEMENT_ERROR (self, RESOURCE, WRITE, + ("Driver did not accept the encode request."), (NULL)); + goto done; + } } - if (!gst_v4l2_encoder_request_set_done(request, &bytesused, &flags)) - { + if (!gst_v4l2_encoder_request_set_done(request, &bytesused, &flags)) { GST_ELEMENT_ERROR (self, RESOURCE, WRITE, ("Driver did not ack the request."), (NULL)); goto done; diff --git a/sys/v4l2codecs/gstv4l2codecvp8enc.c b/sys/v4l2codecs/gstv4l2codecvp8enc.c index 24e8540..5990c73 100644 --- a/sys/v4l2codecs/gstv4l2codecvp8enc.c +++ b/sys/v4l2codecs/gstv4l2codecvp8enc.c @@ -580,11 +580,6 @@ gst_v4l2_codec_vp8_enc_encode_frame (GstVp8Encoder * encoder, goto done; } - if (!gst_v4l2_codec_vp8_enc_copy_input_buffer (self, frame)) { - GST_ELEMENT_ERROR (self, RESOURCE, NO_SPACE_LEFT, - ("Failed to allocate/copy input buffer."), (NULL)); - goto done; - } request = gst_v4l2_encoder_alloc_request (self->encoder, frame->system_frame_number, frame->input_buffer, frame->output_buffer); @@ -605,9 +600,19 @@ gst_v4l2_codec_vp8_enc_encode_frame (GstVp8Encoder * encoder, } if (!gst_v4l2_encoder_request_queue (request, 0)) { - GST_ELEMENT_ERROR (self, RESOURCE, WRITE, - ("Driver did not accept the encode request."), (NULL)); - goto done; + if (!gst_v4l2_codec_vp8_enc_copy_input_buffer (self, frame)) { + GST_ELEMENT_ERROR (self, RESOURCE, NO_SPACE_LEFT, + ("Failed to allocate/copy input buffer."), (NULL)); + goto done; + } + + gst_v4l2_encoder_request_replace_pic_buf (request, frame->input_buffer); + + if (!gst_v4l2_encoder_request_queue (request, 0)) { + GST_ELEMENT_ERROR (self, RESOURCE, WRITE, + ("Driver did not accept the encode request."), (NULL)); + goto done; + } } if (!gst_v4l2_encoder_request_set_done (request, &bytesused, &flags)) { diff --git a/sys/v4l2codecs/gstv4l2encoder.c 
b/sys/v4l2codecs/gstv4l2encoder.c index a9e0939..85f0457 100644 --- a/sys/v4l2codecs/gstv4l2encoder.c +++ b/sys/v4l2codecs/gstv4l2encoder.c @@ -66,6 +66,12 @@ struct _GstV4l2Request gboolean sub_request; }; +struct BufState +{ + gboolean queued; + gint dmabuf_fd; +}; + struct _GstV4l2Encoder { GstObject parent; @@ -82,6 +88,9 @@ struct _GstV4l2Encoder enum v4l2_buf_type src_buf_type; gboolean mplane; + /* Rember which FD was used with which index */ + struct BufState buffer_state[VIDEO_MAX_FRAME]; + /* properties */ gchar *media_device; gchar *video_device; @@ -106,6 +115,67 @@ direction_to_buffer_type (GstV4l2Encoder * self, GstPadDirection direction) return self->sink_buf_type; } +static void +buffer_state_init (GstV4l2Encoder * self) +{ + gint i; + + for (i = 0; i < VIDEO_MAX_FRAME; i++) + self->buffer_state[i].dmabuf_fd = -1; +} + +static void +buffer_state_streamoff (GstV4l2Encoder * self) +{ + gint i; + + for (i = 0; i < VIDEO_MAX_FRAME; i++) + self->buffer_state[i].queued = FALSE; +} + +static gint +buffer_state_find_buffer (GstV4l2Encoder * self, gint dmabuf_fd) +{ + gint outstanding_index = -1; + gint i; + + for (i = 0; i < VIDEO_MAX_FRAME; i++) { + if (self->buffer_state[i].queued) + continue; + + if (outstanding_index == -1) + outstanding_index = i; + + if (self->buffer_state[i].dmabuf_fd == dmabuf_fd) + break; + } + + if (i == VIDEO_MAX_FRAME) { + if (outstanding_index == -1) { + return -1; + } + + i = outstanding_index; + } + + return i; +} + +static void +buffer_state_queue (GstV4l2Encoder * self, gint index, gint dmabuf_fd) +{ + g_assert (!self->buffer_state[index].queued); + + self->buffer_state[index].queued = TRUE; + self->buffer_state[index].dmabuf_fd = dmabuf_fd; +} + +static void +buffer_state_dequeue (GstV4l2Encoder * self, gint index) +{ + self->buffer_state[index].queued = FALSE; +} + static void gst_v4l2_encoder_finalize (GObject * obj) { @@ -126,6 +196,8 @@ gst_v4l2_encoder_init (GstV4l2Encoder * self) { self->request_pool = 
gst_queue_array_new (16); self->pending_requests = gst_queue_array_new (16); + + buffer_state_init (self); } static void @@ -268,6 +340,8 @@ gst_v4l2_encoder_streamoff (GstV4l2Encoder * self, GstPadDirection direction) pending_req->pending = FALSE; gst_v4l2_encoder_request_unref (pending_req); } + } else { + buffer_state_streamoff (self); } ret = ioctl (self->video_fd, VIDIOC_STREAMOFF, &type); @@ -619,8 +693,8 @@ gst_v4l2_encoder_set_src_fmt (GstV4l2Encoder * self, GstVideoInfo * info, return FALSE; } - if (!gst_v4l2_format_equivalent (pix_fmt, fmt.fmt.pix_mp.pixelformat) - || fmt.fmt.pix_mp.width != width || fmt.fmt.pix_mp.height != height) { + if (pix_fmt != fmt.fmt.pix_mp.pixelformat || fmt.fmt.pix_mp.width != width + || fmt.fmt.pix_mp.height != height) { GST_DEBUG_OBJECT (self, "Trying to use peer format: %" GST_FOURCC_FORMAT " %ix%i", GST_FOURCC_ARGS (pix_fmt), width, height); @@ -628,6 +702,7 @@ gst_v4l2_encoder_set_src_fmt (GstV4l2Encoder * self, GstVideoInfo * info, fmt.fmt.pix_mp.pixelformat = pix_fmt; fmt.fmt.pix_mp.width = width; fmt.fmt.pix_mp.height = height; + fmt.fmt.pix_mp.plane_fmt[0].sizeimage = 4 * 1024 * 1024;//FIXME ret = ioctl (self->video_fd, VIDIOC_S_FMT, &fmt); if (ret < 0) { @@ -648,12 +723,12 @@ gst_v4l2_encoder_set_src_fmt (GstV4l2Encoder * self, GstVideoInfo * info, gint gst_v4l2_encoder_request_buffers (GstV4l2Encoder * self, - GstPadDirection direction, guint num_buffers) + GstPadDirection direction, guint num_buffers, guint mem_type) { gint ret; struct v4l2_requestbuffers reqbufs = { .count = num_buffers, - .memory = V4L2_MEMORY_MMAP, + .memory = mem_type, .type = direction_to_buffer_type (self, direction), }; @@ -779,38 +854,104 @@ gst_v4l2_encoder_queue_src_buffer (GstV4l2Encoder * self, return TRUE; } +static gboolean +gst_v4l2_encoder_import_buffer (GstV4l2Encoder * self, GstBuffer * buffer, + struct v4l2_buffer *v4l2_buffer) +{ + GstVideoMeta *vmeta = NULL; + gint i; + + if (!self->mplane) { + GST_INFO_OBJECT (self, + "zero 
copy is only support on MPLANE drivers implementation."); + return FALSE; + } + + vmeta = gst_buffer_get_video_meta (buffer); + if (!vmeta) { + GST_INFO_OBJECT (self, "a GstVideoMeta is required for zero copy."); + return FALSE; + } + + if (self->sink_fmt.fmt.pix_mp.num_planes != vmeta->n_planes) { + GST_INFO_OBJECT (self, + "planar format into single plane is not support in zero copy."); + return FALSE; + } + + for (i = 0; i < vmeta->n_planes; i++) { + struct v4l2_plane *planes = v4l2_buffer->m.planes; + struct v4l2_plane_pix_format *plane_fmt = + self->sink_fmt.fmt.pix_mp.plane_fmt; + gsize size, offset, maxsize; + guint length, mem_idx; + gsize mem_skip; + GstMemory *mem; + + if (vmeta->stride[i] != plane_fmt[i].bytesperline) { + GST_INFO_OBJECT (self, + "Stride miss-match at plane %i: got %i but expect %u", + i, vmeta->stride[i], plane_fmt[i].bytesperline); + return FALSE; + } + + if (!gst_buffer_find_memory (buffer, vmeta->offset[i], 1, &mem_idx, + &length, &mem_skip)) { + GST_INFO_OBJECT (self, "no memory found for plane %i at offset %" + G_GSIZE_FORMAT ".", i, vmeta->offset[i]); + return FALSE; + } + + mem = gst_buffer_peek_memory (buffer, mem_idx); + if (!gst_is_dmabuf_memory (mem)) { + GST_INFO_OBJECT (self, "cannot import non-dmabuf memory"); + return FALSE; + } + + size = gst_memory_get_sizes (mem, &offset, &maxsize); + + /* *INDENT-OFF* */ + planes[i] = (struct v4l2_plane) { + .bytesused = offset + size, + .length = offset + maxsize, + .data_offset = offset + mem_skip, + .m.fd = gst_dmabuf_memory_get_fd (mem), + }; + /* *INDENT-ON* */ + } + + v4l2_buffer->length = vmeta->n_planes; + v4l2_buffer->index = buffer_state_find_buffer (self, + v4l2_buffer->m.planes[0].m.fd); + + if (v4l2_buffer->index < 0) { + GST_INFO_OBJECT (self, "no more free buffer to queue the picture."); + return FALSE; + } + + return TRUE; +} + static gboolean gst_v4l2_encoder_queue_sink_buffer (GstV4l2Encoder * self, GstV4l2Request * request, GstBuffer * buffer, guint32 frame_num, 
guint flags) { - gint i, ret; + gint ret; struct v4l2_plane planes[GST_VIDEO_MAX_PLANES]; struct v4l2_buffer buf = { .type = self->sink_buf_type, - .memory = V4L2_MEMORY_MMAP, - .index = gst_v4l2_codec_buffer_get_index (buffer), + .memory = V4L2_MEMORY_DMABUF, .timestamp.tv_usec = frame_num, .request_fd = request->fd, .flags = V4L2_BUF_FLAG_REQUEST_FD | flags, + .m.planes = planes, }; GST_TRACE_OBJECT (self, "Queuing bitstream buffer %i", buf.index); - if (self->mplane) { - buf.length = gst_buffer_n_memory (buffer); - buf.m.planes = planes; - for (i = 0; i < buf.length; i++) { - GstMemory *mem = gst_buffer_peek_memory (buffer, i); - /* *INDENT-OFF* */ - planes[i] = (struct v4l2_plane) { - .bytesused = gst_memory_get_sizes (mem, NULL, NULL), - }; - /* *INDENT-ON* */ - } - } else { - buf.bytesused = gst_buffer_get_size (buffer); - } + if (!gst_v4l2_encoder_import_buffer (self, buffer, &buf)) + return FALSE; ret = ioctl (self->video_fd, VIDIOC_QBUF, &buf); if (ret < 0) { @@ -818,6 +959,9 @@ gst_v4l2_encoder_queue_sink_buffer (GstV4l2Encoder * self, return FALSE; } + buffer_state_queue (self, buf.index, + gst_dmabuf_memory_get_fd (gst_buffer_peek_memory (buffer, 0))); + return TRUE; } @@ -828,7 +972,7 @@ gst_v4l2_encoder_dequeue_sink (GstV4l2Encoder * self) struct v4l2_plane planes[GST_VIDEO_MAX_PLANES] = { {0} }; struct v4l2_buffer buf = { .type = self->sink_buf_type, - .memory = V4L2_MEMORY_MMAP, + .memory = V4L2_MEMORY_DMABUF, }; if (self->mplane) { @@ -843,6 +987,7 @@ gst_v4l2_encoder_dequeue_sink (GstV4l2Encoder * self) } GST_TRACE_OBJECT (self, "Dequeued picture buffer %i", buf.index); + buffer_state_dequeue (self, buf.index); return TRUE; } @@ -1256,7 +1401,7 @@ gst_v4l2_encoder_request_queue (GstV4l2Request * request, guint flags) if (!gst_v4l2_encoder_queue_sink_buffer (encoder, request, request->pic_buf, request->frame_num, flags)) { - GST_ERROR_OBJECT (encoder, "Driver did not accept the picture buffer."); + GST_INFO_OBJECT (encoder, "Driver did not accept 
the picture buffer."); return FALSE; } @@ -1344,6 +1489,13 @@ gst_v4l2_encoder_request_set_done (GstV4l2Request * request, return ret; } +void +gst_v4l2_encoder_request_replace_pic_buf (GstV4l2Request * request, + GstBuffer * pic_buf) +{ + gst_buffer_replace (&request->pic_buf, pic_buf); +} + gboolean gst_v4l2_codec_vp8_enc_get_qp_range (GstV4l2Encoder * self, guint * qp_min, guint * qp_max) diff --git a/sys/v4l2codecs/gstv4l2encoder.h b/sys/v4l2codecs/gstv4l2encoder.h index e7f44ba..09016e1 100644 --- a/sys/v4l2codecs/gstv4l2encoder.h +++ b/sys/v4l2codecs/gstv4l2encoder.h @@ -64,7 +64,8 @@ gboolean gst_v4l2_encoder_set_src_fmt (GstV4l2Encoder * self, GstVideoI gint gst_v4l2_encoder_request_buffers (GstV4l2Encoder * self, GstPadDirection direction, - guint num_buffers); + guint num_buffers, + guint mem_type); gboolean gst_v4l2_encoder_export_buffer (GstV4l2Encoder * self, GstPadDirection directon, @@ -130,12 +131,15 @@ gboolean gst_v4l2_encoder_request_queue (GstV4l2Request * request, gint gst_v4l2_encoder_request_set_done (GstV4l2Request * request, guint32 * bytesused, guint32 * flags); -gboolean gst_v4l2_encoder_request_failed (GstV4l2Request * request); +void gst_v4l2_encoder_request_replace_pic_buf (GstV4l2Request * request, + GstBuffer * pic_buf); gboolean gst_v4l2_codec_vp8_enc_get_qp_range (GstV4l2Encoder * self, guint * qp_min, guint * qp_max); gboolean gst_v4l2_codec_h264_enc_get_qp_range (GstV4l2Encoder * self, guint * qp_min, guint * qp_max); +gboolean gst_v4l2_encoder_uses_zero_copy (GstV4l2Encoder * self); + G_END_DECLS #endif /* __GST_V4L2_ENCODER_H__ */ -- 2.25.1