From f2d3f04d9871e1cbd5c387f3a4b71c4fd4e0271b Mon Sep 17 00:00:00 2001 From: Junrou Nishida Date: Fri, 11 Aug 2023 19:24:43 +0900 Subject: [PATCH] feat: implement FaceLandmarker --- .../Scripts/Tasks/Vision/FaceLandmarker.meta | 8 + .../Vision/FaceLandmarker/FaceLandmarker.cs | 216 ++++++++++++++++++ .../FaceLandmarker/FaceLandmarker.cs.meta | 11 + .../FaceLandmarker/FaceLandmarkerOptions.cs | 82 +++++++ .../FaceLandmarkerOptions.cs.meta | 11 + .../FaceLandmarker/FaceLandmarkerResult.cs | 39 ++++ .../FaceLandmarkerResult.cs.meta | 11 + mediapipe_api/BUILD | 4 +- 8 files changed, 381 insertions(+), 1 deletion(-) create mode 100644 Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker.meta create mode 100644 Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs create mode 100644 Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs.meta create mode 100644 Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs create mode 100644 Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs.meta create mode 100644 Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs create mode 100644 Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs.meta diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker.meta b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker.meta new file mode 100644 index 000000000..b66f4b0d6 --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 7bf0b0db324419cd7bbcdb856d68acb2 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + 
+  assetBundleVariant:
diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs
new file mode 100644
index 000000000..9641b5de6
--- /dev/null
+++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs
@@ -0,0 +1,216 @@
+// Copyright (c) 2023 homuler
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+using System.Collections.Generic;
+using Mediapipe.Tasks.Components.Containers;
+
+namespace Mediapipe.Tasks.Vision.FaceLandmarker
+{
+  public sealed class FaceLandmarker : Core.BaseVisionTaskApi
+  {
+    private const string _IMAGE_IN_STREAM_NAME = "image_in";
+    private const string _IMAGE_OUT_STREAM_NAME = "image_out";
+    private const string _IMAGE_TAG = "IMAGE";
+    private const string _NORM_RECT_STREAM_NAME = "norm_rect_in";
+    private const string _NORM_RECT_TAG = "NORM_RECT";
+    private const string _NORM_LANDMARKS_STREAM_NAME = "norm_landmarks";
+    private const string _NORM_LANDMARKS_TAG = "NORM_LANDMARKS";
+    private const string _BLENDSHAPES_STREAM_NAME = "blendshapes";
+    private const string _BLENDSHAPES_TAG = "BLENDSHAPES";
+    private const string _FACE_GEOMETRY_STREAM_NAME = "face_geometry";
+    private const string _FACE_GEOMETRY_TAG = "FACE_GEOMETRY";
+    private const string _TASK_GRAPH_NAME = "mediapipe.tasks.vision.face_landmarker.FaceLandmarkerGraph";
+
+    private const int _MICRO_SECONDS_PER_MILLISECOND = 1000; // packet timestamps are built in microseconds; the public API takes milliseconds
+
+#pragma warning disable IDE0052 // Remove unread private members
+    /// <remarks>
+    ///   keep reference to prevent GC from collecting the callback instance.
+    /// </remarks>
+    private readonly Tasks.Core.TaskRunner.PacketsCallback _packetCallback;
+#pragma warning restore IDE0052
+
+    private FaceLandmarker(
+      CalculatorGraphConfig graphConfig,
+      Core.RunningMode runningMode,
+      Tasks.Core.TaskRunner.PacketsCallback packetCallback) : base(graphConfig, runningMode, packetCallback)
+    {
+      _packetCallback = packetCallback;
+    }
+
+    /// <summary>
+    ///   Creates an <see cref="FaceLandmarker" /> object from a TensorFlow Lite model and the default <see cref="FaceLandmarkerOptions" />.
+    ///
+    ///   Note that the created <see cref="FaceLandmarker" /> instance is in image mode,
+    ///   for detecting face landmarks on single image inputs.
+    /// </summary>
+    /// <param name="modelPath">Path to the model.</param>
+    /// <returns>
+    ///   <see cref="FaceLandmarker" /> object that's created from the model and the default <see cref="FaceLandmarkerOptions" />.
+    /// </returns>
+    public static FaceLandmarker CreateFromModelPath(string modelPath)
+    {
+      var baseOptions = new Tasks.Core.BaseOptions(modelAssetPath: modelPath);
+      var options = new FaceLandmarkerOptions(baseOptions, runningMode: Core.RunningMode.IMAGE);
+      return CreateFromOptions(options);
+    }
+
+    /// <summary>
+    ///   Creates the <see cref="FaceLandmarker" /> object from <paramref name="options" />.
+    /// </summary>
+    /// <param name="options">Options for the face landmarker task.</param>
+    /// <returns>
+    ///   <see cref="FaceLandmarker" /> object that's created from <paramref name="options" />.
+    /// </returns>
+    public static FaceLandmarker CreateFromOptions(FaceLandmarkerOptions options)
+    {
+      var outputStreams = new List<string> {
+        string.Join(":", _NORM_LANDMARKS_TAG, _NORM_LANDMARKS_STREAM_NAME),
+        string.Join(":", _IMAGE_TAG, _IMAGE_OUT_STREAM_NAME),
+      };
+      if (options.outputFaceBlendshapes)
+      {
+        outputStreams.Add(string.Join(":", _BLENDSHAPES_TAG, _BLENDSHAPES_STREAM_NAME));
+      }
+      if (options.outputFaceTransformationMatrixes)
+      {
+        outputStreams.Add(string.Join(":", _FACE_GEOMETRY_TAG, _FACE_GEOMETRY_STREAM_NAME));
+      }
+      var taskInfo = new Tasks.Core.TaskInfo<FaceLandmarkerOptions>(
+        taskGraph: _TASK_GRAPH_NAME,
+        inputStreams: new List<string> {
+          string.Join(":", _IMAGE_TAG, _IMAGE_IN_STREAM_NAME),
+          string.Join(":", _NORM_RECT_TAG, _NORM_RECT_STREAM_NAME),
+        },
+        outputStreams: outputStreams,
+        taskOptions: options);
+
+      return new FaceLandmarker(
+        taskInfo.GenerateGraphConfig(options.runningMode == Core.RunningMode.LIVE_STREAM),
+        options.runningMode,
+        BuildPacketsCallback(options.resultCallback));
+    }
+
+    /// <summary>
+    ///   Performs face landmarks detection on the provided MediaPipe Image.
+    ///
+    ///   Only use this method when the <see cref="FaceLandmarker" /> is created with the image running mode.
+    ///   The image can be of any size with format RGB or RGBA.
+    /// </summary>
+    /// <param name="image">MediaPipe Image.</param>
+    /// <param name="imageProcessingOptions">Options for image processing.</param>
+    /// <returns>
+    ///   A face landmarks detection results.
+    /// </returns>
+    public FaceLandmarkerResult Detect(Image image, Core.ImageProcessingOptions? imageProcessingOptions = null)
+    {
+      var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false);
+
+      var packetMap = new PacketMap();
+      packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image));
+      packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect));
+      var outputPackets = ProcessImageData(packetMap);
+
+      return BuildFaceLandmarkerResult(outputPackets);
+    }
+
+    /// <summary>
+    ///   Performs face landmarks detection on the provided video frames.
+    ///
+    ///   Only use this method when the FaceLandmarker is created with the video running mode.
+    ///   It's required to provide the video frame's timestamp (in milliseconds) along with the video frame.
+    ///   The input timestamps should be monotonically increasing for adjacent calls of this method.
+    /// </summary>
+    /// <param name="image">MediaPipe Image.</param>
+    /// <param name="timestampMs">The video frame's timestamp in milliseconds.</param>
+    /// <param name="imageProcessingOptions">Options for image processing.</param>
+    /// <returns>The face landmarks detection results.</returns>
+    public FaceLandmarkerResult DetectForVideo(Image image, int timestampMs, Core.ImageProcessingOptions? imageProcessingOptions = null)
+    {
+      var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false);
+
+      PacketMap outputPackets = null;
+      using (var timestamp = new Timestamp((long)timestampMs * _MICRO_SECONDS_PER_MILLISECOND)) // widen first: int ms * 1000 wraps after about 35.8 minutes
+      {
+        var packetMap = new PacketMap();
+        packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image, timestamp));
+        packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect).At(timestamp));
+        outputPackets = ProcessVideoData(packetMap);
+      }
+
+      return BuildFaceLandmarkerResult(outputPackets);
+    }
+
+    /// <summary>
+    ///   Sends live image data to perform face landmarks detection.
+    ///   Only use this method when the FaceLandmarker is created with the live stream running mode.
+    ///   The input timestamps should be monotonically increasing for adjacent calls of this method.
+    ///   This method will return immediately after the input image is accepted. The results will be available via
+    ///   the <see cref="FaceLandmarkerOptions.ResultCallback" /> provided in the <see cref="FaceLandmarkerOptions" />.
+    ///   The <see cref="DetectAsync" /> method is designed to process live stream data such as camera input. To lower the
+    ///   overall latency, face landmarker may drop the input images if needed. In other words, it's not guaranteed to have output per input image.
+    /// </summary>
+    /// <param name="image">MediaPipe Image.</param>
+    /// <param name="timestampMs">The input timestamp in milliseconds.</param>
+    /// <param name="imageProcessingOptions">Options for image processing.</param>
+    public void DetectAsync(Image image, int timestampMs, Core.ImageProcessingOptions? imageProcessingOptions = null)
+    {
+      var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false);
+
+      using (var timestamp = new Timestamp((long)timestampMs * _MICRO_SECONDS_PER_MILLISECOND)) // widen first: int ms * 1000 wraps after about 35.8 minutes
+      {
+        var packetMap = new PacketMap();
+        packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image, timestamp));
+        packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect).At(timestamp));
+
+        SendLiveStreamData(packetMap);
+      }
+    }
+
+    private static Tasks.Core.TaskRunner.PacketsCallback BuildPacketsCallback(FaceLandmarkerOptions.ResultCallback resultCallback)
+    {
+      if (resultCallback == null)
+      {
+        return null; // no user callback supplied, so no packets callback is built
+      }
+
+      return (PacketMap outputPackets) =>
+      {
+        var outImagePacket = outputPackets.At<ImagePacket, Image>(_IMAGE_OUT_STREAM_NAME);
+        if (outImagePacket == null || outImagePacket.IsEmpty())
+        {
+          return; // no output image in this packet set: the user callback is not invoked
+        }
+
+        var image = outImagePacket.Get();
+        var faceLandmarkerResult = BuildFaceLandmarkerResult(outputPackets);
+        var timestamp = outImagePacket.Timestamp().Microseconds() / _MICRO_SECONDS_PER_MILLISECOND;
+
+        resultCallback(faceLandmarkerResult, image, (int)timestamp);
+      };
+    }
+
+    private static FaceLandmarkerResult BuildFaceLandmarkerResult(PacketMap outputPackets)
+    {
+      var faceLandmarksProtoListPacket =
+        outputPackets.At<NormalizedLandmarkListVectorPacket, List<NormalizedLandmarkList>>(_NORM_LANDMARKS_STREAM_NAME);
+      if (faceLandmarksProtoListPacket == null || faceLandmarksProtoListPacket.IsEmpty()) // missing or empty landmarks: report an empty result
+      {
+        return new FaceLandmarkerResult(new List<NormalizedLandmarks>(), new List<Classifications>(), new List<float[]>());
+      }
+
+      var faceLandmarksProtoList = faceLandmarksProtoListPacket.Get();
+
+      var faceBlendshapesProtoList =
+        outputPackets.At<ClassificationListVectorPacket, List<ClassificationList>>(_BLENDSHAPES_STREAM_NAME)?.Get();
+
+      var faceTransformationMatrixesProtoList =
+        outputPackets.At<FaceGeometryVectorPacket, List<FaceGeometry.Proto.FaceGeometry>>(_FACE_GEOMETRY_STREAM_NAME)?.Get();
+
+      return FaceLandmarkerResult.CreateFrom(faceLandmarksProtoList, faceBlendshapesProtoList, faceTransformationMatrixesProtoList);
+    }
+  }
+}
diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs.meta
b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs.meta
new file mode 100644
index 000000000..2abf8a7ea
--- /dev/null
+++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 5cabeca8cb4152c919f35add8f700148
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData:
+  assetBundleName:
+  assetBundleVariant:
diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs
new file mode 100644
index 000000000..19f71eda8
--- /dev/null
+++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs
@@ -0,0 +1,82 @@
+// Copyright (c) 2023 homuler
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+namespace Mediapipe.Tasks.Vision.FaceLandmarker
+{
+  public sealed class FaceLandmarkerOptions : Tasks.Core.ITaskOptions
+  {
+    /// <param name="faceLandmarksResult">
+    ///   The face landmarks detection results.
+    /// </param>
+    /// <param name="image">
+    ///   The input image that the face landmarker runs on.
+    /// </param>
+    /// <param name="timestampMs">
+    ///   The input timestamp in milliseconds.
+    /// </param>
+    public delegate void ResultCallback(FaceLandmarkerResult faceLandmarksResult, Image image, int timestampMs);
+
+    public Tasks.Core.BaseOptions baseOptions { get; }
+    public Core.RunningMode runningMode { get; }
+    public int numFaces { get; }
+    public float minFaceDetectionConfidence { get; }
+    public float minFacePresenceConfidence { get; }
+    public float minTrackingConfidence { get; }
+    public bool outputFaceBlendshapes { get; } // not written to the proto by ToProto() below
+    public bool outputFaceTransformationMatrixes { get; } // not written to the proto by ToProto() below
+    public ResultCallback resultCallback { get; }
+
+    public FaceLandmarkerOptions(
+      Tasks.Core.BaseOptions baseOptions,
+      Core.RunningMode runningMode = Core.RunningMode.IMAGE,
+      int numFaces = 1,
+      float minFaceDetectionConfidence = 0.5f,
+      float minFacePresenceConfidence = 0.5f,
+      float minTrackingConfidence = 0.5f,
+      bool outputFaceBlendshapes = false,
+      bool outputFaceTransformationMatrixes = false,
+      ResultCallback resultCallback = null)
+    {
+      this.baseOptions = baseOptions;
+      this.runningMode = runningMode;
+      this.numFaces = numFaces;
+      this.minFaceDetectionConfidence = minFaceDetectionConfidence;
+      this.minFacePresenceConfidence = minFacePresenceConfidence;
+      this.minTrackingConfidence = minTrackingConfidence;
+      this.outputFaceBlendshapes = outputFaceBlendshapes;
+      this.outputFaceTransformationMatrixes = outputFaceTransformationMatrixes;
+      this.resultCallback = resultCallback;
+    }
+
+    internal Proto.FaceLandmarkerGraphOptions ToProto()
+    {
+      var baseOptionsProto = baseOptions.ToProto();
+      baseOptionsProto.UseStreamMode = runningMode != Core.RunningMode.IMAGE; // stream mode for every running mode except IMAGE
+
+      return new Proto.FaceLandmarkerGraphOptions
+      {
+        BaseOptions = baseOptionsProto,
+        FaceDetectorGraphOptions = new FaceDetector.Proto.FaceDetectorGraphOptions
+        {
+          MinDetectionConfidence = minFaceDetectionConfidence,
+          NumFaces = numFaces,
+        },
+        FaceLandmarksDetectorGraphOptions = new Proto.FaceLandmarksDetectorGraphOptions
+        {
+          MinDetectionConfidence = minFacePresenceConfidence, // the presence threshold is carried in the landmarks detector's MinDetectionConfidence field
+        },
+        MinTrackingConfidence = minTrackingConfidence,
+      };
+    }
+
+    CalculatorOptions Tasks.Core.ITaskOptions.ToCalculatorOptions()
+    {
+      var options = new CalculatorOptions();
+      options.SetExtension(Proto.FaceLandmarkerGraphOptions.Extensions.Ext, ToProto()); // wraps ToProto() as the FaceLandmarkerGraphOptions extension
+      return options;
+    }
+  }
+}
diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs.meta b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs.meta
new file mode 100644
index 000000000..cad6e82b1
--- /dev/null
+++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: fa2495ecb17bfb8988739ee25f8fa615
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData:
+  assetBundleName:
+  assetBundleVariant:
diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs
new file mode 100644
index 000000000..528045400
--- /dev/null
+++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs
@@ -0,0 +1,39 @@
+// Copyright (c) 2023 homuler
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+using System.Collections.Generic;
+using System.Linq;
+using Mediapipe.Tasks.Components.Containers;
+
+namespace Mediapipe.Tasks.Vision.FaceLandmarker
+{
+  public readonly struct FaceLandmarkerResult // the landmark, blendshape and transformation-matrix lists produced by one detection call
+  {
+    public readonly IReadOnlyList<NormalizedLandmarks> faceLandmarks;
+    public readonly IReadOnlyList<Classifications> faceBlendshapes; // empty when CreateFrom receives no blendshapes list
+    public readonly IReadOnlyList<float[]> facialTransformationMatrixes; // empty when CreateFrom receives no face geometry list
+
+    internal FaceLandmarkerResult(IReadOnlyList<NormalizedLandmarks> faceLandmarks,
+        IReadOnlyList<Classifications> faceBlendshapes, IReadOnlyList<float[]> facialTransformationMatrixes)
+    {
+      this.faceLandmarks = faceLandmarks;
+      this.faceBlendshapes = faceBlendshapes;
+      this.facialTransformationMatrixes = facialTransformationMatrixes;
+    }
+
+    internal static FaceLandmarkerResult CreateFrom(IReadOnlyList<NormalizedLandmarkList> faceLandmarksProto,
+        IReadOnlyList<ClassificationList> faceBlendshapesProto, IReadOnlyList<FaceGeometry.Proto.FaceGeometry> facialTransformationMatrixesProto)
+    {
+      var faceLandmarks = faceLandmarksProto.Select(NormalizedLandmarks.CreateFrom).ToList();
+      var faceBlendshapes = faceBlendshapesProto == null ? new List<Classifications>() : // a null proto list maps to an empty list, never to null
+        faceBlendshapesProto.Select(x => Classifications.CreateFrom(x)).ToList();
+      var facialTransformationMatrixes = facialTransformationMatrixesProto == null ? new List<float[]>() : // a null proto list maps to an empty list, never to null
+        facialTransformationMatrixesProto.Select(x => x.PoseTransformMatrix.PackedData.ToArray()).ToList(); // each matrix is copied out of PackedData into its own array
+
+      return new FaceLandmarkerResult(faceLandmarks, faceBlendshapes, facialTransformationMatrixes);
+    }
+  }
+}
diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs.meta b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs.meta
new file mode 100644
index 000000000..aafd1fdab
--- /dev/null
+++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: d4482255d96e0403aa0c07751673bd15
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData:
+  assetBundleName:
+  assetBundleVariant:
diff --git a/mediapipe_api/BUILD b/mediapipe_api/BUILD
index 2fd255cfc..0684b0b1e 100644
--- a/mediapipe_api/BUILD
+++ b/mediapipe_api/BUILD
@@ -324,7 +324,9 @@ cc_library(
 
 cc_library(
     name = "face_mesh_calculators",
-    deps = select({
+    deps = [
+        "@com_google_mediapipe//mediapipe/tasks/cc/vision/face_landmarker:face_landmarker",
+    ] + select({
         "@com_google_mediapipe//mediapipe/gpu:disable_gpu": [
             "@com_google_mediapipe//mediapipe/graphs/face_mesh:desktop_live_calculators",
         ],