feat: implement FaceLandmarker

homuler · Aug 11, 2023 · f2d3f04 · f2d3f04
1 parent a41dd24
commit f2d3f04
Show file tree

Hide file tree

Showing 8 changed files with 381 additions and 1 deletion.
diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker.meta b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker.meta
diff --git a/...om.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs b/...om.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs
@@ -0,0 +1,216 @@
+// Copyright (c) 2023 homuler
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+using System.Collections.Generic;
+using Mediapipe.Tasks.Components.Containers;
+
+namespace Mediapipe.Tasks.Vision.FaceLandmarker
+{
+  public sealed class FaceLandmarker : Core.BaseVisionTaskApi
+  {
+    // Stream names and tags of the task graph. CreateFromOptions joins each pair as "TAG:stream_name".
+    private const string _IMAGE_IN_STREAM_NAME = "image_in";
+    // BuildPacketsCallback reads the output image and its timestamp from this stream.
+    private const string _IMAGE_OUT_STREAM_NAME = "image_out";
+    private const string _IMAGE_TAG = "IMAGE";
+    private const string _NORM_RECT_STREAM_NAME = "norm_rect_in";
+    private const string _NORM_RECT_TAG = "NORM_RECT";
+    private const string _NORM_LANDMARKS_STREAM_NAME = "norm_landmarks";
+    private const string _NORM_LANDMARKS_TAG = "NORM_LANDMARKS";
+    // Only added to the output streams when FaceLandmarkerOptions.outputFaceBlendshapes is set.
+    private const string _BLENDSHAPES_STREAM_NAME = "blendshapes";
+    private const string _BLENDSHAPES_TAG = "BLENDSHAPES";
+    // Only added to the output streams when FaceLandmarkerOptions.outputFaceTransformationMatrixes is set.
+    private const string _FACE_GEOMETRY_STREAM_NAME = "face_geometry";
+    private const string _FACE_GEOMETRY_TAG = "FACE_GEOMETRY";
+    private const string _TASK_GRAPH_NAME = "mediapipe.tasks.vision.face_landmarker.FaceLandmarkerGraph";
+
+    // Conversion factor between the millisecond timestamps of the public API
+    // and the microsecond values exchanged with Timestamp.
+    private const int _MICRO_SECONDS_PER_MILLISECOND = 1000;
+
+#pragma warning disable IDE0052 // Remove unread private members
+    /// <remarks>
+    ///   keep reference to prevent GC from collecting the callback instance.
+    /// </remarks>
+    private readonly Tasks.Core.TaskRunner.PacketsCallback _packetCallback;
+#pragma warning restore IDE0052
+
+    /// <summary>
+    ///   Initializes the task by forwarding all arguments to the base vision task API.
+    ///   The constructor is private; instances are obtained from the static Create* methods.
+    /// </summary>
+    /// <param name="graphConfig">Graph config forwarded to the base class.</param>
+    /// <param name="runningMode">Running mode forwarded to the base class.</param>
+    /// <param name="packetCallback">Packets callback forwarded to the base class and retained by this instance.</param>
+    private FaceLandmarker(
+      CalculatorGraphConfig graphConfig,
+      Core.RunningMode runningMode,
+      Tasks.Core.TaskRunner.PacketsCallback packetCallback)
+      : base(graphConfig, runningMode, packetCallback)
+    {
+      // Retained only so that the delegate stays referenced from managed code.
+      this._packetCallback = packetCallback;
+    }
+
+    /// <summary>
+    ///   Creates a <see cref="FaceLandmarker" /> object from a TensorFlow Lite model,
+    ///   using the default <see cref="FaceLandmarkerOptions" /> values.
+    ///
+    ///   The returned <see cref="FaceLandmarker" /> is configured for the image running mode,
+    ///   i.e. for detecting face landmarks on single image inputs.
+    /// </summary>
+    /// <param name="modelPath">Path to the model.</param>
+    /// <returns>
+    ///   A <see cref="FaceLandmarker" /> object created from the model and the default <see cref="FaceLandmarkerOptions" />.
+    /// </returns>
+    public static FaceLandmarker CreateFromModelPath(string modelPath)
+    {
+      return CreateFromOptions(
+        new FaceLandmarkerOptions(
+          new Tasks.Core.BaseOptions(modelAssetPath: modelPath),
+          runningMode: Core.RunningMode.IMAGE));
+    }
+
+    /// <summary>
+    ///   Creates a <see cref="FaceLandmarker" /> object from the given <see cref="FaceLandmarkerOptions" />.
+    /// </summary>
+    /// <param name="options">Options for the face landmarker task.</param>
+    /// <returns>
+    ///   A <see cref="FaceLandmarker" /> object that's created from <paramref name="options" />.
+    /// </returns>
+    public static FaceLandmarker CreateFromOptions(FaceLandmarkerOptions options)
+    {
+      // Every stream is declared to the graph as "TAG:stream_name".
+      var inputStreams = new List<string> {
+        $"{_IMAGE_TAG}:{_IMAGE_IN_STREAM_NAME}",
+        $"{_NORM_RECT_TAG}:{_NORM_RECT_STREAM_NAME}",
+      };
+
+      // Landmarks and the output image are always requested; the other streams are opt-in.
+      var outputStreams = new List<string> {
+        $"{_NORM_LANDMARKS_TAG}:{_NORM_LANDMARKS_STREAM_NAME}",
+        $"{_IMAGE_TAG}:{_IMAGE_OUT_STREAM_NAME}",
+      };
+      if (options.outputFaceBlendshapes)
+      {
+        outputStreams.Add($"{_BLENDSHAPES_TAG}:{_BLENDSHAPES_STREAM_NAME}");
+      }
+      if (options.outputFaceTransformationMatrixes)
+      {
+        outputStreams.Add($"{_FACE_GEOMETRY_TAG}:{_FACE_GEOMETRY_STREAM_NAME}");
+      }
+
+      var taskInfo = new Tasks.Core.TaskInfo<FaceLandmarkerOptions>(
+        taskGraph: _TASK_GRAPH_NAME,
+        inputStreams: inputStreams,
+        outputStreams: outputStreams,
+        taskOptions: options);
+
+      var isLiveStream = options.runningMode == Core.RunningMode.LIVE_STREAM;
+      var graphConfig = taskInfo.GenerateGraphConfig(isLiveStream);
+
+      return new FaceLandmarker(graphConfig, options.runningMode, BuildPacketsCallback(options.resultCallback));
+    }
+
+    /// <summary>
+    ///   Runs face landmarks detection on a single MediaPipe Image.
+    ///
+    ///   Only use this method when the <see cref="FaceLandmarker" /> is created with the image running mode.
+    ///   The image can be of any size with format RGB or RGBA.
+    /// </summary>
+    /// <param name="image">MediaPipe Image.</param>
+    /// <param name="imageProcessingOptions">
+    ///   Options for image processing. They are converted to a normalized rect with <c>roiAllowed: false</c>.
+    /// </param>
+    /// <returns>
+    ///   The face landmarks detection results.
+    /// </returns>
+    public FaceLandmarkerResult Detect(Image image, Core.ImageProcessingOptions? imageProcessingOptions = null)
+    {
+      var rect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false);
+
+      var inputs = new PacketMap();
+      inputs.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image));
+      inputs.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(rect));
+
+      return BuildFaceLandmarkerResult(ProcessImageData(inputs));
+    }
+
+    /// <summary>
+    ///   Performs face landmarks detection on the provided video frame.
+    ///
+    ///   Only use this method when the FaceLandmarker is created with the video
+    ///   running mode. It's required to provide the video frame's timestamp (in
+    ///   milliseconds) along with the video frame. The input timestamps should be
+    ///   monotonically increasing for adjacent calls of this method.
+    /// </summary>
+    /// <param name="image">MediaPipe Image holding the video frame.</param>
+    /// <param name="timestampMs">Timestamp of the video frame in milliseconds.</param>
+    /// <param name="imageProcessingOptions">Options for image processing.</param>
+    /// <returns>
+    ///   The face landmarks detection results.
+    /// </returns>
+    public FaceLandmarkerResult DetectForVideo(Image image, int timestampMs, Core.ImageProcessingOptions? imageProcessingOptions = null)
+    {
+      var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false);
+
+      PacketMap outputPackets = null;
+      // Widen to long before converting to microseconds: the int product silently wraps
+      // once timestampMs exceeds int.MaxValue / 1000 (about 35.8 minutes of video).
+      using (var timestamp = new Timestamp((long)timestampMs * _MICRO_SECONDS_PER_MILLISECOND))
+      {
+        var packetMap = new PacketMap();
+        packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image, timestamp));
+        packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect).At(timestamp));
+        outputPackets = ProcessVideoData(packetMap);
+      }
+
+      return BuildFaceLandmarkerResult(outputPackets);
+    }
+
+    /// <summary>
+    ///   Sends live image data to perform face landmarks detection.
+    ///
+    ///   Only use this method when the FaceLandmarker is created with the live stream
+    ///   running mode. The input timestamps should be monotonically increasing for
+    ///   adjacent calls of this method. This method will return immediately after the
+    ///   input image is accepted. The results will be available via the
+    ///   <see cref="FaceLandmarkerOptions.ResultCallback" /> provided in the <see cref="FaceLandmarkerOptions" />.
+    ///   The <see cref="DetectAsync" /> method is designed to process live stream data such as camera
+    ///   input. To lower the overall latency, face landmarker may drop the input
+    ///   images if needed. In other words, it's not guaranteed to have output per
+    ///   input image.
+    /// </summary>
+    /// <param name="image">MediaPipe Image holding the live frame.</param>
+    /// <param name="timestampMs">Timestamp of the input image in milliseconds.</param>
+    /// <param name="imageProcessingOptions">Options for image processing.</param>
+    public void DetectAsync(Image image, int timestampMs, Core.ImageProcessingOptions? imageProcessingOptions = null)
+    {
+      var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false);
+
+      // Widen to long before converting to microseconds: the int product silently wraps
+      // once timestampMs exceeds int.MaxValue / 1000 (about 35.8 minutes of streaming).
+      using (var timestamp = new Timestamp((long)timestampMs * _MICRO_SECONDS_PER_MILLISECOND))
+      {
+        var packetMap = new PacketMap();
+        packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image, timestamp));
+        packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect).At(timestamp));
+
+        SendLiveStreamData(packetMap);
+      }
+    }
+
+    /// <summary>
+    ///   Wraps the user's <see cref="FaceLandmarkerOptions.ResultCallback" /> into a packets callback.
+    /// </summary>
+    /// <param name="resultCallback">The callback taken from the options; may be null.</param>
+    /// <returns>
+    ///   null when <paramref name="resultCallback" /> is null; otherwise a callback that builds a
+    ///   <see cref="FaceLandmarkerResult" /> from the output packets and invokes <paramref name="resultCallback" />
+    ///   with the result, the output image and the timestamp in milliseconds.
+    /// </returns>
+    private static Tasks.Core.TaskRunner.PacketsCallback BuildPacketsCallback(FaceLandmarkerOptions.ResultCallback resultCallback)
+    {
+      if (resultCallback == null)
+      {
+        return null;
+      }
+
+      return (PacketMap outputPackets) =>
+      {
+        var outImagePacket = outputPackets.At<ImagePacket, Image>(_IMAGE_OUT_STREAM_NAME);
+        // Nothing to report when the output image is missing or empty.
+        if (outImagePacket == null || outImagePacket.IsEmpty())
+        {
+          return;
+        }
+
+        var image = outImagePacket.Get();
+        var faceLandmarkerResult = BuildFaceLandmarkerResult(outputPackets);
+
+        // Dispose the Timestamp as soon as its value is read, like the other Timestamp usages in this class,
+        // instead of leaving the instance to the finalizer.
+        long timestampMs;
+        using (var timestamp = outImagePacket.Timestamp())
+        {
+          timestampMs = timestamp.Microseconds() / _MICRO_SECONDS_PER_MILLISECOND;
+        }
+
+        resultCallback(faceLandmarkerResult, image, (int)timestampMs);
+      };
+    }
+
+    /// <summary>
+    ///   Builds a <see cref="FaceLandmarkerResult" /> from the output packets of the task graph.
+    /// </summary>
+    /// <param name="outputPackets">Packets keyed by output stream name.</param>
+    /// <returns>
+    ///   A result with three empty lists when the landmarks packet is missing or empty;
+    ///   otherwise the result created from the landmarks and, when their packets are present,
+    ///   the blendshapes and the face geometry.
+    /// </returns>
+    private static FaceLandmarkerResult BuildFaceLandmarkerResult(PacketMap outputPackets)
+    {
+      var faceLandmarksProtoListPacket =
+        outputPackets.At<NormalizedLandmarkListVectorPacket, List<NormalizedLandmarkList>>(_NORM_LANDMARKS_STREAM_NAME);
+      // At() is treated as nullable everywhere else in this class, so guard against null here as well
+      // instead of failing with a NullReferenceException.
+      if (faceLandmarksProtoListPacket == null || faceLandmarksProtoListPacket.IsEmpty())
+      {
+        return new FaceLandmarkerResult(new List<NormalizedLandmarks>(), new List<Classifications>(), new List<float[]>());
+      }
+
+      var faceLandmarksProtoList = faceLandmarksProtoListPacket.Get();
+
+      // The blendshapes and face geometry streams are only requested when the options enable them,
+      // so their packets may be absent; CreateFrom accepts null for both lists.
+      var faceBlendshapesProtoList =
+        outputPackets.At<ClassificationListVectorPacket, List<ClassificationList>>(_BLENDSHAPES_STREAM_NAME)?.Get();
+
+      var faceTransformationMatrixesProtoList =
+        outputPackets.At<FaceGeometry.FaceGeometryVectorPacket, List<FaceGeometry.Proto.FaceGeometry>>(_FACE_GEOMETRY_STREAM_NAME)?.Get();
+
+      return FaceLandmarkerResult.CreateFrom(faceLandmarksProtoList, faceBlendshapesProtoList, faceTransformationMatrixesProtoList);
+    }
+  }
+}
diff --git a/...thub.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs.meta b/...thub.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs.meta
diff --git a/...ub.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs b/...ub.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs
@@ -0,0 +1,82 @@
+// Copyright (c) 2023 homuler
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+namespace Mediapipe.Tasks.Vision.FaceLandmarker
+{
+  /// <summary>
+  ///   Options for the face landmarker task.
+  /// </summary>
+  public sealed class FaceLandmarkerOptions : Tasks.Core.ITaskOptions
+  {
+    /// <summary>
+    ///   Signature of the callback that receives detection results.
+    /// </summary>
+    /// <param name="faceLandmarksResult">
+    ///   The face landmarks detection results.
+    /// </param>
+    /// <param name="image">
+    ///   The input image that the face landmarker runs on.
+    /// </param>
+    /// <param name="timestampMs">
+    ///   The input timestamp in milliseconds.
+    /// </param>
+    public delegate void ResultCallback(FaceLandmarkerResult faceLandmarksResult, Image image, int timestampMs);
+
+    /// <summary>Base options; converted to the proto's <c>BaseOptions</c>.</summary>
+    public Tasks.Core.BaseOptions baseOptions { get; }
+    /// <summary>Running mode; any mode other than IMAGE sets <c>UseStreamMode</c> in the proto.</summary>
+    public Core.RunningMode runningMode { get; }
+    /// <summary>Written to <c>NumFaces</c> of the face detector graph options.</summary>
+    public int numFaces { get; }
+    /// <summary>Written to <c>MinDetectionConfidence</c> of the face detector graph options.</summary>
+    public float minFaceDetectionConfidence { get; }
+    /// <summary>Written to <c>MinDetectionConfidence</c> of the face landmarks detector graph options.</summary>
+    public float minFacePresenceConfidence { get; }
+    /// <summary>Written to <c>MinTrackingConfidence</c> of the face landmarker graph options.</summary>
+    public float minTrackingConfidence { get; }
+    /// <summary>Whether face blendshapes are requested; not part of the proto built by <see cref="ToProto" />.</summary>
+    public bool outputFaceBlendshapes { get; }
+    /// <summary>Whether face transformation matrixes are requested; not part of the proto built by <see cref="ToProto" />.</summary>
+    public bool outputFaceTransformationMatrixes { get; }
+    /// <summary>Callback that receives the results; null by default.</summary>
+    public ResultCallback resultCallback { get; }
+
+    /// <summary>
+    ///   Creates the options. Every argument is stored as-is in the property of the same name.
+    /// </summary>
+    /// <param name="baseOptions">Base options of the task.</param>
+    /// <param name="runningMode">Running mode of the task.</param>
+    /// <param name="numFaces">Value for <see cref="numFaces" />.</param>
+    /// <param name="minFaceDetectionConfidence">Value for <see cref="minFaceDetectionConfidence" />.</param>
+    /// <param name="minFacePresenceConfidence">Value for <see cref="minFacePresenceConfidence" />.</param>
+    /// <param name="minTrackingConfidence">Value for <see cref="minTrackingConfidence" />.</param>
+    /// <param name="outputFaceBlendshapes">Value for <see cref="outputFaceBlendshapes" />.</param>
+    /// <param name="outputFaceTransformationMatrixes">Value for <see cref="outputFaceTransformationMatrixes" />.</param>
+    /// <param name="resultCallback">Value for <see cref="resultCallback" />.</param>
+    public FaceLandmarkerOptions(
+      Tasks.Core.BaseOptions baseOptions,
+      Core.RunningMode runningMode = Core.RunningMode.IMAGE,
+      int numFaces = 1,
+      float minFaceDetectionConfidence = 0.5f,
+      float minFacePresenceConfidence = 0.5f,
+      float minTrackingConfidence = 0.5f,
+      bool outputFaceBlendshapes = false,
+      bool outputFaceTransformationMatrixes = false,
+      ResultCallback resultCallback = null)
+    {
+      // Task-level settings.
+      this.baseOptions = baseOptions;
+      this.runningMode = runningMode;
+      this.resultCallback = resultCallback;
+
+      // Detection and tracking settings.
+      this.numFaces = numFaces;
+      this.minFaceDetectionConfidence = minFaceDetectionConfidence;
+      this.minFacePresenceConfidence = minFacePresenceConfidence;
+      this.minTrackingConfidence = minTrackingConfidence;
+
+      // Optional outputs.
+      this.outputFaceBlendshapes = outputFaceBlendshapes;
+      this.outputFaceTransformationMatrixes = outputFaceTransformationMatrixes;
+    }
+
+    /// <summary>
+    ///   Converts these options to the graph options proto.
+    /// </summary>
+    internal Proto.FaceLandmarkerGraphOptions ToProto()
+    {
+      var baseOptionsProto = baseOptions.ToProto();
+      baseOptionsProto.UseStreamMode = runningMode != Core.RunningMode.IMAGE;
+
+      var faceDetectorOptions = new FaceDetector.Proto.FaceDetectorGraphOptions
+      {
+        MinDetectionConfidence = minFaceDetectionConfidence,
+        NumFaces = numFaces,
+      };
+
+      // The presence confidence is stored in the landmarks detector's MinDetectionConfidence.
+      var faceLandmarksDetectorOptions = new Proto.FaceLandmarksDetectorGraphOptions
+      {
+        MinDetectionConfidence = minFacePresenceConfidence,
+      };
+
+      return new Proto.FaceLandmarkerGraphOptions
+      {
+        BaseOptions = baseOptionsProto,
+        FaceDetectorGraphOptions = faceDetectorOptions,
+        FaceLandmarksDetectorGraphOptions = faceLandmarksDetectorOptions,
+        MinTrackingConfidence = minTrackingConfidence,
+      };
+    }
+
+    /// <summary>
+    ///   Wraps the proto built by <see cref="ToProto" /> as an extension of a new <c>CalculatorOptions</c>.
+    /// </summary>
+    CalculatorOptions Tasks.Core.ITaskOptions.ToCalculatorOptions()
+    {
+      var calculatorOptions = new CalculatorOptions();
+      calculatorOptions.SetExtension(Proto.FaceLandmarkerGraphOptions.Extensions.Ext, ToProto());
+      return calculatorOptions;
+    }
+  }
+}
diff --git a/...muler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs.meta b/...muler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerOptions.cs.meta
diff --git a/...hub.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs b/...hub.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs
@@ -0,0 +1,39 @@
+// Copyright (c) 2023 homuler
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+using System.Collections.Generic;
+using System.Linq;
+using Mediapipe.Tasks.Components.Containers;
+
+namespace Mediapipe.Tasks.Vision.FaceLandmarker
+{
+  /// <summary>
+  ///   The face landmarks detection result produced by the face landmarker task.
+  /// </summary>
+  public readonly struct FaceLandmarkerResult
+  {
+    /// <summary>Landmarks, one entry per landmark list in the source proto list.</summary>
+    public readonly IReadOnlyList<NormalizedLandmarks> faceLandmarks;
+    /// <summary>Blendshapes; empty when <see cref="CreateFrom" /> receives no blendshapes list.</summary>
+    public readonly IReadOnlyList<Classifications> faceBlendshapes;
+    /// <summary>
+    ///   Packed data of each face's pose transform matrix; empty when <see cref="CreateFrom" /> receives no face geometry list.
+    /// </summary>
+    public readonly IReadOnlyList<float[]> facialTransformationMatrixes;
+
+    /// <summary>
+    ///   Stores the given lists as-is.
+    /// </summary>
+    internal FaceLandmarkerResult(IReadOnlyList<NormalizedLandmarks> faceLandmarks,
+        IReadOnlyList<Classifications> faceBlendshapes, IReadOnlyList<float[]> facialTransformationMatrixes)
+    {
+      this.faceLandmarks = faceLandmarks;
+      this.faceBlendshapes = faceBlendshapes;
+      this.facialTransformationMatrixes = facialTransformationMatrixes;
+    }
+
+    /// <summary>
+    ///   Converts the proto lists to a <see cref="FaceLandmarkerResult" />.
+    /// </summary>
+    /// <param name="faceLandmarksProto">Landmark lists; must not be null.</param>
+    /// <param name="faceBlendshapesProto">Blendshape lists; null yields an empty list.</param>
+    /// <param name="facialTransformationMatrixesProto">Face geometries; null yields an empty list.</param>
+    internal static FaceLandmarkerResult CreateFrom(IReadOnlyList<NormalizedLandmarkList> faceLandmarksProto,
+        IReadOnlyList<ClassificationList> faceBlendshapesProto, IReadOnlyList<FaceGeometry.Proto.FaceGeometry> facialTransformationMatrixesProto)
+    {
+      var landmarks = faceLandmarksProto.Select(NormalizedLandmarks.CreateFrom).ToList();
+
+      // The two optional inputs fall back to empty lists rather than null.
+      var blendshapes = faceBlendshapesProto?.Select(x => Classifications.CreateFrom(x)).ToList()
+          ?? new List<Classifications>();
+      var matrixes = facialTransformationMatrixesProto?.Select(x => x.PoseTransformMatrix.PackedData.ToArray()).ToList()
+          ?? new List<float[]>();
+
+      return new FaceLandmarkerResult(landmarks, blendshapes, matrixes);
+    }
+  }
+}
diff --git a/...omuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs.meta b/...omuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarkerResult.cs.meta
diff --git a/mediapipe_api/BUILD b/mediapipe_api/BUILD
@@ -324,7 +324,9 @@ cc_library(
 
 cc_library(
     name = "face_mesh_calculators",
-    deps = select({
+    deps = [
+        "@com_google_mediapipe//mediapipe/tasks/cc/vision/face_landmarker:face_landmarker",
+    ] + select({
         "@com_google_mediapipe//mediapipe/gpu:disable_gpu": [
             "@com_google_mediapipe//mediapipe/graphs/face_mesh:desktop_live_calculators",
         ],