diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs index 9b6206325..55bcbcdf6 100644 --- a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/FaceLandmarker/FaceLandmarker.cs @@ -102,7 +102,7 @@ public static FaceLandmarker CreateFromOptions(FaceLandmarkerOptions options) /// MediaPipe Image. /// Options for image processing. /// - /// A face landmarks detection results. + /// The face landmarks detection results. /// public FaceLandmarkerResult Detect(Image image, Core.ImageProcessingOptions? imageProcessingOptions = null) { diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker.meta b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker.meta new file mode 100644 index 000000000..6f87fe121 --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 95696c46f7d6889c9922b58543284e7f +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarker.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarker.cs new file mode 100644 index 000000000..d667701ec --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarker.cs @@ -0,0 +1,204 @@ +// Copyright (c) 2023 homuler +// +// Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. 
+ +using System.Collections.Generic; + +namespace Mediapipe.Tasks.Vision.HandLandmarker +{ + public sealed class HandLandmarker : Core.BaseVisionTaskApi + { + private const string _IMAGE_IN_STREAM_NAME = "image_in"; + private const string _IMAGE_OUT_STREAM_NAME = "image_out"; + private const string _IMAGE_TAG = "IMAGE"; + private const string _NORM_RECT_STREAM_NAME = "norm_rect_in"; + private const string _NORM_RECT_TAG = "NORM_RECT"; + private const string _HANDEDNESS_STREAM_NAME = "handedness"; + private const string _HANDEDNESS_TAG = "HANDEDNESS"; + private const string _HAND_LANDMARKS_STREAM_NAME = "landmarks"; + private const string _HAND_LANDMARKS_TAG = "LANDMARKS"; + private const string _HAND_WORLD_LANDMARKS_STREAM_NAME = "world_landmarks"; + private const string _HAND_WORLD_LANDMARKS_TAG = "WORLD_LANDMARKS"; + private const string _TASK_GRAPH_NAME = "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph"; + + private const int _MICRO_SECONDS_PER_MILLISECOND = 1000; + +#pragma warning disable IDE0052 // Remove unread private members + /// + /// Keeps a reference to the callback to prevent GC from collecting the callback instance. + /// + private readonly Tasks.Core.TaskRunner.PacketsCallback _packetCallback; +#pragma warning restore IDE0052 + + private HandLandmarker( + CalculatorGraphConfig graphConfig, + Core.RunningMode runningMode, + Tasks.Core.TaskRunner.PacketsCallback packetCallback) : base(graphConfig, runningMode, packetCallback) + { + _packetCallback = packetCallback; + } + + /// + /// Creates a HandLandmarker object from a TensorFlow Lite model and the default HandLandmarkerOptions. + /// + /// Note that the created instance is in image mode, + /// for detecting hand landmarks on single image inputs. + /// + /// Path to the model. + /// + /// HandLandmarker object that's created from the model and the default HandLandmarkerOptions.
+ /// + public static HandLandmarker CreateFromModelPath(string modelPath) + { + var baseOptions = new Tasks.Core.BaseOptions(modelAssetPath: modelPath); + var options = new HandLandmarkerOptions(baseOptions, runningMode: Core.RunningMode.IMAGE); + return CreateFromOptions(options); + } + + /// + /// Creates the HandLandmarker object from HandLandmarkerOptions. + /// + /// Options for the hand landmarker task. + /// + /// HandLandmarker object that's created from options. + /// + public static HandLandmarker CreateFromOptions(HandLandmarkerOptions options) + { + var taskInfo = new Tasks.Core.TaskInfo( + taskGraph: _TASK_GRAPH_NAME, + inputStreams: new List { + string.Join(":", _IMAGE_TAG, _IMAGE_IN_STREAM_NAME), + string.Join(":", _NORM_RECT_TAG, _NORM_RECT_STREAM_NAME), + }, + outputStreams: new List { + string.Join(":", _HANDEDNESS_TAG, _HANDEDNESS_STREAM_NAME), + string.Join(":", _HAND_LANDMARKS_TAG, _HAND_LANDMARKS_STREAM_NAME), + string.Join(":", _HAND_WORLD_LANDMARKS_TAG, _HAND_WORLD_LANDMARKS_STREAM_NAME), + string.Join(":", _IMAGE_TAG, _IMAGE_OUT_STREAM_NAME), + }, + taskOptions: options); + + return new HandLandmarker( + taskInfo.GenerateGraphConfig(options.runningMode == Core.RunningMode.LIVE_STREAM), + options.runningMode, + BuildPacketsCallback(options.resultCallback)); + } + + /// + /// Performs hand landmarks detection on the provided MediaPipe Image. + /// + /// Only use this method when the HandLandmarker is created with the image running mode. + /// The image can be of any size with format RGB or RGBA. + /// + /// MediaPipe Image. + /// Options for image processing. + /// + /// The hand landmarks detection results. + /// + public HandLandmarkerResult Detect(Image image, Core.ImageProcessingOptions?
imageProcessingOptions = null) + { + var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false); + + var packetMap = new PacketMap(); + packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image)); + packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect)); + var outputPackets = ProcessImageData(packetMap); + + return BuildHandLandmarkerResult(outputPackets); + } + + /// + /// Performs hand landmarks detection on the provided video frames. + /// + /// Only use this method when the HandLandmarker is created with the video + /// running mode. It's required to provide the video frame's timestamp (in + /// milliseconds) along with the video frame. The input timestamps should be + /// monotonically increasing for adjacent calls of this method. + /// + /// + /// The hand landmarks detection results. + /// + public HandLandmarkerResult DetectForVideo(Image image, int timestampMs, Core.ImageProcessingOptions? imageProcessingOptions = null) + { + var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false); + + PacketMap outputPackets = null; + using (var timestamp = new Timestamp((long)timestampMs * _MICRO_SECONDS_PER_MILLISECOND)) // widen to long first: int * int wraps once timestampMs exceeds 2,147,483 ms (about 35.8 minutes) + { + var packetMap = new PacketMap(); + packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image, timestamp)); + packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect).At(timestamp)); + outputPackets = ProcessVideoData(packetMap); + } + + return BuildHandLandmarkerResult(outputPackets); + } + + /// + /// Sends live image data to perform hand landmarks detection. + /// + /// Only use this method when the HandLandmarker is created with the live stream + /// running mode. The input timestamps should be monotonically increasing for + /// adjacent calls of this method. This method will return immediately after the + /// input image is accepted. The results will be available via the + /// HandLandmarkerOptions.ResultCallback provided in the HandLandmarkerOptions.
+ /// The method is designed to process live stream data such as camera + /// input. To lower the overall latency, hand landmarker may drop the input + /// images if needed. In other words, it's not guaranteed to have output per + /// input image. + public void DetectAsync(Image image, int timestampMs, Core.ImageProcessingOptions? imageProcessingOptions = null) + { + var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false); + + using (var timestamp = new Timestamp((long)timestampMs * _MICRO_SECONDS_PER_MILLISECOND)) // widen to long first: int * int wraps once timestampMs exceeds 2,147,483 ms (about 35.8 minutes) + { + var packetMap = new PacketMap(); + packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image, timestamp)); + packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect).At(timestamp)); + + SendLiveStreamData(packetMap); + } + } + + private static Tasks.Core.TaskRunner.PacketsCallback BuildPacketsCallback(HandLandmarkerOptions.ResultCallback resultCallback) + { + if (resultCallback == null) + { + return null; + } + + return (PacketMap outputPackets) => + { + var outImagePacket = outputPackets.At(_IMAGE_OUT_STREAM_NAME); + if (outImagePacket == null || outImagePacket.IsEmpty()) + { + return; + } + + var image = outImagePacket.Get(); + var handLandmarkerResult = BuildHandLandmarkerResult(outputPackets); + var timestamp = outImagePacket.Timestamp().Microseconds() / _MICRO_SECONDS_PER_MILLISECOND; // microseconds to milliseconds + + resultCallback(handLandmarkerResult, image, (int)timestamp); + }; + } + + private static HandLandmarkerResult BuildHandLandmarkerResult(PacketMap outputPackets) + { + var handLandmarksProtoPacket = + outputPackets.At>(_HAND_LANDMARKS_STREAM_NAME); + if (handLandmarksProtoPacket.IsEmpty()) + { + return HandLandmarkerResult.Empty(); + } + + var handLandmarksProto = handLandmarksProtoPacket.Get(); + var handednessProto = outputPackets.At>(_HANDEDNESS_STREAM_NAME).Get(); + var handWorldLandmarksProto = outputPackets.At>(_HAND_WORLD_LANDMARKS_STREAM_NAME).Get(); + + return
HandLandmarkerResult.CreateFrom(handednessProto, handLandmarksProto, handWorldLandmarksProto); + } + } +} diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarker.cs.meta b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarker.cs.meta new file mode 100644 index 000000000..58bc4cfbf --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarker.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: fc6b0c7add4d9ad2b90736ca102b54f7 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerOptions.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerOptions.cs new file mode 100644 index 000000000..356b5e688 --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerOptions.cs @@ -0,0 +1,116 @@ +// Copyright (c) 2023 homuler +// +// Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +namespace Mediapipe.Tasks.Vision.HandLandmarker +{ + /// + /// Options for the hand landmarker task. + /// + public sealed class HandLandmarkerOptions : Tasks.Core.ITaskOptions + { + /// + /// The hand landmarks detection results. + /// + /// + /// The input image that the hand landmarker runs on. + /// + /// + /// The input timestamp in milliseconds. + /// + public delegate void ResultCallback(HandLandmarkerResult handLandmarksResult, Image image, int timestampMs); + + /// + /// Base options for the hand landmarker task. 
+ /// + public Tasks.Core.BaseOptions baseOptions { get; } + /// + /// The running mode of the task. Defaults to the image mode. + /// HandLandmarker has three running modes: + /// + /// + /// The image mode for detecting hand landmarks on single image inputs. + /// + /// + /// The video mode for detecting hand landmarks on the decoded frames of a video. + /// + /// + /// + /// The live stream mode for detecting hand landmarks on the live stream of input data, such as from a camera. + /// In this mode, the resultCallback below must be specified to receive the detection results asynchronously. + /// + /// + /// + /// + public Core.RunningMode runningMode { get; } + /// + /// The maximum number of hands that can be detected by the hand landmarker. + /// + public int numHands { get; } + /// + /// The minimum confidence score for the hand detection to be considered successful. + /// + public float minHandDetectionConfidence { get; } + /// + /// The minimum confidence score for hand presence in the hand landmark detection. + /// + public float minHandPresenceConfidence { get; } + /// + /// The minimum confidence score for the hand tracking to be considered successful. + /// + public float minTrackingConfidence { get; } + /// + /// The user-defined result callback for processing live stream data. + /// The result callback should only be specified when the running mode is set to the live stream mode.
+ /// + public ResultCallback resultCallback { get; } + + public HandLandmarkerOptions( + Tasks.Core.BaseOptions baseOptions, + Core.RunningMode runningMode = Core.RunningMode.IMAGE, + int numHands = 1, + float minHandDetectionConfidence = 0.5f, + float minHandPresenceConfidence = 0.5f, + float minTrackingConfidence = 0.5f, + ResultCallback resultCallback = null) + { + this.baseOptions = baseOptions; + this.runningMode = runningMode; + this.numHands = numHands; + this.minHandDetectionConfidence = minHandDetectionConfidence; + this.minHandPresenceConfidence = minHandPresenceConfidence; + this.minTrackingConfidence = minTrackingConfidence; + this.resultCallback = resultCallback; + } + + internal Proto.HandLandmarkerGraphOptions ToProto() + { + var baseOptionsProto = baseOptions.ToProto(); + baseOptionsProto.UseStreamMode = runningMode != Core.RunningMode.IMAGE; // stream mode is enabled for every running mode except IMAGE + + return new Proto.HandLandmarkerGraphOptions + { + BaseOptions = baseOptionsProto, + HandDetectorGraphOptions = new HandDetector.Proto.HandDetectorGraphOptions + { + NumHands = numHands, + MinDetectionConfidence = minHandDetectionConfidence, + }, + HandLandmarksDetectorGraphOptions = new Proto.HandLandmarksDetectorGraphOptions + { + MinDetectionConfidence = minHandPresenceConfidence, // the hand presence threshold is passed as the landmarks detector's MinDetectionConfidence + }, + MinTrackingConfidence = minTrackingConfidence, + }; + } + + CalculatorOptions Tasks.Core.ITaskOptions.ToCalculatorOptions() + { + var options = new CalculatorOptions(); + options.SetExtension(Proto.HandLandmarkerGraphOptions.Extensions.Ext, ToProto()); // wraps the graph options as a CalculatorOptions extension + return options; + } + } +} diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerOptions.cs.meta b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerOptions.cs.meta new file mode 100644 index 000000000..93ef7c2a2 --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerOptions.cs.meta @@ -0,0 +1,11 @@
+fileFormatVersion: 2 +guid: 8e5019ddbbd2dfeb7957da909db1a2b5 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerResult.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerResult.cs new file mode 100644 index 000000000..e60ade01c --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerResult.cs @@ -0,0 +1,56 @@ +// Copyright (c) 2023 homuler +// +// Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +using System.Collections.Generic; +using System.Linq; +using Mediapipe.Tasks.Components.Containers; + +namespace Mediapipe.Tasks.Vision.HandLandmarker +{ + /// + /// The hand landmarks result from HandLandmarker, where each vector element represents a single hand detected in the image. + /// + public readonly struct HandLandmarkerResult + { + /// + /// Classification of handedness. + /// + public readonly IReadOnlyList handedness; + /// + /// Detected hand landmarks in normalized image coordinates. + /// + public readonly IReadOnlyList handLandmarks; + /// + /// Detected hand landmarks in world coordinates. 
+ /// + public readonly IReadOnlyList handWorldLandmarks; + + internal HandLandmarkerResult(IReadOnlyList handedness, + IReadOnlyList handLandmarks, IReadOnlyList handWorldLandmarks) + { + this.handedness = handedness; + this.handLandmarks = handLandmarks; + this.handWorldLandmarks = handWorldLandmarks; + } + + // TODO: add parameterless constructors + internal static HandLandmarkerResult Empty() + => new HandLandmarkerResult(new List(), new List(), new List()); + + internal static HandLandmarkerResult CreateFrom(IReadOnlyList handednessProto, + IReadOnlyList handLandmarksProto, IReadOnlyList handWorldLandmarksProto) + { + var handedness = handednessProto.Select(x => Classifications.CreateFrom(x)).ToList(); + var handLandmarks = handLandmarksProto.Select(NormalizedLandmarks.CreateFrom).ToList(); + var handWorldLandmarks = handWorldLandmarksProto.Select(Landmarks.CreateFrom).ToList(); + + return new HandLandmarkerResult(handedness, handLandmarks, handWorldLandmarks); + } + + public override string ToString() + => $"{{ \"handedness\": {Util.Format(handedness)}, \"handLandmarks\": {Util.Format(handLandmarks)}, \"handWorldLandmarks\": {Util.Format(handWorldLandmarks)} }}"; + } +} diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerResult.cs.meta b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerResult.cs.meta new file mode 100644 index 000000000..1ddff7e15 --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Tasks/Vision/HandLandmarker/HandLandmarkerResult.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 11be1ce078f86095dbe436b3b5e6c903 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: