From 4f4f107a8b26091e7e790ff3e3ea1fa4656441b0 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 18 Dec 2023 14:14:46 -0800 Subject: [PATCH] Add Holistic Landmarker Web API. PiperOrigin-RevId: 592006517 --- .../tasks/web/core/task_runner_test_utils.ts | 14 +- mediapipe/tasks/web/vision/BUILD | 1 + .../web/vision/holistic_landmarker/BUILD | 84 +++ .../holistic_landmarker.ts | 642 ++++++++++++++++++ .../holistic_landmarker_options.d.ts | 71 ++ .../holistic_landmarker_result.d.ts | 55 ++ .../holistic_landmarker_test.ts | 403 +++++++++++ mediapipe/tasks/web/vision/index.ts | 3 + .../vision/pose_landmarker/pose_landmarker.ts | 8 + mediapipe/tasks/web/vision/types.ts | 1 + 10 files changed, 1276 insertions(+), 6 deletions(-) create mode 100644 mediapipe/tasks/web/vision/holistic_landmarker/BUILD create mode 100644 mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker.ts create mode 100644 mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_options.d.ts create mode 100644 mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_result.d.ts create mode 100644 mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_test.ts diff --git a/mediapipe/tasks/web/core/task_runner_test_utils.ts b/mediapipe/tasks/web/core/task_runner_test_utils.ts index 69d00b9444..3a01828831 100644 --- a/mediapipe/tasks/web/core/task_runner_test_utils.ts +++ b/mediapipe/tasks/web/core/task_runner_test_utils.ts @@ -73,15 +73,16 @@ export type FieldPathToValue = [string[] | string, unknown]; type JsonObject = Record; -type Deserializer = (binaryProto: string | Uint8Array) => JsonObject; +/** + * The function to convert a binary proto to a JsonObject. + * For example, the deserializer of HolisticLandmarkerOptions's binary proto is + * HolisticLandmarkerOptions.deserializeBinary(binaryProto).toObject(). + */ +export type Deserializer = (binaryProto: Uint8Array) => JsonObject; /** * Verifies that the graph has been initialized and that it contains the * provided options. - * - * @param deserializer - the function to convert a binary proto to a JsonObject. - * For example, the deserializer of HolisticLandmarkerOptions's binary proto is - * HolisticLandmarkerOptions.deserializeBinary(binaryProto).toObject(). 
*/ export function verifyGraph( tasksFake: MediapipeTasksFake, @@ -101,7 +102,8 @@ export function verifyGraph( let proto; if (deserializer) { const binaryProto = - tasksFake.graph!.getNodeList()[0].getNodeOptionsList()[0].getValue(); + tasksFake.graph!.getNodeList()[0].getNodeOptionsList()[0].getValue() as + Uint8Array; proto = deserializer(binaryProto); } else { proto = (node.options as {ext: unknown}).ext; diff --git a/mediapipe/tasks/web/vision/BUILD b/mediapipe/tasks/web/vision/BUILD index 58795b1663..c44fb41a20 100644 --- a/mediapipe/tasks/web/vision/BUILD +++ b/mediapipe/tasks/web/vision/BUILD @@ -27,6 +27,7 @@ VISION_LIBS = [ "//mediapipe/tasks/web/vision/face_stylizer", "//mediapipe/tasks/web/vision/gesture_recognizer", "//mediapipe/tasks/web/vision/hand_landmarker", + "//mediapipe/tasks/web/vision/holistic_landmarker", "//mediapipe/tasks/web/vision/image_classifier", "//mediapipe/tasks/web/vision/image_embedder", "//mediapipe/tasks/web/vision/image_segmenter", diff --git a/mediapipe/tasks/web/vision/holistic_landmarker/BUILD b/mediapipe/tasks/web/vision/holistic_landmarker/BUILD new file mode 100644 index 0000000000..e13ec9614f --- /dev/null +++ b/mediapipe/tasks/web/vision/holistic_landmarker/BUILD @@ -0,0 +1,84 @@ +# This contains the MediaPipe Hand Landmarker Task. +# +# This task takes video frames and outputs synchronized frames along with +# the detection results for one or more holistic categories, using Hand Landmarker. + +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_ts_declaration", "mediapipe_ts_library") +load("@npm//@bazel/jasmine:index.bzl", "jasmine_node_test") + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +mediapipe_ts_library( + name = "holistic_landmarker", + srcs = ["holistic_landmarker.ts"], + visibility = ["//visibility:public"], + deps = [ + ":holistic_landmarker_types", + "//mediapipe/framework:calculator_jspb_proto", + "//mediapipe/framework:calculator_options_jspb_proto", + "//mediapipe/framework/formats:classification_jspb_proto", + "//mediapipe/framework/formats:landmark_jspb_proto", + "//mediapipe/tasks/cc/core/proto:base_options_jspb_proto", + "//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_jspb_proto", + "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarks_detector_graph_options_jspb_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_jspb_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_jspb_proto", + "//mediapipe/tasks/cc/vision/holistic_landmarker/proto:holistic_landmarker_graph_options_jspb_proto", + "//mediapipe/tasks/cc/vision/pose_detector/proto:pose_detector_graph_options_jspb_proto", + "//mediapipe/tasks/cc/vision/pose_landmarker/proto:pose_landmarks_detector_graph_options_jspb_proto", + "//mediapipe/tasks/web/components/containers:category", + "//mediapipe/tasks/web/components/containers:classification_result", + "//mediapipe/tasks/web/components/containers:landmark", + "//mediapipe/tasks/web/components/processors:classifier_result", + "//mediapipe/tasks/web/components/processors:landmark_result", + "//mediapipe/tasks/web/core", + "//mediapipe/tasks/web/vision/core:image_processing_options", + "//mediapipe/tasks/web/vision/core:vision_task_runner", + "//mediapipe/web/graph_runner:graph_runner_ts", + ], +) + +mediapipe_ts_declaration( + name = "holistic_landmarker_types", + srcs = [ + "holistic_landmarker_options.d.ts", + 
"holistic_landmarker_result.d.ts", + ], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/tasks/web/components/containers:category", + "//mediapipe/tasks/web/components/containers:classification_result", + "//mediapipe/tasks/web/components/containers:landmark", + "//mediapipe/tasks/web/components/containers:matrix", + "//mediapipe/tasks/web/core", + "//mediapipe/tasks/web/vision/core:mask", + "//mediapipe/tasks/web/vision/core:vision_task_options", + ], +) + +mediapipe_ts_library( + name = "holistic_landmarker_test_lib", + testonly = True, + srcs = [ + "holistic_landmarker_test.ts", + ], + deps = [ + ":holistic_landmarker", + ":holistic_landmarker_types", + "//mediapipe/framework:calculator_jspb_proto", + "//mediapipe/framework/formats:classification_jspb_proto", + "//mediapipe/tasks/cc/vision/holistic_landmarker/proto:holistic_landmarker_graph_options_jspb_proto", + "//mediapipe/tasks/web/components/processors:landmark_result", + "//mediapipe/tasks/web/core", + "//mediapipe/tasks/web/core:task_runner_test_utils", + "//mediapipe/tasks/web/vision/core:vision_task_runner", + ], +) + +jasmine_node_test( + name = "holistic_landmarker_test", + tags = ["nomsan"], + deps = [":holistic_landmarker_test_lib"], +) diff --git a/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker.ts b/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker.ts new file mode 100644 index 0000000000..a9054bed5e --- /dev/null +++ b/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker.ts @@ -0,0 +1,642 @@ +/** + * Copyright 2023 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import {Any} from 'google-protobuf/google/protobuf/any_pb'; +import {CalculatorGraphConfig} from '../../../../framework/calculator_pb'; +import {CalculatorOptions} from '../../../../framework/calculator_options_pb'; +import {ClassificationList as ClassificationListProto} from '../../../../framework/formats/classification_pb'; +import {LandmarkList, NormalizedLandmarkList} from '../../../../framework/formats/landmark_pb'; +import {BaseOptions as BaseOptionsProto} from '../../../../tasks/cc/core/proto/base_options_pb'; +import {FaceDetectorGraphOptions} from '../../../../tasks/cc/vision/face_detector/proto/face_detector_graph_options_pb'; +import {FaceLandmarksDetectorGraphOptions} from '../../../../tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options_pb'; +import {HandLandmarksDetectorGraphOptions} from '../../../../tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb'; +import {HandRoiRefinementGraphOptions} from '../../../../tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options_pb'; +import {HolisticLandmarkerGraphOptions} from '../../../../tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options_pb'; +import {PoseDetectorGraphOptions} from '../../../../tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb'; +import {PoseLandmarksDetectorGraphOptions} from '../../../../tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb'; +import {Classifications} from '../../../../tasks/web/components/containers/classification_result'; +import {Landmark, NormalizedLandmark} from '../../../../tasks/web/components/containers/landmark'; +import {convertFromClassifications} from '../../../../tasks/web/components/processors/classifier_result'; +import {convertToLandmarks, convertToWorldLandmarks} from '../../../../tasks/web/components/processors/landmark_result'; +import {WasmFileset} from '../../../../tasks/web/core/wasm_fileset'; +import {ImageProcessingOptions} from '../../../../tasks/web/vision/core/image_processing_options'; +import {VisionGraphRunner, VisionTaskRunner} from '../../../../tasks/web/vision/core/vision_task_runner'; +import {ImageSource, WasmModule} from '../../../../web/graph_runner/graph_runner'; +// Placeholder for internal dependency on trusted resource url + +import {HolisticLandmarkerOptions} from './holistic_landmarker_options'; +import {HolisticLandmarkerResult} from './holistic_landmarker_result'; + +export * from './holistic_landmarker_options'; +export * from './holistic_landmarker_result'; +export {ImageSource}; + +// The OSS JS API does not support the builder pattern. 
+// tslint:disable:jspb-use-builder-pattern + +const IMAGE_STREAM = 'input_frames_image'; + +const POSE_LANDMARKS_STREAM = 'pose_landmarks'; +const POSE_WORLD_LANDMARKS_STREAM = 'pose_world_landmarks'; +const POSE_SEGMENTATION_MASK_STREAM = 'pose_segmentation_mask'; +const FACE_LANDMARKS_STREAM = 'face_landmarks'; +const FACE_BLENDSHAPES_STREAM = 'extra_blendshapes'; +const LEFT_HAND_LANDMARKS_STREAM = 'left_hand_landmarks'; +const LEFT_HAND_WORLD_LANDMARKS_STREAM = 'left_hand_world_landmarks'; +const RIGHT_HAND_LANDMARKS_STREAM = 'right_hand_landmarks'; +const RIGHT_HAND_WORLD_LANDMARKS_STREAM = 'right_hand_world_landmarks'; + +const HOLISTIC_LANDMARKER_GRAPH = + 'mediapipe.tasks.vision.holistic_landmarker.HolisticLandmarkerGraph'; + +const DEFAULT_SUPRESSION_THRESHOLD = 0.3; +const DEFAULT_SCORE_THRESHOLD = 0.5; + +/** + * A callback that receives the result from the holistic landmarker detection. + * The returned result are only valid for the duration of the callback. If + * asynchronous processing is needed, the masks need to be copied before the + * callback returns. + */ +export type HolisticLandmarkerCallback = (result: HolisticLandmarkerResult) => + void; + +/** Performs holistic landmarks detection on images. */ +export class HolisticLandmarker extends VisionTaskRunner { + private result: HolisticLandmarkerResult = { + faceLandmarks: [], + faceBlendshapes: [], + poseLandmarks: [], + poseWorldLandmarks: [], + poseSegmentationMasks: [], + leftHandLandmarks: [], + leftHandWorldLandmarks: [], + rightHandLandmarks: [], + rightHandWorldLandmarks: [] + }; + private outputFaceBlendshapes = false; + private outputPoseSegmentationMasks = false; + private userCallback?: HolisticLandmarkerCallback; + + private readonly options: HolisticLandmarkerGraphOptions; + private readonly handLandmarksDetectorGraphOptions: + HandLandmarksDetectorGraphOptions; + private readonly handRoiRefinementGraphOptions: HandRoiRefinementGraphOptions; + private readonly faceDetectorGraphOptions: FaceDetectorGraphOptions; + private readonly faceLandmarksDetectorGraphOptions: + FaceLandmarksDetectorGraphOptions; + private readonly poseDetectorGraphOptions: PoseDetectorGraphOptions; + private readonly poseLandmarksDetectorGraphOptions: + PoseLandmarksDetectorGraphOptions; + + /** + * Initializes the Wasm runtime and creates a new `HolisticLandmarker` from + * the provided options. + * @export + * @param wasmFileset A configuration object that provides the location of the + * Wasm binary and its loader. + * @param holisticLandmarkerOptions The options for the HolisticLandmarker. + * Note that either a path to the model asset or a model buffer needs to + * be provided (via `baseOptions`). + */ + static createFromOptions( + wasmFileset: WasmFileset, + holisticLandmarkerOptions: HolisticLandmarkerOptions): + Promise { + return VisionTaskRunner.createVisionInstance( + HolisticLandmarker, wasmFileset, holisticLandmarkerOptions); + } + + /** + * Initializes the Wasm runtime and creates a new `HolisticLandmarker` based + * on the provided model asset buffer. + * @export + * @param wasmFileset A configuration object that provides the location of the + * Wasm binary and its loader. + * @param modelAssetBuffer A binary representation of the model. 
+ */ + static createFromModelBuffer( + wasmFileset: WasmFileset, + modelAssetBuffer: Uint8Array): Promise { + return VisionTaskRunner.createVisionInstance( + HolisticLandmarker, wasmFileset, {baseOptions: {modelAssetBuffer}}); + } + + /** + * Initializes the Wasm runtime and creates a new `HolisticLandmarker` based + * on the path to the model asset. + * @export + * @param wasmFileset A configuration object that provides the location of the + * Wasm binary and its loader. + * @param modelAssetPath The path to the model asset. + */ + static createFromModelPath( + wasmFileset: WasmFileset, + modelAssetPath: string): Promise { + return VisionTaskRunner.createVisionInstance( + HolisticLandmarker, wasmFileset, {baseOptions: {modelAssetPath}}); + } + + /** @hideconstructor */ + constructor( + wasmModule: WasmModule, + glCanvas?: HTMLCanvasElement|OffscreenCanvas|null) { + super( + new VisionGraphRunner(wasmModule, glCanvas), IMAGE_STREAM, + /* normRectStream= */ null, /* roiAllowed= */ false); + + this.options = new HolisticLandmarkerGraphOptions(); + this.options.setBaseOptions(new BaseOptionsProto()); + this.handLandmarksDetectorGraphOptions = + new HandLandmarksDetectorGraphOptions(); + this.options.setHandLandmarksDetectorGraphOptions( + this.handLandmarksDetectorGraphOptions); + this.handRoiRefinementGraphOptions = new HandRoiRefinementGraphOptions(); + this.options.setHandRoiRefinementGraphOptions( + this.handRoiRefinementGraphOptions); + this.faceDetectorGraphOptions = new FaceDetectorGraphOptions(); + this.options.setFaceDetectorGraphOptions(this.faceDetectorGraphOptions); + this.faceLandmarksDetectorGraphOptions = + new FaceLandmarksDetectorGraphOptions(); + this.options.setFaceLandmarksDetectorGraphOptions( + this.faceLandmarksDetectorGraphOptions); + this.poseDetectorGraphOptions = new PoseDetectorGraphOptions(); + this.options.setPoseDetectorGraphOptions(this.poseDetectorGraphOptions); + this.poseLandmarksDetectorGraphOptions = + new PoseLandmarksDetectorGraphOptions(); + this.options.setPoseLandmarksDetectorGraphOptions( + this.poseLandmarksDetectorGraphOptions); + + this.initDefaults(); + } + + protected override get baseOptions(): BaseOptionsProto { + return this.options.getBaseOptions()!; + } + + protected override set baseOptions(proto: BaseOptionsProto) { + this.options.setBaseOptions(proto); + } + + /** + * Sets new options for this `HolisticLandmarker`. + * + * Calling `setOptions()` with a subset of options only affects those options. + * You can reset an option back to its default value by explicitly setting it + * to `undefined`. + * + * @export + * @param options The options for the holistic landmarker. + */ + override setOptions(options: HolisticLandmarkerOptions): Promise { + // Configure face detector options. + if ('minFaceDetectionConfidence' in options) { + this.faceDetectorGraphOptions.setMinDetectionConfidence( + options.minFaceDetectionConfidence ?? DEFAULT_SCORE_THRESHOLD); + } + if ('minFaceSuppressionThreshold' in options) { + this.faceDetectorGraphOptions.setMinSuppressionThreshold( + options.minFaceSuppressionThreshold ?? DEFAULT_SUPRESSION_THRESHOLD); + } + + // Configure face landmark detector options. + if ('minFaceLandmarksConfidence' in options) { + this.faceLandmarksDetectorGraphOptions.setMinDetectionConfidence( + options.minFaceLandmarksConfidence ?? DEFAULT_SCORE_THRESHOLD); + } + if ('outputFaceBlendshapes' in options) { + this.outputFaceBlendshapes = !!options.outputFaceBlendshapes; + } + + // Configure pose detector options. 
+ if ('minPoseDetectionConfidence' in options) { + this.poseDetectorGraphOptions.setMinDetectionConfidence( + options.minPoseDetectionConfidence ?? DEFAULT_SCORE_THRESHOLD); + } + if ('minPoseSuppressionThreshold' in options) { + this.poseDetectorGraphOptions.setMinSuppressionThreshold( + options.minPoseSuppressionThreshold ?? DEFAULT_SUPRESSION_THRESHOLD); + } + + // Configure pose landmark detector options. + if ('minPoseLandmarksConfidence' in options) { + this.poseLandmarksDetectorGraphOptions.setMinDetectionConfidence( + options.minPoseLandmarksConfidence ?? DEFAULT_SCORE_THRESHOLD); + } + if ('outputPoseSegmentationMasks' in options) { + this.outputPoseSegmentationMasks = !!options.outputPoseSegmentationMasks; + } + + // Configure hand detector options. + if ('minHandLandmarksConfidence' in options) { + this.handLandmarksDetectorGraphOptions.setMinDetectionConfidence( + options.minHandLandmarksConfidence ?? DEFAULT_SCORE_THRESHOLD); + } + return this.applyOptions(options); + } + + /** + * Performs holistic landmarks detection on the provided single image and + * invokes the callback with the response. The method returns synchronously + * once the callback returns. Only use this method when the HolisticLandmarker + * is created with running mode `image`. + * + * @export + * @param image An image to process. + * @param callback The callback that is invoked with the result. The + * lifetime of the returned masks is only guaranteed for the duration of + * the callback. + */ + detect(image: ImageSource, callback: HolisticLandmarkerCallback): void; + /** + * Performs holistic landmarks detection on the provided single image and + * invokes the callback with the response. The method returns synchronously + * once the callback returns. Only use this method when the HolisticLandmarker + * is created with running mode `image`. + * + * @export + * @param image An image to process. + * @param imageProcessingOptions the `ImageProcessingOptions` specifying how + * to process the input image before running inference. + * @param callback The callback that is invoked with the result. The + * lifetime of the returned masks is only guaranteed for the duration of + * the callback. + */ + detect( + image: ImageSource, imageProcessingOptions: ImageProcessingOptions, + callback: HolisticLandmarkerCallback): void; + /** + * Performs holistic landmarks detection on the provided single image and + * waits synchronously for the response. This method creates a copy of the + * resulting masks and should not be used in high-throughput applications. + * Only use this method when the HolisticLandmarker is created with running + * mode `image`. + * + * @export + * @param image An image to process. + * @return The landmarker result. Any masks are copied to avoid lifetime + * limits. + * @return The detected pose landmarks. + */ + detect(image: ImageSource): HolisticLandmarkerResult; + /** + * Performs holistic landmarks detection on the provided single image and + * waits synchronously for the response. This method creates a copy of the + * resulting masks and should not be used in high-throughput applications. + * Only use this method when the HolisticLandmarker is created with running + * mode `image`. + * + * @export + * @param image An image to process. + * @return The landmarker result. Any masks are copied to avoid lifetime + * limits. + * @return The detected pose landmarks. 
+ */ + detect(image: ImageSource, imageProcessingOptions: ImageProcessingOptions): + HolisticLandmarkerResult; + /** @export */ + detect( + image: ImageSource, + imageProcessingOptionsOrCallback?: ImageProcessingOptions| + HolisticLandmarkerCallback, + callback?: HolisticLandmarkerCallback): HolisticLandmarkerResult|void { + const imageProcessingOptions = + typeof imageProcessingOptionsOrCallback !== 'function' ? + imageProcessingOptionsOrCallback : + {}; + this.userCallback = typeof imageProcessingOptionsOrCallback === 'function' ? + imageProcessingOptionsOrCallback : + callback!; + + this.resetResults(); + this.processImageData(image, imageProcessingOptions); + return this.processResults(); + } + + /** + * Performs holistic landmarks detection on the provided video frame and + * invokes the callback with the response. The method returns synchronously + * once the callback returns. Only use this method when the HolisticLandmarker + * is created with running mode `video`. + * + * @export + * @param videoFrame A video frame to process. + * @param timestamp The timestamp of the current frame, in ms. + * @param callback The callback that is invoked with the result. The + * lifetime of the returned masks is only guaranteed for the duration of + * the callback. + */ + detectForVideo( + videoFrame: ImageSource, timestamp: number, + callback: HolisticLandmarkerCallback): void; + /** + * Performs holistic landmarks detection on the provided video frame and + * invokes the callback with the response. The method returns synchronously + * once the callback returns. Only use this method when the holisticLandmarker + * is created with running mode `video`. + * + * @export + * @param videoFrame A video frame to process. + * @param timestamp The timestamp of the current frame, in ms. + * @param imageProcessingOptions the `ImageProcessingOptions` specifying how + * to process the input image before running inference. + * @param callback The callback that is invoked with the result. The + * lifetime of the returned masks is only guaranteed for the duration of + * the callback. + */ + detectForVideo( + videoFrame: ImageSource, timestamp: number, + imageProcessingOptions: ImageProcessingOptions, + callback: HolisticLandmarkerCallback): void; + /** + * Performs holistic landmarks detection on the provided video frame and + * returns the result. This method creates a copy of the resulting masks and + * should not be used in high-throughput applications. Only use this method + * when the HolisticLandmarker is created with running mode `video`. + * + * @export + * @param videoFrame A video frame to process. + * @param timestamp The timestamp of the current frame, in ms. + * @return The landmarker result. Any masks are copied to extend the + * lifetime of the returned data. + */ + detectForVideo(videoFrame: ImageSource, timestamp: number): + HolisticLandmarkerResult; + /** + * Performs holistic landmarks detection on the provided video frame and waits + * synchronously for the response. Only use this method when the + * HolisticLandmarker is created with running mode `video`. + * + * @export + * @param videoFrame A video frame to process. + * @param timestamp The timestamp of the current frame, in ms. + * @param imageProcessingOptions the `ImageProcessingOptions` specifying how + * to process the input image before running inference. + * @return The detected holistic landmarks. 
+ */ + detectForVideo( + videoFrame: ImageSource, timestamp: number, + imageProcessingOptions: ImageProcessingOptions): HolisticLandmarkerResult; + /** @export */ + detectForVideo( + videoFrame: ImageSource, timestamp: number, + imageProcessingOptionsOrCallback?: ImageProcessingOptions| + HolisticLandmarkerCallback, + callback?: HolisticLandmarkerCallback): HolisticLandmarkerResult|void { + const imageProcessingOptions = + typeof imageProcessingOptionsOrCallback !== 'function' ? + imageProcessingOptionsOrCallback : + {}; + this.userCallback = typeof imageProcessingOptionsOrCallback === 'function' ? + imageProcessingOptionsOrCallback : + callback; + + this.resetResults(); + this.processVideoData(videoFrame, imageProcessingOptions, timestamp); + return this.processResults(); + } + + private resetResults(): void { + this.result = { + faceLandmarks: [], + faceBlendshapes: [], + poseLandmarks: [], + poseWorldLandmarks: [], + poseSegmentationMasks: [], + leftHandLandmarks: [], + leftHandWorldLandmarks: [], + rightHandLandmarks: [], + rightHandWorldLandmarks: [] + }; + } + + private processResults(): HolisticLandmarkerResult|void { + try { + if (this.userCallback) { + this.userCallback(this.result); + } else { + return this.result; + } + } finally { + // Free the image memory, now that we've finished our callback. + this.freeKeepaliveStreams(); + } + } + + /** Sets the default values for the graph. */ + private initDefaults(): void { + this.faceDetectorGraphOptions.setMinDetectionConfidence( + DEFAULT_SCORE_THRESHOLD); + this.faceDetectorGraphOptions.setMinSuppressionThreshold( + DEFAULT_SUPRESSION_THRESHOLD); + + this.faceLandmarksDetectorGraphOptions.setMinDetectionConfidence( + DEFAULT_SCORE_THRESHOLD); + + this.poseDetectorGraphOptions.setMinDetectionConfidence( + DEFAULT_SCORE_THRESHOLD); + this.poseDetectorGraphOptions.setMinSuppressionThreshold( + DEFAULT_SUPRESSION_THRESHOLD); + + this.poseLandmarksDetectorGraphOptions.setMinDetectionConfidence( + DEFAULT_SCORE_THRESHOLD); + + this.handLandmarksDetectorGraphOptions.setMinDetectionConfidence( + DEFAULT_SCORE_THRESHOLD); + } + + /** Converts raw data into a landmark, and adds it to our landmarks list. */ + private addJsLandmarks(data: Uint8Array, outputList: NormalizedLandmark[][]): + void { + const landmarksProto = NormalizedLandmarkList.deserializeBinary(data); + outputList.push(convertToLandmarks(landmarksProto)); + } + + /** + * Converts raw data into a world landmark, and adds it to our worldLandmarks + * list. + */ + private addJsWorldLandmarks(data: Uint8Array, outputList: Landmark[][]): + void { + const worldLandmarksProto = LandmarkList.deserializeBinary(data); + outputList.push(convertToWorldLandmarks(worldLandmarksProto)); + } + + /** Adds new blendshapes from the given proto. */ + private addBlenshape(data: Uint8Array, outputList: Classifications[]): void { + if (!this.outputFaceBlendshapes) { + return; + } + const classificationList = ClassificationListProto.deserializeBinary(data); + outputList.push(convertFromClassifications( + classificationList.getClassificationList() ?? [])); + } + + /** Updates the MediaPipe graph configuration. 
*/ + protected override refreshGraph(): void { + const graphConfig = new CalculatorGraphConfig(); + + graphConfig.addInputStream(IMAGE_STREAM); + graphConfig.addOutputStream(POSE_LANDMARKS_STREAM); + graphConfig.addOutputStream(POSE_WORLD_LANDMARKS_STREAM); + graphConfig.addOutputStream(FACE_LANDMARKS_STREAM); + graphConfig.addOutputStream(LEFT_HAND_LANDMARKS_STREAM); + graphConfig.addOutputStream(LEFT_HAND_WORLD_LANDMARKS_STREAM); + graphConfig.addOutputStream(RIGHT_HAND_LANDMARKS_STREAM); + graphConfig.addOutputStream(RIGHT_HAND_WORLD_LANDMARKS_STREAM); + + const calculatorOptions = new CalculatorOptions(); + const optionsProto = new Any(); + optionsProto.setTypeUrl( + 'type.googleapis.com/mediapipe.tasks.vision.holistic_landmarker.proto.HolisticLandmarkerGraphOptions'); + optionsProto.setValue(this.options.serializeBinary()); + + const landmarkerNode = new CalculatorGraphConfig.Node(); + landmarkerNode.setCalculator(HOLISTIC_LANDMARKER_GRAPH); + landmarkerNode.addNodeOptions(optionsProto); + + landmarkerNode.addInputStream('IMAGE:' + IMAGE_STREAM); + landmarkerNode.addOutputStream('POSE_LANDMARKS:' + POSE_LANDMARKS_STREAM); + landmarkerNode.addOutputStream( + 'POSE_WORLD_LANDMARKS:' + POSE_WORLD_LANDMARKS_STREAM); + landmarkerNode.addOutputStream('FACE_LANDMARKS:' + FACE_LANDMARKS_STREAM); + landmarkerNode.addOutputStream( + 'LEFT_HAND_LANDMARKS:' + LEFT_HAND_LANDMARKS_STREAM); + landmarkerNode.addOutputStream( + 'LEFT_HAND_WORLD_LANDMARKS:' + LEFT_HAND_WORLD_LANDMARKS_STREAM); + landmarkerNode.addOutputStream( + 'RIGHT_HAND_LANDMARKS:' + RIGHT_HAND_LANDMARKS_STREAM); + landmarkerNode.addOutputStream( + 'RIGHT_HAND_WORLD_LANDMARKS:' + RIGHT_HAND_WORLD_LANDMARKS_STREAM); + landmarkerNode.setOptions(calculatorOptions); + + graphConfig.addNode(landmarkerNode); + // We only need to keep alive the image stream, since the protos are being + // deep-copied anyways via serialization+deserialization. 
+ this.addKeepaliveNode(graphConfig); + + this.graphRunner.attachProtoListener( + POSE_LANDMARKS_STREAM, (binaryProto, timestamp) => { + this.addJsLandmarks(binaryProto, this.result.poseLandmarks); + this.setLatestOutputTimestamp(timestamp); + }); + this.graphRunner.attachEmptyPacketListener( + POSE_LANDMARKS_STREAM, timestamp => { + this.setLatestOutputTimestamp(timestamp); + }); + + this.graphRunner.attachProtoListener( + POSE_WORLD_LANDMARKS_STREAM, (binaryProto, timestamp) => { + this.addJsWorldLandmarks(binaryProto, this.result.poseWorldLandmarks); + this.setLatestOutputTimestamp(timestamp); + }); + this.graphRunner.attachEmptyPacketListener( + POSE_WORLD_LANDMARKS_STREAM, timestamp => { + this.setLatestOutputTimestamp(timestamp); + }); + + if (this.outputPoseSegmentationMasks) { + landmarkerNode.addOutputStream( + 'POSE_SEGMENTATION_MASK:' + POSE_SEGMENTATION_MASK_STREAM); + this.keepStreamAlive(POSE_SEGMENTATION_MASK_STREAM); + + this.graphRunner.attachImageListener( + POSE_SEGMENTATION_MASK_STREAM, (mask, timestamp) => { + this.result.poseSegmentationMasks = [this.convertToMPMask( + mask, /* interpolateValues= */ true, + /* shouldCopyData= */ !this.userCallback)]; + this.setLatestOutputTimestamp(timestamp); + }); + this.graphRunner.attachEmptyPacketListener( + POSE_SEGMENTATION_MASK_STREAM, timestamp => { + this.result.poseSegmentationMasks = []; + this.setLatestOutputTimestamp(timestamp); + }); + } + + this.graphRunner.attachProtoListener( + FACE_LANDMARKS_STREAM, (binaryProto, timestamp) => { + this.addJsLandmarks(binaryProto, this.result.faceLandmarks); + this.setLatestOutputTimestamp(timestamp); + }); + this.graphRunner.attachEmptyPacketListener( + FACE_LANDMARKS_STREAM, timestamp => { + this.setLatestOutputTimestamp(timestamp); + }); + + if (this.outputFaceBlendshapes) { + graphConfig.addOutputStream(FACE_BLENDSHAPES_STREAM); + landmarkerNode.addOutputStream( + 'FACE_BLENDSHAPES:' + FACE_BLENDSHAPES_STREAM); + this.graphRunner.attachProtoListener( + FACE_BLENDSHAPES_STREAM, (binaryProto, timestamp) => { + this.addBlenshape(binaryProto, this.result.faceBlendshapes); + this.setLatestOutputTimestamp(timestamp); + }); + this.graphRunner.attachEmptyPacketListener( + FACE_BLENDSHAPES_STREAM, timestamp => { + this.setLatestOutputTimestamp(timestamp); + }); + } + + this.graphRunner.attachProtoListener( + LEFT_HAND_LANDMARKS_STREAM, (binaryProto, timestamp) => { + this.addJsLandmarks(binaryProto, this.result.leftHandLandmarks); + this.setLatestOutputTimestamp(timestamp); + }); + this.graphRunner.attachEmptyPacketListener( + LEFT_HAND_LANDMARKS_STREAM, timestamp => { + this.setLatestOutputTimestamp(timestamp); + }); + + this.graphRunner.attachProtoListener( + LEFT_HAND_WORLD_LANDMARKS_STREAM, (binaryProto, timestamp) => { + this.addJsWorldLandmarks( + binaryProto, this.result.leftHandWorldLandmarks); + this.setLatestOutputTimestamp(timestamp); + }); + this.graphRunner.attachEmptyPacketListener( + LEFT_HAND_WORLD_LANDMARKS_STREAM, timestamp => { + this.setLatestOutputTimestamp(timestamp); + }); + + this.graphRunner.attachProtoListener( + RIGHT_HAND_LANDMARKS_STREAM, (binaryProto, timestamp) => { + this.addJsLandmarks(binaryProto, this.result.rightHandLandmarks); + this.setLatestOutputTimestamp(timestamp); + }); + this.graphRunner.attachEmptyPacketListener( + RIGHT_HAND_LANDMARKS_STREAM, timestamp => { + this.setLatestOutputTimestamp(timestamp); + }); + + this.graphRunner.attachProtoListener( + RIGHT_HAND_WORLD_LANDMARKS_STREAM, (binaryProto, timestamp) => { + this.addJsWorldLandmarks( + 
binaryProto, this.result.rightHandWorldLandmarks); + this.setLatestOutputTimestamp(timestamp); + }); + this.graphRunner.attachEmptyPacketListener( + RIGHT_HAND_WORLD_LANDMARKS_STREAM, timestamp => { + this.setLatestOutputTimestamp(timestamp); + }); + + const binaryGraph = graphConfig.serializeBinary(); + this.setGraph(new Uint8Array(binaryGraph), /* isBinary= */ true); + } +} + + diff --git a/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_options.d.ts b/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_options.d.ts new file mode 100644 index 0000000000..5967b87466 --- /dev/null +++ b/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_options.d.ts @@ -0,0 +1,71 @@ +/** + * Copyright 2023 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {VisionTaskOptions} from '../../../../tasks/web/vision/core/vision_task_options'; + +/** Options to configure the MediaPipe HolisticLandmarker Task */ +export declare interface HolisticLandmarkerOptions extends VisionTaskOptions { + /** + * The minimum confidence score for the face detection to be considered + * successful. Defaults to 0.5. + */ + minFaceDetectionConfidence?: number|undefined; + + /** + * The minimum non-maximum-suppression threshold for face detection to be + * considered overlapped. Defaults to 0.3. + */ + minFaceSuppressionThreshold?: number|undefined; + + /** + * The minimum confidence score of face presence score in the face landmarks + * detection. Defaults to 0.5. + */ + minFaceLandmarksConfidence?: number|undefined; + + /** + * Whether FaceLandmarker outputs face blendshapes classification. Face + * blendshapes are used for rendering the 3D face model. + */ + outputFaceBlendshapes?: boolean|undefined; + + /** + * The minimum confidence score for the pose detection to be considered + * successful. Defaults to 0.5. + */ + minPoseDetectionConfidence?: number|undefined; + + /** + * The minimum non-maximum-suppression threshold for pose detection to be + * considered overlapped. Defaults to 0.3. + */ + minPoseSuppressionThreshold?: number|undefined; + + /** + * The minimum confidence score of pose presence score in the pose landmarks + * detection. Defaults to 0.5. + */ + minPoseLandmarksConfidence?: number|undefined; + + /** Whether to output segmentation masks. Defaults to false. */ + outputPoseSegmentationMasks?: boolean|undefined; + + /** + * The minimum confidence score of hand presence score in the hand landmarks + * detection. Defaults to 0.5. + */ + minHandLandmarksConfidence?: number|undefined; +} diff --git a/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_result.d.ts b/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_result.d.ts new file mode 100644 index 0000000000..d784501728 --- /dev/null +++ b/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_result.d.ts @@ -0,0 +1,55 @@ +/** + * Copyright 2023 The MediaPipe Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {Category} from '../../../../tasks/web/components/containers/category'; +import {Classifications} from '../../../../tasks/web/components/containers/classification_result'; +import {Landmark, NormalizedLandmark} from '../../../../tasks/web/components/containers/landmark'; +import {MPMask} from '../../../../tasks/web/vision/core/mask'; + +export {Category, Landmark, NormalizedLandmark}; + +/** + * Represents the holistic landmarks detection results generated by + * `HolisticLandmarker`. + */ +export declare interface HolisticLandmarkerResult { + /** Detected face landmarks in normalized image coordinates. */ + faceLandmarks: NormalizedLandmark[][]; + + /** Optional face blendshapes results. */ + faceBlendshapes: Classifications[]; + + /** Detected pose landmarks in normalized image coordinates. */ + poseLandmarks: NormalizedLandmark[][]; + + /** Pose landmarks in world coordinates of detected poses. */ + poseWorldLandmarks: Landmark[][]; + + /** Optional segmentation mask for the detected pose. */ + poseSegmentationMasks: MPMask[]; + + /** Left hand landmarks of detected left hands. */ + leftHandLandmarks: NormalizedLandmark[][]; + + /** Left hand landmarks in world coordinates of detected left hands. */ + leftHandWorldLandmarks: Landmark[][]; + + /** Right hand landmarks of detected right hands. */ + rightHandLandmarks: NormalizedLandmark[][]; + + /** Right hand landmarks in world coordinates of detected right hands. */ + rightHandWorldLandmarks: Landmark[][]; +} diff --git a/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_test.ts b/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_test.ts new file mode 100644 index 0000000000..bef67106f4 --- /dev/null +++ b/mediapipe/tasks/web/vision/holistic_landmarker/holistic_landmarker_test.ts @@ -0,0 +1,403 @@ +/** + * Copyright 2023 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import 'jasmine'; + +import {CalculatorGraphConfig} from '../../../../framework/calculator_pb'; +import {Classification, ClassificationList} from '../../../../framework/formats/classification_pb'; +import {HolisticLandmarkerGraphOptions} from '../../../../tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options_pb'; +import {createLandmarks, createWorldLandmarks} from '../../../../tasks/web/components/processors/landmark_result_test_lib'; +import {addJasmineCustomFloatEqualityTester, createSpyWasmModule, Deserializer, MediapipeTasksFake, SpyWasmModule, verifyGraph, verifyListenersRegistered} from '../../../../tasks/web/core/task_runner_test_utils'; +import {VisionGraphRunner} from '../../../../tasks/web/vision/core/vision_task_runner'; + +import {HolisticLandmarker} from './holistic_landmarker'; +import {HolisticLandmarkerOptions} from './holistic_landmarker_options'; + + +// The OSS JS API does not support the builder pattern. +// tslint:disable:jspb-use-builder-pattern + +type ProtoListener = ((binaryProtos: Uint8Array, timestamp: number) => void); +const holisticLandmarkerDeserializer = + (binaryProto => + HolisticLandmarkerGraphOptions.deserializeBinary(binaryProto) + .toObject()) as Deserializer; + +function createBlendshapes(): ClassificationList { + const blendshapesProto = new ClassificationList(); + const classification = new Classification(); + classification.setScore(0.1); + classification.setIndex(1); + classification.setLabel('face_label'); + classification.setDisplayName('face_display_name'); + blendshapesProto.addClassification(classification); + return blendshapesProto; +} + +class HolisticLandmarkerFake extends HolisticLandmarker implements + MediapipeTasksFake { + calculatorName = + 'mediapipe.tasks.vision.holistic_landmarker.HolisticLandmarkerGraph'; + attachListenerSpies: jasmine.Spy[] = []; + graph: CalculatorGraphConfig|undefined; + fakeWasmModule: SpyWasmModule; + listeners = new Map(); + + constructor() { + super(createSpyWasmModule(), /* glCanvas= */ null); + this.fakeWasmModule = + this.graphRunner.wasmModule as unknown as SpyWasmModule; + + this.attachListenerSpies[0] = + spyOn(this.graphRunner, 'attachProtoListener') + .and.callFake((stream, listener) => { + expect(stream).toMatch( + /(pose_landmarks|pose_world_landmarks|pose_segmentation_mask|face_landmarks|extra_blendshapes|left_hand_landmarks|left_hand_world_landmarks|right_hand_landmarks|right_hand_world_landmarks)/); + this.listeners.set(stream, listener); + }); + + spyOn(this.graphRunner, 'setGraph').and.callFake(binaryGraph => { + this.graph = CalculatorGraphConfig.deserializeBinary(binaryGraph); + }); + spyOn(this.graphRunner, 'addGpuBufferAsImageToStream'); + spyOn(this.graphRunner, 'addProtoToStream'); + } + + getGraphRunner(): VisionGraphRunner { + return this.graphRunner; + } +} + +describe('HolisticLandmarker', () => { + let holisticLandmarker: HolisticLandmarkerFake; + + beforeEach(async () => { + addJasmineCustomFloatEqualityTester(); + holisticLandmarker = new HolisticLandmarkerFake(); + await holisticLandmarker.setOptions( + {baseOptions: {modelAssetBuffer: new Uint8Array([])}}); + }); + + afterEach(() => { + holisticLandmarker.close(); + }); + + it('initializes graph', async () => { + verifyGraph(holisticLandmarker); + verifyGraph( + holisticLandmarker, undefined, undefined, + holisticLandmarkerDeserializer); + }); + + it('reloads graph when settings are changed', async () => { + verifyListenersRegistered(holisticLandmarker); + + await 
holisticLandmarker.setOptions({minFaceDetectionConfidence: 0.6}); + verifyGraph( + holisticLandmarker, + [['faceDetectorGraphOptions', 'minDetectionConfidence'], 0.6], + undefined, holisticLandmarkerDeserializer); + verifyListenersRegistered(holisticLandmarker); + + await holisticLandmarker.setOptions({minFaceDetectionConfidence: 0.7}); + verifyGraph( + holisticLandmarker, + [['faceDetectorGraphOptions', 'minDetectionConfidence'], 0.7], + undefined, holisticLandmarkerDeserializer); + verifyListenersRegistered(holisticLandmarker); + }); + + it('merges options', async () => { + await holisticLandmarker.setOptions({minFaceDetectionConfidence: 0.5}); + await holisticLandmarker.setOptions({minFaceSuppressionThreshold: 0.5}); + await holisticLandmarker.setOptions({minFaceLandmarksConfidence: 0.5}); + await holisticLandmarker.setOptions({minPoseDetectionConfidence: 0.5}); + await holisticLandmarker.setOptions({minPoseSuppressionThreshold: 0.5}); + await holisticLandmarker.setOptions({minPoseLandmarksConfidence: 0.5}); + await holisticLandmarker.setOptions({minHandLandmarksConfidence: 0.5}); + + verifyGraph( + holisticLandmarker, + [ + 'faceDetectorGraphOptions', { + baseOptions: undefined, + minDetectionConfidence: 0.5, + minSuppressionThreshold: 0.5, + numFaces: undefined + } + ], + undefined, holisticLandmarkerDeserializer); + verifyGraph( + holisticLandmarker, + [ + 'faceLandmarksDetectorGraphOptions', { + baseOptions: undefined, + minDetectionConfidence: 0.5, + smoothLandmarks: undefined, + faceBlendshapesGraphOptions: undefined + } + ], + undefined, holisticLandmarkerDeserializer); + verifyGraph( + holisticLandmarker, + [ + 'poseDetectorGraphOptions', { + baseOptions: undefined, + minDetectionConfidence: 0.5, + minSuppressionThreshold: 0.5, + numPoses: undefined + } + ], + undefined, holisticLandmarkerDeserializer); + verifyGraph( + holisticLandmarker, + [ + 'poseLandmarksDetectorGraphOptions', { + baseOptions: undefined, + minDetectionConfidence: 0.5, + smoothLandmarks: undefined + } + ], + undefined, holisticLandmarkerDeserializer); + verifyGraph( + holisticLandmarker, + [ + 'handLandmarksDetectorGraphOptions', + {baseOptions: undefined, minDetectionConfidence: 0.5} + ], + undefined, holisticLandmarkerDeserializer); + }); + + describe('setOptions()', () => { + interface TestCase { + optionPath: [keyof HolisticLandmarkerOptions, ...string[]]; + fieldPath: string[]; + customValue: unknown; + defaultValue: unknown; + } + + const testCases: TestCase[] = [ + { + optionPath: ['minFaceDetectionConfidence'], + fieldPath: ['faceDetectorGraphOptions', 'minDetectionConfidence'], + customValue: 0.1, + defaultValue: 0.5 + }, + { + optionPath: ['minFaceSuppressionThreshold'], + fieldPath: ['faceDetectorGraphOptions', 'minSuppressionThreshold'], + customValue: 0.2, + defaultValue: 0.3 + }, + { + optionPath: ['minFaceLandmarksConfidence'], + fieldPath: + ['faceLandmarksDetectorGraphOptions', 'minDetectionConfidence'], + customValue: 0.2, + defaultValue: 0.5 + }, + { + optionPath: ['minPoseDetectionConfidence'], + fieldPath: ['poseDetectorGraphOptions', 'minDetectionConfidence'], + customValue: 0.1, + defaultValue: 0.5 + }, + { + optionPath: ['minPoseSuppressionThreshold'], + fieldPath: ['poseDetectorGraphOptions', 'minSuppressionThreshold'], + customValue: 0.2, + defaultValue: 0.3 + }, + { + optionPath: ['minPoseLandmarksConfidence'], + fieldPath: + ['poseLandmarksDetectorGraphOptions', 'minDetectionConfidence'], + customValue: 0.2, + defaultValue: 0.5 + }, + { + optionPath: 
['minHandLandmarksConfidence'], + fieldPath: + ['handLandmarksDetectorGraphOptions', 'minDetectionConfidence'], + customValue: 0.1, + defaultValue: 0.5 + }, + ]; + + /** Creates an options object that can be passed to setOptions() */ + function createOptions( + path: string[], value: unknown): HolisticLandmarkerOptions { + const options: Record = {}; + let currentLevel = options; + for (const element of path.slice(0, -1)) { + currentLevel[element] = {}; + currentLevel = currentLevel[element] as Record; + } + currentLevel[path[path.length - 1]] = value; + return options; + } + + for (const testCase of testCases) { + it(`uses default value for ${testCase.optionPath[0]}`, async () => { + verifyGraph( + holisticLandmarker, [testCase.fieldPath, testCase.defaultValue], + undefined, holisticLandmarkerDeserializer); + }); + + it(`can set ${testCase.optionPath[0]}`, async () => { + await holisticLandmarker.setOptions( + createOptions(testCase.optionPath, testCase.customValue)); + verifyGraph( + holisticLandmarker, [testCase.fieldPath, testCase.customValue], + undefined, holisticLandmarkerDeserializer); + }); + + it(`can clear ${testCase.optionPath[0]}`, async () => { + await holisticLandmarker.setOptions( + createOptions(testCase.optionPath, testCase.customValue)); + verifyGraph( + holisticLandmarker, [testCase.fieldPath, testCase.customValue], + undefined, holisticLandmarkerDeserializer); + + await holisticLandmarker.setOptions( + createOptions(testCase.optionPath, undefined)); + verifyGraph( + holisticLandmarker, [testCase.fieldPath, testCase.defaultValue], + undefined, holisticLandmarkerDeserializer); + }); + } + }); + + it('supports outputFaceBlendshapes', async () => { + const stream = 'extra_blendshapes'; + await holisticLandmarker.setOptions({}); + expect(holisticLandmarker.graph!.getOutputStreamList()) + .not.toContain(stream); + + await holisticLandmarker.setOptions({outputFaceBlendshapes: false}); + expect(holisticLandmarker.graph!.getOutputStreamList()) + .not.toContain(stream); + + await holisticLandmarker.setOptions({outputFaceBlendshapes: true}); + expect(holisticLandmarker.graph!.getOutputStreamList()).toContain(stream); + }); + + it('transforms results', async () => { + const faceLandmarksProto = createLandmarks().serializeBinary(); + const blendshapesProto = createBlendshapes().serializeBinary(); + + const poseLandmarksProto = createLandmarks().serializeBinary(); + const poseWorldLandmarksProto = createWorldLandmarks().serializeBinary(); + + const leftHandLandmarksProto = createLandmarks().serializeBinary(); + const leftHandWorldLandmarksProto = + createWorldLandmarks().serializeBinary(); + const rightHandLandmarksProto = createLandmarks().serializeBinary(); + const rightHandWorldLandmarksProto = + createWorldLandmarks().serializeBinary(); + + await holisticLandmarker.setOptions( + {outputFaceBlendshapes: true, outputPoseSegmentationMasks: false}); + + // Pass the test data to our listener + holisticLandmarker.fakeWasmModule._waitUntilIdle.and.callFake(() => { + verifyListenersRegistered(holisticLandmarker); + holisticLandmarker.listeners.get('face_landmarks')! + (faceLandmarksProto, 1337); + holisticLandmarker.listeners.get('extra_blendshapes')! + (blendshapesProto, 1337); + + holisticLandmarker.listeners.get('pose_landmarks')! + (poseLandmarksProto, 1337); + holisticLandmarker.listeners.get('pose_world_landmarks')! + (poseWorldLandmarksProto, 1337); + + holisticLandmarker.listeners.get('left_hand_landmarks')! 
+ (leftHandLandmarksProto, 1337); + holisticLandmarker.listeners.get('left_hand_world_landmarks')! + (leftHandWorldLandmarksProto, 1337); + + holisticLandmarker.listeners.get('right_hand_landmarks')! + (rightHandLandmarksProto, 1337); + holisticLandmarker.listeners.get('right_hand_world_landmarks')! + (rightHandWorldLandmarksProto, 1337); + }); + + // Invoke the holistic landmarker + const landmarks = holisticLandmarker.detect({} as HTMLImageElement); + expect(holisticLandmarker.getGraphRunner().addGpuBufferAsImageToStream) + .toHaveBeenCalledTimes(1); + expect(holisticLandmarker.fakeWasmModule._waitUntilIdle).toHaveBeenCalled(); + + expect(landmarks).toEqual({ + faceLandmarks: [[{x: 0, y: 0, z: 0}]], + faceBlendshapes: [{ + categories: [{ + index: 1, + score: 0.1, + categoryName: 'face_label', + displayName: 'face_display_name' + }], + headIndex: -1, + headName: '' + }], + poseLandmarks: [[{x: 0, y: 0, z: 0}]], + poseWorldLandmarks: [[{x: 0, y: 0, z: 0}]], + poseSegmentationMasks: [], + leftHandLandmarks: [[{x: 0, y: 0, z: 0}]], + leftHandWorldLandmarks: [[{x: 0, y: 0, z: 0}]], + rightHandLandmarks: [[{x: 0, y: 0, z: 0}]], + rightHandWorldLandmarks: [[{x: 0, y: 0, z: 0}]] + }); + }); + + it('clears results between invoations', async () => { + const faceLandmarksProto = createLandmarks().serializeBinary(); + const poseLandmarksProto = createLandmarks().serializeBinary(); + const poseWorldLandmarksProto = createWorldLandmarks().serializeBinary(); + const leftHandLandmarksProto = createLandmarks().serializeBinary(); + const leftHandWorldLandmarksProto = + createWorldLandmarks().serializeBinary(); + const rightHandLandmarksProto = createLandmarks().serializeBinary(); + const rightHandWorldLandmarksProto = + createWorldLandmarks().serializeBinary(); + + // Pass the test data to our listener + holisticLandmarker.fakeWasmModule._waitUntilIdle.and.callFake(() => { + holisticLandmarker.listeners.get('face_landmarks')! + (faceLandmarksProto, 1337); + holisticLandmarker.listeners.get('pose_landmarks')! + (poseLandmarksProto, 1337); + holisticLandmarker.listeners.get('pose_world_landmarks')! + (poseWorldLandmarksProto, 1337); + holisticLandmarker.listeners.get('left_hand_landmarks')! + (leftHandLandmarksProto, 1337); + holisticLandmarker.listeners.get('left_hand_world_landmarks')! + (leftHandWorldLandmarksProto, 1337); + holisticLandmarker.listeners.get('right_hand_landmarks')! + (rightHandLandmarksProto, 1337); + holisticLandmarker.listeners.get('right_hand_world_landmarks')! + (rightHandWorldLandmarksProto, 1337); + }); + + // Invoke the holistic landmarker twice + const landmarks1 = holisticLandmarker.detect({} as HTMLImageElement); + const landmarks2 = holisticLandmarker.detect({} as HTMLImageElement); + + // Verify that landmarks2 is not a concatenation of all previously returned + // hands. 
+ expect(landmarks1).toEqual(landmarks2); + }); +}); diff --git a/mediapipe/tasks/web/vision/index.ts b/mediapipe/tasks/web/vision/index.ts index 52bafdd5f0..a07b7e92c6 100644 --- a/mediapipe/tasks/web/vision/index.ts +++ b/mediapipe/tasks/web/vision/index.ts @@ -23,6 +23,7 @@ import {FaceLandmarker as FaceLandmarkerImpl} from '../../../tasks/web/vision/fa import {FaceStylizer as FaceStylizerImpl} from '../../../tasks/web/vision/face_stylizer/face_stylizer'; import {GestureRecognizer as GestureRecognizerImpl} from '../../../tasks/web/vision/gesture_recognizer/gesture_recognizer'; import {HandLandmarker as HandLandmarkerImpl} from '../../../tasks/web/vision/hand_landmarker/hand_landmarker'; +import {HolisticLandmarker as HolisticLandmarkerImpl} from '../../../tasks/web/vision/holistic_landmarker/holistic_landmarker'; import {ImageClassifier as ImageClassifierImpl} from '../../../tasks/web/vision/image_classifier/image_classifier'; import {ImageEmbedder as ImageEmbedderImpl} from '../../../tasks/web/vision/image_embedder/image_embedder'; import {ImageSegmenter as ImageSegementerImpl} from '../../../tasks/web/vision/image_segmenter/image_segmenter'; @@ -41,6 +42,7 @@ const FaceLandmarker = FaceLandmarkerImpl; const FaceStylizer = FaceStylizerImpl; const GestureRecognizer = GestureRecognizerImpl; const HandLandmarker = HandLandmarkerImpl; +const HolisticLandmarker = HolisticLandmarkerImpl; const ImageClassifier = ImageClassifierImpl; const ImageEmbedder = ImageEmbedderImpl; const ImageSegmenter = ImageSegementerImpl; @@ -58,6 +60,7 @@ export { FaceStylizer, GestureRecognizer, HandLandmarker, + HolisticLandmarker, ImageClassifier, ImageEmbedder, ImageSegmenter, diff --git a/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts b/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts index 262966d727..c511d1cd45 100644 --- a/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts +++ b/mediapipe/tasks/web/vision/pose_landmarker/pose_landmarker.ts @@ -206,6 +206,7 @@ export class PoseLandmarker extends VisionTaskRunner { * callback returns. Only use this method when the PoseLandmarker is created * with running mode `image`. * + * @export * @param image An image to process. * @param callback The callback that is invoked with the result. The * lifetime of the returned masks is only guaranteed for the duration of @@ -218,6 +219,7 @@ export class PoseLandmarker extends VisionTaskRunner { * callback returns. Only use this method when the PoseLandmarker is created * with running mode `image`. * + * @export * @param image An image to process. * @param imageProcessingOptions the `ImageProcessingOptions` specifying how * to process the input image before running inference. @@ -235,6 +237,7 @@ export class PoseLandmarker extends VisionTaskRunner { * use this method when the PoseLandmarker is created with running mode * `image`. * + * @export * @param image An image to process. * @return The landmarker result. Any masks are copied to avoid lifetime * limits. @@ -248,6 +251,7 @@ export class PoseLandmarker extends VisionTaskRunner { * use this method when the PoseLandmarker is created with running mode * `image`. * + * @export * @param image An image to process. * @return The landmarker result. Any masks are copied to avoid lifetime * limits. @@ -280,6 +284,7 @@ export class PoseLandmarker extends VisionTaskRunner { * callback returns. Only use this method when the PoseLandmarker is created * with running mode `video`. * + * @export * @param videoFrame A video frame to process. 
* @param timestamp The timestamp of the current frame, in ms. * @param callback The callback that is invoked with the result. The @@ -295,6 +300,7 @@ export class PoseLandmarker extends VisionTaskRunner { * callback returns. Only use this method when the PoseLandmarker is created * with running mode `video`. * + * @export * @param videoFrame A video frame to process. * @param timestamp The timestamp of the current frame, in ms. * @param imageProcessingOptions the `ImageProcessingOptions` specifying how @@ -313,6 +319,7 @@ export class PoseLandmarker extends VisionTaskRunner { * in high-throughput applications. Only use this method when the * PoseLandmarker is created with running mode `video`. * + * @export * @param videoFrame A video frame to process. * @param timestamp The timestamp of the current frame, in ms. * @return The landmarker result. Any masks are copied to extend the @@ -327,6 +334,7 @@ export class PoseLandmarker extends VisionTaskRunner { * callback returns. Only use this method when the PoseLandmarker is created * with running mode `video`. * + * @export * @param videoFrame A video frame to process. * @param timestamp The timestamp of the current frame, in ms. * @param imageProcessingOptions the `ImageProcessingOptions` specifying how diff --git a/mediapipe/tasks/web/vision/types.ts b/mediapipe/tasks/web/vision/types.ts index 760b97b77d..b906b11af3 100644 --- a/mediapipe/tasks/web/vision/types.ts +++ b/mediapipe/tasks/web/vision/types.ts @@ -22,6 +22,7 @@ export * from '../../../tasks/web/vision/face_detector/face_detector'; export * from '../../../tasks/web/vision/face_landmarker/face_landmarker'; export * from '../../../tasks/web/vision/face_stylizer/face_stylizer'; export * from '../../../tasks/web/vision/gesture_recognizer/gesture_recognizer'; +export * from '../../../tasks/web/vision/holistic_landmarker/holistic_landmarker'; export * from '../../../tasks/web/vision/hand_landmarker/hand_landmarker'; export * from '../../../tasks/web/vision/image_classifier/image_classifier'; export * from '../../../tasks/web/vision/image_embedder/image_embedder';
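A minimal usage sketch for the new HolisticLandmarker in `image` mode. The FilesetResolver helper, the '@mediapipe/tasks-vision' import path, and the model asset path are assumptions about the published bundle; the HolisticLandmarker calls themselves follow the createFromOptions(), HolisticLandmarkerOptions, and detect() signatures added in this patch.

import {FilesetResolver, HolisticLandmarker} from '@mediapipe/tasks-vision';

// The Wasm asset location is an assumption; point this at wherever the
// tasks-vision Wasm files are hosted.
const vision = await FilesetResolver.forVisionTasks(
    'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm');

const holisticLandmarker = await HolisticLandmarker.createFromOptions(vision, {
  baseOptions: {
    // Hypothetical path; substitute the actual holistic landmarker .task file.
    modelAssetPath: 'holistic_landmarker.task',
  },
  runningMode: 'IMAGE',
  minPoseDetectionConfidence: 0.5,
  outputFaceBlendshapes: true,
});

const image = document.querySelector('img')!;
// Callback form: the result, including any masks, is only valid for the
// duration of the callback.
holisticLandmarker.detect(image, result => {
  console.log(result.poseLandmarks, result.faceBlendshapes);
});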
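A sketch of `video` mode with a requestAnimationFrame loop, assuming the shared `runningMode` option from VisionTaskOptions and MPMask's getAsFloat32Array() accessor; timestamps are passed in milliseconds, as documented on detectForVideo().

const videoLandmarker = await HolisticLandmarker.createFromOptions(vision, {
  baseOptions: {modelAssetPath: 'holistic_landmarker.task'},  // hypothetical path
  runningMode: 'VIDEO',
  outputPoseSegmentationMasks: true,
});

const video = document.querySelector('video')!;
function renderLoop(): void {
  videoLandmarker.detectForVideo(video, performance.now(), result => {
    // Masks are only valid inside the callback; copy the pixel data out if it
    // is needed after the callback returns.
    const mask = result.poseSegmentationMasks[0];
    if (mask) {
      const pixels = mask.getAsFloat32Array();  // assumed MPMask accessor
      console.log('mask pixel count:', pixels.length);
    }
  });
  requestAnimationFrame(renderLoop);
}
renderLoop();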
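Continuing the image-mode example above, a sketch of the synchronous form and of option updates, per the detect() and setOptions() doc comments: without a callback the task copies any masks so the result outlives the call, setOptions() with a subset of options only affects those options, and setting a field back to undefined restores its default.

const result = holisticLandmarker.detect(image);
console.log(result.leftHandLandmarks, result.rightHandWorldLandmarks);

// Only minFaceDetectionConfidence changes; all other options keep their values.
await holisticLandmarker.setOptions({minFaceDetectionConfidence: 0.7});
// Explicitly setting the option to undefined resets it to its default (0.5).
await holisticLandmarker.setOptions({minFaceDetectionConfidence: undefined});

// Release graph resources when done.
holisticLandmarker.close();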