From 14a74363bc3f4a72fec8f14d23303ca5d65480db Mon Sep 17 00:00:00 2001
From: Wei-Chen-hub <1259566226@qq.com>
Date: Wed, 6 Nov 2024 18:12:46 +0800
Subject: [PATCH] update signavatar 241106

---
 mmhuman3d/data/data_converters/signavatar.py | 444 +++++++++++++++++++
 tools/convert_datasets.py                    |   4 +
 2 files changed, 448 insertions(+)
 create mode 100644 mmhuman3d/data/data_converters/signavatar.py

diff --git a/mmhuman3d/data/data_converters/signavatar.py b/mmhuman3d/data/data_converters/signavatar.py
new file mode 100644
index 00000000..80b87814
--- /dev/null
+++ b/mmhuman3d/data/data_converters/signavatar.py
@@ -0,0 +1,444 @@
+import glob
+import json
+import os
+import pdb
+import random
+import pickle
+import time
+from typing import List
+
+import cv2
+import numpy as np
+from tqdm import tqdm
+import torch
+# from scipy.spatial.distance import cdist
+
+# import mmcv
+# from mmhuman3d.models.body_models.builder import build_body_model
+# from mmhuman3d.core.conventions.keypoints_mapping import smplx
+from mmhuman3d.core.conventions.keypoints_mapping import (
+    convert_kps,
+    get_keypoint_idx,
+    get_keypoint_idxs_by_part,
+)
+from mmhuman3d.models.body_models.utils import batch_transform_to_camera_frame
+from mmhuman3d.models.body_models.utils import transform_to_camera_frame
+from mmhuman3d.data.data_structures.human_data import HumanData
+from .base_converter import BaseModeConverter
+from .builder import DATA_CONVERTERS
+from mmhuman3d.models.body_models.builder import build_body_model
+from mmhuman3d.core.cameras import build_cameras
+
+
+@DATA_CONVERTERS.register_module()
+class SignAvatarConverter(BaseModeConverter):
+    """SignAvatar dataset."""
+    ACCEPTED_MODES = ['lan2m', 'ham2m', 'word2m']
+
+    def __init__(self, modes: List = []) -> None:
+
+        self.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
+        self.misc_config = dict(
+            bbox_body_scale=1.2,
+            bbox_facehand_scale=1.0,
+            bbox_source='keypoints2d_original',
+            flat_hand_mean=False,
+            cam_param_type='prespective',
+            cam_param_source='original',
+            smplx_source='original',
+            # contact_label=['part_segmentation', 'contact_region'],
+            # part_segmentation=['left_foot', 'right_foot'],
+        )
+
+        self.smplx_shape = {
+            'betas': (-1, 10),
+            'transl': (-1, 3),
+            'global_orient': (-1, 3),
+            'body_pose': (-1, 21, 3),
+            'left_hand_pose': (-1, 15, 3),
+            'right_hand_pose': (-1, 15, 3),
+            'leye_pose': (-1, 3),
+            'reye_pose': (-1, 3),
+            'jaw_pose': (-1, 3),
+            'expression': (-1, 10)
+        }
+
+        super(SignAvatarConverter, self).__init__(modes)
+
+    def split_video(self, video_path: str, annot_len: int = 0) -> bool:
+        # split video into %06d-numbered frames using ffmpeg
+
+        # create folder to store frames
+        frame_folder = video_path.replace('.mp4', '').replace('videos', 'images')
+        os.makedirs(frame_folder, exist_ok=True)
+
+        existing_files = glob.glob(os.path.join(frame_folder, '*.jpg'))
+        if annot_len != 0:
+            if len(existing_files) == annot_len:
+                return True
+
+        # split video into frames
+        os.system(f'ffmpeg -loglevel quiet -i {video_path} {frame_folder}/%06d.jpg')
+
+        # check number of extracted frames against annotation length
+        frame_files = glob.glob(os.path.join(frame_folder, '*.jpg'))
+        if annot_len != 0:
+            if len(frame_files) == annot_len:
+                return True
+            else:
+                return False
+
+    def _keypoints_to_scaled_bbox_fh(self,
+                                     keypoints,
+                                     occ=None,
+                                     scale=1.0,
+                                     convention='smplx'):
+        '''Obtain scaled bbox in xyxy format given keypoints
+        Args:
+            keypoints (np.ndarray): Keypoints
+            scale (float): Bounding Box scale
+
+        Returns:
+            bbox_xyxy (np.ndarray): Bounding box in xyxy format
+        '''
+        bboxs = []
+        for body_part in ['head', 'left_hand', 'right_hand']:
+            kp_id = get_keypoint_idxs_by_part(body_part, convention=convention)
+
+            # keypoints_factory=smplx.SMPLX_KEYPOINTS)
+            kps = keypoints[kp_id]
+
+            if occ is None:
+                conf = 1
+            else:
+                occ_p = occ[kp_id]
+
+                if np.sum(occ_p) / len(kp_id) >= 0.1:
+                    conf = 0
+                    # print(f'{body_part} occluded, occlusion: {np.sum(occ_p) / len(kp_id)}, skip')
+                else:
+                    # print(f'{body_part} good, {np.sum(self_occ_p + occ_p) / len(kp_id)}')
+                    conf = 1
+
+            xmin, ymin = np.amin(kps, axis=0)
+            xmax, ymax = np.amax(kps, axis=0)
+
+            width = (xmax - xmin) * scale
+            height = (ymax - ymin) * scale
+
+            x_center = 0.5 * (xmax + xmin)
+            y_center = 0.5 * (ymax + ymin)
+            xmin = x_center - 0.5 * width
+            xmax = x_center + 0.5 * width
+            ymin = y_center - 0.5 * height
+            ymax = y_center + 0.5 * height
+
+            bbox = np.stack([xmin, ymin, xmax, ymax, conf],
+                            axis=0).astype(np.float32)
+
+            bboxs.append(bbox)
+        return bboxs[0], bboxs[1], bboxs[2]
+
+    def convert_by_mode(self, dataset_path: str, out_path: str,
+                        mode: str) -> dict:
+        """
+        Args:
+            dataset_path (str): Path to directory where raw images and
+                annotations are stored.
+            out_path (str): Path to directory to save preprocessed npz file
+            mode (str): Mode in accepted modes
+
+        Returns:
+            dict:
+                A dict containing keys image_path, bbox_xywh,
+                keypoints2d_smplx, keypoints2d_smplx_mask, keypoints3d_smplx,
+                keypoints3d_smplx_mask, smplx and meta, stored in
+                HumanData() format
+        """
+
+        assert mode == 'lan2m', 'only the lan2m mode is implemented for now'
+
+        base_folder_dict = {'lan2m': 'language2motion'}
+        base_folder = base_folder_dict[mode]
+
+        # load annotations
+        annot_base_folder = os.path.join(dataset_path, base_folder, 'annotations')
+        annot_files = glob.glob(os.path.join(annot_base_folder, '*.pkl'))
+
+        # build smplx models
+        gendered_smplx = {}
+        for gender in ['male', 'female', 'neutral']:
+            gendered_smplx[gender] = build_body_model(
+                dict(
+                    type='SMPLX',
+                    keypoint_src='smplx',
+                    keypoint_dst='smplx',
+                    model_path='data/body_models/smplx',
+                    gender=gender,
+                    num_betas=10,
+                    use_face_contour=True,
+                    flat_hand_mean=self.misc_config['flat_hand_mean'],
+                    use_pca=False,
+                    batch_size=1)).to(self.device)
+
+        # init seed and size
+        seed, size = '241106', '99999'
+        size_i = min(int(size), len(annot_files))
+        random.seed(int(seed))
+        np.set_printoptions(suppress=True)
+        random_ids = np.random.RandomState(seed=int(seed)).permutation(999999)
+        used_id_num = 0
+
+        print('Total sequences:', len(annot_files))
+
+        # use HumanData to store all data
+        human_data = HumanData()
+
+        # initialize output for human_data
+        smplx_ = {}
+        for key in self.smplx_shape.keys():
+            smplx_[key] = []
+        keypoints2d_, keypoints3d_ = [], []
+        bboxs_ = {}
+        for bbox_name in [
+                'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh',
+                'rhand_bbox_xywh'
+        ]:
+            bboxs_[bbox_name] = []
+        meta_ = {}
+        for meta_name in ['principal_point', 'focal_length', 'height', 'width', 'gender',
+                          'sequence_name', 'left_hand_valid', 'right_hand_valid']:
+            meta_[meta_name] = []
+        image_path_ = []
+
+        annot_files = annot_files[:size_i]
+
+        # for annot_path in tqdm(annot_files, desc=f'Splitting {mode}',
+        #                        leave=False, position=0):
+        #     vid_path = annot_path.replace('annotations', 'videos').replace('.pkl', '.mp4')
+        #     self.split_video(vid_path)
+
+        # from concurrent.futures import ThreadPoolExecutor, as_completed
+        # from tqdm import tqdm
+
+        # # split videos into frames in parallel with a thread pool
+        # with ThreadPoolExecutor(max_workers=16) as executor:
+        #     futures = [
+        #         executor.submit(self.split_video, annot_path.replace('annotations', 'videos').replace('.pkl', '.mp4'))
+        #         for annot_path in annot_files
+        #     ]
+
+        #     # track task progress with tqdm
+        #     for future in tqdm(as_completed(futures), desc=f'Splitting {mode}', leave=False, position=0, total=len(annot_files)):
+        #         # try:
+        #         future.result()  # catch exceptions and keep the progress bar accurate
+        #         # except Exception as e:
+        #         #     print(f"Error processing file: {e}")
+
+        # test_seqs = ['_20g7MG8K1U_3-8-rgb_front', '_Dh512GX6d8_14-8-rgb_front',
+        #              '00kppw3aqus_11-3-rgb_front']
+        # annot_files = [f'{annot_base_folder}/{seq}.pkl' for seq in test_seqs]
+
+        for annot_path in tqdm(annot_files, desc=f'Converting {mode}',
+                               leave=False, position=0):
+
+            # load annot pickle
+            annot_seq = np.load(annot_path, allow_pickle=True)
+            # for key in annot_seq.keys():
+            #     print(key, annot_seq[key].shape)
+            vid_path = annot_path.replace('annotations', 'videos').replace('.pkl', '.mp4')
+            frame_folder = vid_path.replace('.mp4', '').replace('videos', 'images')
+
+            annot_len = annot_seq['smplx'].shape[0]
+            split_success = self.split_video(vid_path, annot_len)
+            if not split_success:
+                pdb.set_trace()
+                continue
+
+            # each frame is a 182-dim SMPL-X vector: 3 global_orient + 63 body
+            # + 45 + 45 hands + 3 jaw + 10 betas + 10 expression + 3 transl
+            smplx_seq = annot_seq['smplx'].copy()
+            gender = 'neutral'
+            smplx_param = {
+                'global_orient': smplx_seq[:, :3],
+                'body_pose': smplx_seq[:, 3:66],
+                'left_hand_pose': smplx_seq[:, 66:111],
+                'right_hand_pose': smplx_seq[:, 111:156],
+                'jaw_pose': smplx_seq[:, 156:159],
+                'betas': smplx_seq[:, 159:169],
+                'expression': smplx_seq[:, 169:179],
+                'transl': smplx_seq[:, 179:182]
+            }
+            for key in self.smplx_shape.keys():
+                if key in smplx_param.keys():
+                    smplx_param[key] = smplx_param[key].reshape(self.smplx_shape[key])
+                else:
+                    pad_shape = np.array(self.smplx_shape[key])
+                    pad_shape[0] = annot_len
+                    pad_shape = tuple(pad_shape)
+                    smplx_param[key] = np.zeros(pad_shape)
+
+            # prepare smplx tensor
+            smplx_param_tensor = {}
+            for key in self.smplx_shape.keys():
+                smplx_param_tensor[key] = torch.tensor(smplx_param[key].reshape(self.smplx_shape[key]),
+                                                       dtype=torch.float).to(self.device)
+
+            # ue2opencv = np.array([[-1.0, 0, 0, 0],
+            #                       [0, -1, 0, 0],
+            #                       [0, 0, 1, 0],
+            #                       [0, 0, 0, 1]])
+
+            # get output
+            output = gendered_smplx[gender](**smplx_param_tensor)
+            kps3d_c = output['joints']
+            # kps3d_c = output['joints'].detach().cpu().numpy()
+            # pelvis_world = kps3d_c[:, get_keypoint_idx('pelvis', 'smplx'), :]
+
+            # # transform to cam space
+            # global_orient, transl = batch_transform_to_camera_frame(
+            #     global_orient=smplx_param['global_orient'].reshape(-1, 3),
+            #     transl=smplx_param['transl'].reshape(-1, 3),
+            #     pelvis=pelvis_world.reshape(-1, 3),
+            #     extrinsic=ue2opencv)
+
+            # smplx_param['global_orient'] = global_orient
+            # smplx_param['transl'] = transl
+
+            # # prepare smplx tensor
+            # smplx_param_tensor = {}
+            # for key in self.smplx_shape.keys():
+            #     smplx_param_tensor[key] = torch.tensor(smplx_param[key].reshape(self.smplx_shape[key]),
+            #                                            dtype=torch.float).to(self.device)
+
+            # get image size
+            img_path = os.path.join(frame_folder, '000001.jpg')
+            img = cv2.imread(img_path)
+            height, width, _ = img.shape
+
+            for fid in tqdm(annot_seq['total_valid_index'], position=1, leave=False):
+                # get image path
+                img_p = os.path.join(frame_folder, f'{fid+1:06d}.jpg')
+                image_path = img_p.replace(dataset_path + '/', '')
+                if not os.path.exists(img_p):
+                    pdb.set_trace()
+
+                left_valid = annot_seq['left_valid'][fid].cpu().item()
+                right_valid = annot_seq['right_valid'][fid].cpu().item()
+                # smplx_valid = True if str(fid) in annot_seq['total_valid_index'] else False
+
+                focal_length = list(annot_seq['focal'][fid])
+                principal_point = list(annot_seq['princpt'][fid])
+
+                camera = build_cameras(
+                    dict(
+                        type='PerspectiveCameras',
+                        convention='opencv',
+                        in_ndc=False,
+                        focal_length=focal_length,
+                        image_size=(width, height),
+                        principal_point=principal_point)).to(self.device)
+
+                # 3d -> 2d
+                kps2d = camera.transform_points_screen(kps3d_c[fid]).detach().cpu().numpy().squeeze()[:, :2]
+                kps3d = kps3d_c[fid].detach().cpu().numpy().squeeze()
+
+                # test overlay
+                # img = cv2.imread(img_p)
+                # for kp in kps2d:
+                #     cv2.circle(img, (int(kp[0]), int(kp[1])), 5, (0, 255, 0), -1)
+                # cv2.imwrite(f'{out_path}/{os.path.basename(frame_folder)}_{fid}.jpg', img)
+
+                # get bbox from 2d keypoints
+                bboxs = self._keypoints_to_scaled_bbox_bfh(
+                    kps2d,
+                    body_scale=self.misc_config['bbox_body_scale'],
+                    fh_scale=self.misc_config['bbox_facehand_scale'])
+                for i, bbox_name in enumerate([
+                        'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh',
+                        'rhand_bbox_xywh'
+                ]):
+                    xmin, ymin, xmax, ymax, conf = bboxs[i]
+                    bbox = np.array([
+                        max(0, xmin),
+                        max(0, ymin),
+                        min(width, xmax),
+                        min(height, ymax)
+                    ])
+                    bbox_xywh = self._xyxy2xywh(bbox)  # list of len 4
+                    bbox_xywh.append(conf)  # (5,)
+                    bboxs_[bbox_name].append(bbox_xywh)
+
+                # append image path
+                image_path_.append(image_path)
+
+                # append keypoints
+                keypoints2d_.append(kps2d)
+                keypoints3d_.append(kps3d)
+
+                # append smplx
+                for key in self.smplx_shape.keys():
+                    # try:
+                    smplx_[key].append(smplx_param[key][fid])
+                    # except:
+                    #     pdb.set_trace()
+
+                # append meta
+                meta_['principal_point'].append(principal_point)
+                meta_['focal_length'].append(focal_length)
+                meta_['height'].append(height)
+                meta_['width'].append(width)
+                meta_['gender'].append(gender)
+                meta_['sequence_name'].append(os.path.basename(frame_folder))
+                meta_['left_hand_valid'].append(left_valid)
+                meta_['right_hand_valid'].append(right_valid)
+
+        # get size
+        size_i = len(annot_files)
+
+        # save keypoints 2d smplx
+        keypoints2d = np.concatenate(keypoints2d_, axis=0).reshape(-1, 144, 2)
+        keypoints2d_conf = np.ones([keypoints2d.shape[0], 144, 1])
+        keypoints2d = np.concatenate([keypoints2d, keypoints2d_conf], axis=-1)
+        keypoints2d, keypoints2d_mask = convert_kps(
+            keypoints2d, src='smplx', dst='human_data')
+        human_data['keypoints2d_smplx'] = keypoints2d
+        human_data['keypoints2d_smplx_mask'] = keypoints2d_mask
+
+        # save keypoints 3d smplx
+        keypoints3d = np.concatenate(keypoints3d_, axis=0).reshape(-1, 144, 3)
+        keypoints3d_conf = np.ones([keypoints3d.shape[0], 144, 1])
+        keypoints3d = np.concatenate([keypoints3d, keypoints3d_conf], axis=-1)
+        keypoints3d, keypoints3d_mask = convert_kps(
+            keypoints3d, src='smplx', dst='human_data')
+        human_data['keypoints3d_smplx'] = keypoints3d
+        human_data['keypoints3d_smplx_mask'] = keypoints3d_mask
+
+        # pdb.set_trace()
+        # save bbox
+        for bbox_name in [
+                'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh',
+                'rhand_bbox_xywh'
+        ]:
+            bbox_xywh_ = np.array(bboxs_[bbox_name]).reshape((-1, 5))
+            human_data[bbox_name] = bbox_xywh_
+
+        # save smplx
+        for key in smplx_.keys():
+            smplx_[key] = np.concatenate(
+                smplx_[key], axis=0).reshape(self.smplx_shape[key])
+
+        human_data['smplx'] = smplx_
+
+        # save image path
+        human_data['image_path'] = image_path_
+
+        # save contact
+        # human_data['contact'] = contact_
+
+        # save meta and misc
+        human_data['config'] = f'signavatar_{mode}'
+        human_data['misc'] = self.misc_config
+        human_data['meta'] = meta_
+
+        os.makedirs(out_path, exist_ok=True)
+        out_file = os.path.join(
+            # out_path, f'moyo_{self.misc_config["flat_hand_mean"]}.npz')
+            out_path,
+            f'signavatar_{mode}_{seed}_{"{:05d}".format(size_i)}.npz')
+        human_data.dump(out_file)
\ No newline at end of file
diff --git a/tools/convert_datasets.py b/tools/convert_datasets.py
index a480f1b3..f044371c 100644
--- a/tools/convert_datasets.py
+++ b/tools/convert_datasets.py
@@ -191,6 +191,10 @@
         type='RichConverter',  # real
         prefix='rich',
         modes=['train', 'test', 'val']),
+    signavatar=dict(
+        type='SignAvatarConverter',  # real
+        prefix='signavatar',
+        modes=['lan2m', 'ham2m', 'word2m']),
     sgnify=dict(
         type='SgnifyConverter',  # real
         prefix='sgnify',