diff --git a/ARGS_README.md b/ARGS_README.md new file mode 100644 index 0000000..db5d372 --- /dev/null +++ b/ARGS_README.md @@ -0,0 +1,306 @@ +# Arguments +This is a description to provide details about arguments of Posepred API. +Pospred is an open-source toolbox for pose prediction in PyTorch. Posepred is a library that provides a unified interface to train, evaluate, and visualize the models. The library has 5 important APIs. The details of how to use these API are described below. Two other important directories are models and losses. In these two directories, you can add any desired model and loss function and leverage all predefined functions of the library to train and test and compare in a fair manner. + +# Hydra +``` +posepred +├── configs +│ ├── hydra +| ├── data +| └── main.yaml -- main config file for data module (Essentially responsible for creating dataloader) +| ├── model +| ├── common.yaml -- share config file for all models +| ├── st_trans.yaml +│ ├── history_repeats_itself.yaml +| ├── sts_gcn.yaml +| ├── ... +| ├── optimizer +| ├── adam.yaml -- config file for adam optimizer +| ├── sgd.yaml -- config file for stochastic gradient descent optimizer +| ├── ... +| ├── scheduler +| ├── reduce_lr_on_plateau.yaml -- config file for reducing learning_rate on plateau technique arguments +| ├── step_lr.yaml -- config file for step of scheduler arguments +| ├── ... +| ├── visualize.yaml -- config file for visualizer API arguments +| ├── evaluate.yaml -- config file for evaluate API arguments +| ├── preprocess.yaml -- config file for preprocess API arguments +| ├── train.yaml -- config file for train API arguments +| ├── generate_output.yaml -- config file for generate_output API arguments +| |── metrics.yaml -- config file for metrics +| |── shared.yaml -- config file for shared arguments for the apis +| +└─── logging.conf -- logging configurations +``` +Now we will precisely explain each module. +#### data +Location: 'configs/hydra/data' + +`main.yaml`: +``` +Mandatory arguments: +keypoint_dim: Number of dim data should have Ex: 2 for 2D and 3 for 3D (int) +model_pose_format: Used data format for pose dataset (str) +metric_pose_format: Used data format for metrics if pose dataset is used. 
If no value is specified it'll use the model_pose_format's value (str) +is_h36_testing: Set True to configure the dataloader for testing huamn3.6m (bool) +is_testing: Set True to configure the dataloader for testing (bool) (default: False) +batch_size: Indicates size of batch size (int) (default: 256) +shuffle: Indicates shuffling the data in dataloader (bool) (default: False) +pin_memory: Using pin memory or not in dataloader (bool) (default: False) +num_workers: Number of workers (int) +len_observed: Number of frames to observe (int) +len_future: Number of frames to predict(int) + +optional arguments: +seq_rate: The gap between start of two adjacent sequences (1 means no gap) (int) (default: 2) (only used for pose data_loader) +frame_rate: The gap between two frames (1 means no gap) (int) (default: 2) (only used for pose data_loader) +``` +#### model +Folder Location: 'configs/hydra/model' + +**Available model names for Apis:** st_trans, msr_gcn, pgbig, sts_gcn, history_repeats_itself, potr, pv_lstm, derpof, disentangled, zero_vel + +`common.yaml`: +``` +Mandatory arguments: +keypoint_dim: Number of dim data should have Ex: 2 for 2D and 3 for 3D (int) +pred_frames_num: Number of frames to predict, obligatory when ground-truth is not available (int) +obs_frames_num: Number of frames to observe (int) +mean_pose: +std_pose: +device: Choose either 'cpu' or 'cuda' (str) +``` + +`.yaml`: + +For each model you implement, you should provide a yaml file to configure its argumants. +``` +Mandatory arguments: +type: Name of the model (str) +loss.type: Name of the loss function (str) + +optional arguments: +Every specific argument required for your model! +``` + +#### optimizer +Folder Location: 'configs/hydra/optimizer' + +`adam.yaml` +``` +type type=adam for adam optimizer (str) +lr learning rate (float) (default=0.001) +weight_decay weight decay coefficient (default: 1e-5) +``` +`adamw.yaml` +``` +type type=adamw for adamw optimizer (str) +lr learning rate (float) (default=0.001) +betas coefficients used for computing running averages of gradient and its square (default: (0.9, 0.999)) +weight_decay weight decay coefficient (default: 1e-5) +``` +`sam.yaml` +``` +type type=sam for sharpness aware minimization (str) +lr learning rate (float) (default=0.001) +weight_decay weight decay coefficient (default: 1e-5) +``` +`sgd.yaml` +``` +type type=sgd for stochastic gradient descent (str) +lr learning rate (float) (default=0.001) +momentum momentum factor in sgd optimizer (float) (default=0) +dampening dampening for momentum in sgd optimizer (float) (default=0) +weight_decay weight decay coefficient (default: 1e-5) +nesterov enables Nesterov momentum (bool) (default=False) +``` + +#### scheduler +Folder Location: 'configs/hydra/scheduler' + +`multi_step_lr.yaml` +``` +type type=multi_step_lr to use this technique +step_size List of epoch indices. Must be increasing. +gamma Multiplicative factor of learning rate decay. (float) (default=0.4) +``` +`reduce_lr_on_plateau.yaml` +``` +type type=reduce_lr_on_plateau to use this technique (str) +mode One of `min`, `max`. In `min` mode, lr will be reduced when the quantity monitored has stopped + decreasing; in `max` mode it will be reduced when the quantity monitored has stopped increasing (str) (default=min) +factor actor by which the learning rate will be reduced. new_lr = lr * factor (float) (default=0.5) +patience Number of epochs with no improvement after which learning rate will be reduced. 
(int) (default=20) +threshold Threshold for measuring the new optimum, to only focus on significant changes (float) (default=le-3) +verbose If True, prints a message to stdout for each update. (bool) (defaulTrue) +``` +`step_lr.yaml` +``` +type type=step_lr to use this technique +step_size Period of learning rate decay (int) (default=50) +gamma Multiplicative factor of learning rate decay. (float) (default=0.5) +last_epoch The index of last epoch (int) (default=-1) +verbose If True, prints a message to stdout for each update (bool) (default=False) +``` +#### metrics +File Location: 'configs/hydra/metrics.yaml' + +`metrics.yaml`: +``` +pose_metrics: List which metrics in the metrics module you want to use. +``` + + +## Preprocessing + +**Available dataset names for preprocessing:** human3.6m, amass, 3dpw + +Check preprocessing config file: "configs/hydra/preprocess.yaml" for more details. + +You can change preprocessor via commandline like below: +``` +mandatory arguments: + - annotated_data_path Path of the dataset + - dataset Name of the dataset Ex: 'human3.6m' or '3dpw' (str) + - data_type Type of data to use Ex: 'train', 'validation' or 'test' (str) + +optional arguments: + - load_60Hz This value is used only for 3DPW + - output_name Name of generated csv file (str) (for default we have specific conventions for each dataset) +``` +Example: +```bash +python -m api.preprocess \ + dataset=human3.6m \ + annotated_data_path=$DATASET_PATH \ + data_type=train \ + output_name=new_full \ + data_type=train +``` + +## Training +Check training config file: "configs/hydra/train.yaml" for more details. + +You can change training args via command line like below: +``` +mandatory arguments: + data Name of the dataloader yaml file, default is main dataloader (str) + model Name of the model yaml file (str) + optimizer Name of the optimizer yaml file, default is adam (str) + scheduler Name of the scheduler yaml file, default is reduce_lr_on_plateau (str) + train_dataset Path of the train dataset (str) + keypoint_dim Dimension of the data Ex: 2 for 2D and 3 for 3D (int) + epochs Number of training epochs (int) (default: 10) + +optional arguments: + - valid_dataset Path of validation dataset (str) + - normalize Normalize the data or not (bool) + - snapshot_interval Save snapshot every N epochs (int) + - load_path Path to load a model (str) + - start_epoch Start epoch (int) + - device Choose either 'cpu' or 'cuda' (str) + - save_dir Path to save the model (str) + - obs_frames_num Number of observed frames for pose dataset (int) + - pred_frames_num Number of future frames for pose dataset (int) + - model_pose_format Used data format for pose dataset (str) + - metric_pose_format Used data format for metrics if pose dataset is used. If no value is specified it'll use the model_pose_format's value + - experiment_name: Experiment name for MLFlow (str) (default: "defautl experiment") + - mlflow_tracking_uri: Path for mlruns folder for MLFlow (str) (default: saves mlruns in the current folder) + +``` + +Example: +```bash +python -m api.train model=history_repeats_itself \ + train_dataset=$DATASET_TRAIN_PATH \ + valid_dataset=$DATASET_TEST_PATH \ + obs_frames_num=50 \ + pred_frames_num=25 +``` + +## Evaluation +Check evaluation config file: "configs/hydra/evaluate.yaml" for more details. 
+ +You can change evaluation args via command line like below: +``` +mandatory arguments: + data Name of the dataloader yaml file, default is main dataloader (str) + model Name of the model yaml file (str) + dataset Name of dataset Ex: 'posetrack' or '3dpw' (str) + keypoint_dim Number of dim data should have Ex: 2 for 2D and 3 for 3D (int) + load_path Path to load a model (str) + save_dir Path to save output csv file (str) + +optional arguments: + - device Choose either 'cpu' or 'cuda' (str) + - obs_frames_num Number of observed frames for pose dataset (int) (default: 10) + - pred_frames_num Number of future frames for pose dataset (int) (default:25) + - model_pose_format Used data format for pose dataset (str) (default: xyz) + - metric_pose_format Used data format for metrics if pose dataset is used. If no value is specified it'll use the model_pose_format's value +``` + +Example: +```bash +python -m api.evaluate model=msr_gcn \ + dataset=$DATASET_TEST_PATH \ + rounds_num=1 \ + obs_frames_num=10 \ + pred_frames_num=25 \ + load_path=$MODEL_PATH +``` +another example: +```bash +python -m api.evaluate model=zero_vel \ + dataset=$DATASET_TEST_PATH \ + rounds_num=1 \ + obs_frames_num=10 \ + pred_frames_num=25 +``` + +## Generating Outputs + +``` +mandatory arguments: + data Name of the dataloader yaml file, default is main dataloader (str) + model Name of the model yaml file (str) + dataset Name of dataset Ex: 'posetrack' or '3dpw' (str) + keypoint_dim Number of dim data should have Ex: 2 for 2D and 3 for 3D (int) + load_path Path to load a model (str) + pred_frames_num Number of frames to predict. Mandatory if load_path is None. (int) + +optional arguments: + save_dir Path to save the model (str) + device Choose either 'cpu' or 'cuda' (str) +``` + +Example: +```bash +python -m api.generate_final_output model=st_trans \ + dataset=$DATASET_PATH \ + load_path=$MODEL_CHECKPOINT \ + obs_frames_num=10 \ + pred_frames_num=25 \ + data.is_h36_testing=true \ + save_dir=$OUTPUT_PATH +``` + +## Visualization +You can directly change config file: "congifs/hydra/visualize.yaml". Note that you need your model and data: "configs/hydra/data/main.yaml" configs but the default ones should be fine. + +Also, all essential changes you need are defined below: +``` +mandatory arguments: + dataset_type Name of using dataset. (str) + model Name of desired model. (str) + images_dir Path to existing images on your local computer (str) + showing Indicates which images we want to show (dash(-) separated list) ([observed, future, predicted, completed]) + index Index of a sequence in dataset to visualize. (int) + + +optional arguments: + load_path Path to pretrained model. Mandatory if using a training-based model (str) + pred_frames_num Number of frames to predict. Mandatory if load_path is None. (int) +``` + +TODO: add examples \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..29ebfa5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. 
+ + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. 
Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. 
This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. 
+ + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. 
+ + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. 
(Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
+ + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. \ No newline at end of file diff --git a/README.md b/README.md index 645b440..3b5c745 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,234 @@ -
-UnPOSed
-an open-source library of human POSe forecasting with Uncertainty
+# UnPOSed +UnPOSed is an open-source toolbox, implemented in PyTorch, for pose prediction/forecasting: given an observed sequence of human poses, it predicts the future sequence.
-This repository is being updated so stay tuned! +

+[figure: input pose (observation) alongside the output pose prediction and ground truth]

+ +# Overview + +The main parts of the library are as follows: + +``` +unposed +├── api +│ ├── preprocess.py -- script to run the preprocessor module +│ ├── train.py -- script to train the models, runs factory.trainer.py +│ ├── evaluate.py -- script to evaluate the models, runs factory.evaluator.py +│ └── generate_final_output.py -- script to generate and save the outputs of the models, runs factory.output_generator.py +├── models +│ ├── st_trans/ST_Trans.py +│ ├── pgbig/pgbig.py +│ ├── history_repeats_itself/history_repeats_itself.py +│ ├── sts_gcn/sts_gcn.py +│ ├── zero_vel.py +│ ├── msr_gcn/msrgcn.py +│ ├── potr/potr.py +│ ├── pv_lstm.py +│ ├── disentangled.py +│ ├── derpof.py +│ ├── ... +├── losses +│ ├── pua_loss.py +│ ├── mpjpe.py +│ ├── ... +``` +The library has four important APIs to +- preprocess data +- train the model +- evaluate a model quantitatively +- generate model outputs + +The details of how to use these APIs are described below. Two other important directories are models and losses. In these two directories, you can add any desired model and loss function and leverage all predefined functions of the library to train, test, and compare in a fair manner. + +Please check the other directories (optimizers, metrics, schedulers, visualization, utils, etc.) for more capabilities. + +# Getting Started +To get started as quickly as possible, follow the instructions in this section. This should allow you to train a model from scratch, evaluate your pretrained models, and produce basic visualizations. + +### Dependencies +Make sure you have the following dependencies installed before proceeding: +- Python 3.7+ distribution +- PyTorch >= 1.7.0 +- CUDA >= 10.0.0 +- pip >= 21.3.1 +### Virtualenv +You can create and activate a virtual environment as below (replace <venv_path> with the desired location of the environment): +```bash +pip install --upgrade virtualenv + +virtualenv -p python3.7 <venv_path> + +source <venv_path>/bin/activate + +pip install --upgrade pip +``` +### Requirements + +You can install all the required packages with: + +```bash +pip install -r requirements.txt +``` +Before moving forward, you need to install Hydra and know its basic functions to run the different modules and APIs. + +## Hydra +Hydra is a framework for elegantly configuring complex applications with a hierarchical structure. +For more information about Hydra, read their official [documentation](https://hydra.cc/). + +In order to have a better structure and understanding of our arguments, we use Hydra to dynamically create a hierarchical configuration by composition and override it through config files and the command line. +If you run into any issues or errors, install Hydra like below: +```bash +pip install hydra-core --upgrade +``` +For more information about Hydra and our modules, please visit [here](ARGS_README.md#Hydra). + +## MLflow + +We use MLflow in this library for tracking the training process. The features provided by MLflow help users track their training process, set up experiments with multiple runs, and compare runs with each other. Its clean and organized UI helps users better understand and track their experiments. See more about MLflow [here](https://mlflow.org/). + +This part is not obligatory, but you can track the training process with MLflow by running the command below in the folder containing the `mlruns` folder. +```bash +mlflow ui +``` + +# Datasets + +We currently support the following datasets: +- [Human3.6M](http://vision.imar.ro/human3.6m/description.php) in exponential map format can be downloaded from [here](http://www.cs.stanford.edu/people/ashesh/h3.6m.zip).
+- [AMASS](https://amass.is.tue.mpg.de/en) from their official website. +- [3DPW](https://virtualhumans.mpi-inf.mpg.de/3DPW/) from their official website. + + +Please download the datasets and put each one in its own folder. We will refer to this folder as `$DATASET_PATH` in the following sections. + +# Models + +We've tested the following models: +- [ST-Transformer](https://arxiv.org/abs/2304.06707) +- [PG-Big](https://arxiv.org/abs/2203.16051) +- [History Repeats Itself](https://arxiv.org/abs/2007.11755) +- [STS-GCN](https://arxiv.org/abs/2110.04573) +- [MSR-GCN](https://arxiv.org/abs/2108.07152) +- [PoTR](https://openaccess.thecvf.com/content/ICCV2021W/SoMoF/papers/Martinez-Gonzalez_Pose_Transformers_POTR_Human_Motion_Prediction_With_Non-Autoregressive_Transformers_ICCVW_2021_paper.pdf) +- [PV-LSTM](https://github.com/vita-epfl/bounding-box-prediction) +- [Disentangled](https://github.com/Armin-Saadat/pose-prediction-autoencoder) +- [DER-POF](https://openaccess.thecvf.com/content/ICCV2021W/SoMoF/html/Parsaeifard_Learning_Decoupled_Representations_for_Human_Pose_Forecasting_ICCVW_2021_paper.html) +- Zero-Vel + +## Adding a Model + +To add a new model, follow the steps below: + +- add the model file or files to the models directory +- add the model reference to models.\_\_init\_\_.py +- add the model's required parameters to configs/hydra/model. This step is necessary even if you don't have additional parameters. +- if your model uses a new loss function that is not implemented in the library, add your loss function to the losses folder. + +## Adding a Metric + +To add a new metric, follow the steps below: + +- implement your metric function in the metrics.pose_metrics.py file +- add the metric reference to metrics.\_\_init\_\_.py +- add your metric to configs/hydra/metrics.yaml + +# Preprocessing + +Preprocessing creates clean static files that make the dataloader more efficient and speed up the rest of the pipeline. To do so, put the data in `$DATASET_PATH` and run the preprocessing API, `preprocess`, like below: + +Example: +```bash +python -m api.preprocess \ +    dataset=human3.6m \ +    annotated_data_path=$DATASET_PATH \ +    data_type=test +``` +See [here](ARGS_README.md#preprocessing) for more details about preprocessing arguments. +This process should be repeated for the training, validation, and test sets. This is a one-time API; afterwards, you just use the saved jsonl files. + +# Training + +Given the preprocessed data, train models from scratch: +```bash +python -m api.train model=st_trans \ +    train_dataset=$DATASET_TRAIN_PATH \ +    valid_dataset=$DATASET_VALIDATION_PATH \ +    obs_frames_num=50 \ +    pred_frames_num=25 \ +    model.loss.nT=25 \ +    model.pre_post_process=human3.6m \ +    model.n_major_joints=22 \ +    model.loss.nJ=32 +``` +DATASET_TRAIN_PATH and DATASET_VALIDATION_PATH refer to the preprocessed jsonl files. + +**NOTE**: You can see more commands for training models [here](training_commands.md). + +Provide **valid_dataset** to adjust the learning rate and to report metrics on the validation set as well. + +See [here](ARGS_README.md#training) for more details about training arguments.
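For example, a training run can be tagged for experiment tracking via the optional MLflow arguments listed in [ARGS_README.md](ARGS_README.md#training). The sketch below reuses the st_trans command from above; the experiment name and tracking path are placeholder values, not names defined by the library:
```bash
# Hypothetical example: the same st_trans run as above, tagged for MLflow tracking.
# experiment_name and mlflow_tracking_uri are the optional training arguments
# documented in ARGS_README.md#training; their values here are placeholders.
python -m api.train model=st_trans \
    train_dataset=$DATASET_TRAIN_PATH \
    valid_dataset=$DATASET_VALIDATION_PATH \
    obs_frames_num=50 \
    pred_frames_num=25 \
    model.loss.nT=25 \
    model.pre_post_process=human3.6m \
    model.n_major_joints=22 \
    model.loss.nJ=32 \
    experiment_name=st_trans_h36m \
    mlflow_tracking_uri=$MLFLOW_DIR
```
You can then inspect and compare runs with `mlflow ui`, as described in the MLflow section above.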
+ + +# Evaluation + +You can evaluate any pretrained model with: +```bash +python -m api.evaluate model=st_trans \ + dataset=$DATASET_TEST_PATH \ + load_path=$MODEL_CHECKPOINT \ + obs_frames_num=10 \ + pred_frames_num=25 \ + data.is_h36_testing=true +``` +in case of non-trainable model, run: +```bash +python -m api.evaluate model=zero_vel \ + dataset=$DATASET_TEST_PATH \ + obs_frames_num=10 \ + pred_frames_num=25 \ + data.is_h36_testing=true +``` +See [here](ARGS_README.md#evaluation) for more details about evaluation arguments. + + + +## Epistemic Uncertainty + +You can also evaluate the epistemic uncertainty of the models using the approach presented in the paper. For the ease of use, we have provided the trained EpU model in the release section. +Using the trained EpU model, one can evaluate the epistemic uncertainty of the pose prediction model: +```bash +python -m api.evaluate model=st_trans \ + dataset=$DATASET_TEST_PATH \ + load_path=$MODEL_CHECKPOINT \ + obs_frames_num=10 \ + pred_frames_num=25 \ + data.is_testing=true \ + data.is_h36_testing=true \ + dataset_name=human3.6m \ + eval_epu=true \ + epu_model_path=$EPU_MODEL +``` + + +# Generating Outputs + +Generate and save the predicted future poses: +```bash +python -m api.generate_final_output model=st_trans \ + dataset=$DATASET_PATH \ + load_path=$MODEL_CHECKPOINT \ + obs_frames_num=10 \ + pred_frames_num=25 \ + data.is_h36_testing=true \ + save_dir=$OUTPUT_PATH +``` +See [here](ARGS_README.md#generating-outputs) for more details about prediction arguments. + + + +# Work in Progress +This repository is being updated so please stay tuned! diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/__init__.py b/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/evaluate.py b/api/evaluate.py new file mode 100644 index 0000000..3da199f --- /dev/null +++ b/api/evaluate.py @@ -0,0 +1,54 @@ +import logging +import hydra +from omegaconf import DictConfig +import torch +from data_loader import get_dataloader +from models import MODELS +from losses import LOSSES +from factory.evaluator import Evaluator +from factory.epistemic_uncertainty_evaluator import UncertaintyEvaluator +from epistemic_uncertainty.main import load_dc_model +from utils.reporter import Reporter +from utils.save_load import load_snapshot +import os + +from path_definition import HYDRA_PATH + +logger = logging.getLogger(__name__) + + +@hydra.main(config_path=HYDRA_PATH, config_name="evaluate") +def evaluate(cfg: DictConfig): + if cfg.load_path is None and cfg.model is None: + msg = 'either specify a load_path or config a model.' 
+ logger.error(msg) + raise Exception(msg) + dataloader = get_dataloader(cfg.dataset, cfg.data) + eval_reporter = Reporter(state='') + uncertainty_reporter = Reporter(state='') + if cfg.load_path is not None: + model, loss_module, _, _, _, _, _ = load_snapshot(cfg.load_path) + cfg.save_dir = cfg.load_path[:cfg.load_path.rindex('snapshots/')] + else: + cfg.model.pred_frames_num = dataloader.dataset.future_frames_num + cfg.model.keypoints_num = dataloader.dataset.keypoints_num + cfg.model.obs_frames_num = dataloader.dataset.obs_frames_num + # cfg.model.mean_pose = dataloader.dataset.mean_pose #new commented + # cfg.model.std_pose = dataloader.dataset.std_pose #new commented + cfg.save_dir = os.getcwd() + + model = MODELS[cfg.model.type](cfg.model) + loss_module = LOSSES[cfg.model.loss.type](cfg.model.loss) + if cfg.model.type == 'nearest_neighbor': + model.train_dataloader = get_dataloader(cfg.model.train_dataset, cfg.data) + + evaluator = Evaluator(cfg, dataloader, model, loss_module, eval_reporter) + evaluator.evaluate() + if cfg.eval_epu: + uncertainty_model = load_dc_model(cfg.dataset_name, cfg.n_clusters, cfg.epu_model_path) + uncertainty_evaluator = UncertaintyEvaluator(cfg, dataloader, model, uncertainty_model, uncertainty_reporter) + uncertainty_evaluator.evaluate() + + +if __name__ == '__main__': + evaluate() diff --git a/api/generate_final_output.py b/api/generate_final_output.py new file mode 100644 index 0000000..78156f7 --- /dev/null +++ b/api/generate_final_output.py @@ -0,0 +1,42 @@ +import os +import logging +import hydra +from omegaconf import DictConfig + +from data_loader import get_dataloader +from models import MODELS +from factory.output_generator import Output_Generator +from utils.save_load import load_snapshot, setup_testing_dir + +from path_definition import HYDRA_PATH + +logger = logging.getLogger(__name__) + + +@hydra.main(config_path=HYDRA_PATH, config_name="generate_output") +def generate_output(cfg: DictConfig): + if cfg.load_path is None and cfg.model is None: + msg = 'either specify a load_path or config a model.' 
+ logger.error(msg) + raise Exception(msg) + + dataloader = get_dataloader(cfg.dataset, cfg.data) + + if cfg.load_path is not None: + model, _, _, _, _, _, _ = load_snapshot(cfg.load_path) + else: + cfg.model.keypoints_num = dataloader.dataset.keypoints_num + model = MODELS[cfg.model.type](cfg.model) + if cfg.model.type == 'nearest_neighbor': + model.train_dataloader = get_dataloader( + cfg.model.train_dataset, cfg.data) + cfg.save_dir = os.getcwd() + setup_testing_dir(cfg.save_dir) + + output_enerator = Output_Generator( + model, dataloader, cfg.save_dir, cfg.device) + output_enerator.generate() + + +if __name__ == '__main__': + generate_output() \ No newline at end of file diff --git a/api/preprocess.py b/api/preprocess.py new file mode 100644 index 0000000..d3d987b --- /dev/null +++ b/api/preprocess.py @@ -0,0 +1,44 @@ +import logging + +import hydra +from omegaconf import DictConfig + +from path_definition import HYDRA_PATH +from preprocessor.dpw_preprocessor import Preprocessor3DPW +from preprocessor.human36m_preprocessor import Human36mPreprocessor +from preprocessor.amass_preprocessor import AmassPreprocessor +from data_loader import DATASETS, DATA_TYPES + +logger = logging.getLogger(__name__) + + +@hydra.main(config_path=HYDRA_PATH, config_name="preprocess") +def preprocess(cfg: DictConfig): + assert cfg.dataset in DATASETS, "invalid dataset name" + assert cfg.data_type in DATA_TYPES, "data_type choices: " + str(DATA_TYPES) + + if cfg.dataset == 'human3.6m': + preprocessor = Human36mPreprocessor( + dataset_path=cfg.annotated_data_path, + custom_name=cfg.output_name, + ) + elif cfg.dataset == '3dpw': + preprocessor = Preprocessor3DPW( + dataset_path=cfg.annotated_data_path, + custom_name=cfg.output_name, + load_60Hz=cfg.load_60Hz + ) + elif cfg.dataset == 'amass': + preprocessor = AmassPreprocessor( + dataset_path=cfg.annotated_data_path, + custom_name=cfg.output_name, + ) + else: + msg = "Invalid preprocessor." + logger.error(msg) + raise Exception(msg) + preprocessor.normal(data_type=cfg.data_type) + + +if __name__ == '__main__': + preprocess() \ No newline at end of file diff --git a/api/train.py b/api/train.py new file mode 100644 index 0000000..c4403db --- /dev/null +++ b/api/train.py @@ -0,0 +1,77 @@ +import logging +import os +from itertools import chain + +import hydra +from omegaconf import DictConfig + +from factory.epistemic_uncertainty_trainer import UncertaintyTrainer +from epistemic_uncertainty.main import load_dc_model +from data_loader import get_dataloader, get_dataset +from factory.trainer import Trainer +from losses import LOSSES +from models import MODELS +from optimizers import OPTIMIZERS +from path_definition import HYDRA_PATH +from schedulers import SCHEDULERS +from utils.reporter import Reporter +from utils.save_load import load_snapshot, save_snapshot, setup_training_dir +from factory.epistemic_uncertainty_evaluator import UncertaintyEvaluator + +logger = logging.getLogger(__name__) + + +@hydra.main(config_path=HYDRA_PATH, config_name="train") +def train(cfg: DictConfig): + if cfg.load_path is None and cfg.model is None: + msg = 'either specify a load_path or config a model.' 
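+        # Training either builds a fresh model from cfg.model or resumes from a
+        # snapshot given by load_path; with neither, abort here before any
+        # dataloader is built.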
+ logger.error(msg) + raise Exception(msg) + + train_dataloader = get_dataloader(cfg.train_dataset, cfg.data) + train_dataset = get_dataset(cfg.train_dataset, cfg.data) + cfg.data.is_testing = True + valid_dataloader = get_dataloader(cfg.valid_dataset, cfg.data) + + # you either train the pose prediction model or the epu model + # the if part here is about training a pose prediction model and by default this is active + # the else part is about training the epu model + if not cfg.train_epu: + if cfg.load_path is not None: + model, loss_module, optimizer, optimizer_args, epoch, train_reporter, valid_reporter = load_snapshot( + cfg.load_path) + cfg.start_epoch = epoch + cfg.optimizer = optimizer_args + cfg.save_dir = cfg.load_path[:cfg.load_path.rindex('snapshots/')] + else: + cfg.model.keypoints_num = train_dataloader.dataset.keypoints_num + # cfg.model.mean_pose = train_dataloader.dataset.mean_pose + # cfg.model.std_pose = train_dataloader.dataset.std_pose + + model = MODELS[cfg.model.type](cfg.model) + + loss_module = LOSSES[cfg.model.loss.type](cfg.model.loss) + + optimizer = OPTIMIZERS[cfg.optimizer.type]( + chain(model.parameters(), loss_module.parameters()), cfg.optimizer) + + train_reporter = Reporter(state='train') + valid_reporter = Reporter(state='valid') + cfg.save_dir = os.getcwd() + setup_training_dir(cfg.save_dir) + save_snapshot(model, loss_module, optimizer, cfg.optimizer, + 0, train_reporter, valid_reporter, cfg.save_dir) + scheduler = SCHEDULERS[cfg.scheduler.type](optimizer, cfg.scheduler) + trainer = Trainer(cfg, train_dataloader, valid_dataloader, model, loss_module, optimizer, cfg.optimizer, scheduler, + train_reporter, valid_reporter) + trainer.train() + + else: + # this part is for training the epistemic uncertainty model + uncertainty_model: None + uncertainty_trainer = UncertaintyTrainer(cfg, train_dataset, train_dataloader, valid_dataloader) + uncertainty_trainer.train() + + +if __name__ == '__main__': + train() diff --git a/api/visualize.py b/api/visualize.py new file mode 100644 index 0000000..fb1ecc5 --- /dev/null +++ b/api/visualize.py @@ -0,0 +1,135 @@ +import os +import logging +import hydra +from omegaconf import DictConfig +import random +import torch + +from data_loader import get_dataloader, DATASETS, VISUALIZING_TYPES +from models import MODELS +from utils.save_load import load_snapshot +from utils.others import dict_to_device +from visualization.visualizer import Visualizer + +from path_definition import HYDRA_PATH + +logger = logging.getLogger(__name__) + + +@hydra.main(config_path=HYDRA_PATH, config_name="visualize") +def visualize(cfg: DictConfig): + assert cfg.dataset_type in DATASETS and cfg.dataset_type not in ['amass', '3dpw'], 'dataset_type chioces: ' + str(DATASETS) + ' and not in [amass, 3dpw]' + showing = cfg.showing.strip().split('-') + for k in showing: + if k not in VISUALIZING_TYPES: + raise Exception( + 'options for showing are: ' + str(VISUALIZING_TYPES) + '''\nuse '-' to seperate different types.''') + if 'future' in showing and cfg.data.is_testing: + raise Exception('do not have access to future frames when data.is_testing is true.') + + # prepare data + dataloader = get_dataloader(cfg.dataset, cfg.data) + + index = random.randint(0, dataloader.dataset.__len__() - 1) if cfg.index is None else cfg.index + data = dataloader.dataset.__getitem__(index) + for key in ['observed_pose', 'observed_metric_pose', 'future_pose', 'future_metric_pose', 'observed_noise']: + if key in data.keys(): + data[key] = data[key].unsqueeze(0) + + # prepare 
model + if cfg.load_path is not None: + model, _, _, _, _, _, _ = load_snapshot(cfg.load_path) + else: + cfg.model.keypoint_dim = cfg.data.keypoint_dim + cfg.model.keypoints_num = dataloader.dataset.keypoints_num + cfg.model.pred_frames_num = dataloader.dataset.future_frames_num if cfg.pred_frames_num is None else cfg.pred_frames_num + assert cfg.model.pred_frames_num is not None, 'specify pred_frames_num or set data.is_testing=false' + model = MODELS[cfg.model.type](cfg.model) + if cfg.model.type == 'nearest_neighbor': + model.train_dataloader = get_dataloader(cfg.model.train_dataset, cfg.data) + + # predict + model = model.to(cfg.device).eval() + with torch.no_grad(): + outputs = model(dict_to_device(data, cfg.device)) + assert 'pred_pose' in outputs.keys(), 'outputs of model should include pred_pose' + if 'pred_metric_pose' in outputs: + data['pred_metric_pose'] = outputs['pred_metric_pose'] + data['pred_pose'] = outputs['pred_pose'] + if 'completed' in showing: + assert 'comp_pose' in outputs.keys(), 'outputs of model should include comp_pose' + data['comp_pose'] = outputs['comp_pose'] + + names = [] + poses = [] + images_path = [] + cam_exs = [] + + if 'observed' in showing: + names.append('observed') + pose = data['observed_pose'] + poses.append(pose.permute(1, 0, 2)) + image_path = data['observed_image'] if 'observed_image' in data.keys() else None + images_path.append(image_path) + cam_ex = data['observed_cam_ex'] if 'observed_cam_ex' in data.keys() else None + cam_exs.append(cam_ex) + + if 'completed' in showing: + names.append('completed') + pose = data['comp_pose'] + poses.append(pose.permute(1, 0, 2)) + image_path = data['observed_image'] if 'observed_image' in data.keys() else None + images_path.append(image_path) + cam_ex = data['observed_cam_ex'] if 'observed_cam_ex' in data.keys() else None + cam_exs.append(cam_ex) + + if 'future' in showing: + names.append('future') + tag = 'future_metric_pose' + if tag not in data: + tag = 'future_pose' + + pose = data[tag] + poses.append(pose.permute(1, 0, 2)) + image_path = data['future_image'] if 'future_image' in data.keys() else None + images_path.append(image_path) + cam_ex = data['future_cam_ex'] if 'future_cam_ex' in data.keys() else None + cam_exs.append(cam_ex) + + if 'predicted' in showing: + names.append('predicted') + + tag = 'pred_metric_pose' + if tag not in data: + tag = 'pred_pose' + + pose = data[tag] + poses.append(pose.permute(1, 0, 2)) + image_path = data['future_image'] if 'future_image' in data.keys() else None + images_path.append(image_path) + cam_ex = data['future_cam_ex'] if 'future_cam_ex' in data.keys() else None + cam_exs.append(cam_ex) + + cam_in = data.get('cam_in') if 'cam_in' in data.keys() else None + + for i, p in enumerate(poses): + if p is not None and p.is_cuda: + poses[i] = p.detach().cpu() + + if cfg.data.is_noisy: + observed_noise = data['observed_noise'].squeeze(0) if cfg.data.is_interactive else data['observed_noise'] + observed_noise = observed_noise.detach().cpu() if observed_noise.is_cuda else observed_noise + observed_noise = observed_noise.squeeze(0) + else: + observed_noise = None + + visualizer = Visualizer(dataset_name=cfg.dataset_type, parent_dir=cfg.save_dir, images_dir=cfg.images_dir) + gif_name = '_'.join((cfg.model.type, cfg.dataset.split("/")[-1], str(index))) + if cfg.data.keypoint_dim == 2: + visualizer.visualizer_2D(names, poses, images_path, observed_noise, gif_name) + else: + visualizer.visualizer_3D(names, poses, cam_exs, cam_in, images_path, observed_noise, gif_name) + + +if 
__name__ == '__main__': + visualize() diff --git a/configs/hydra/data/main.yaml b/configs/hydra/data/main.yaml new file mode 100644 index 0000000..c4413f8 --- /dev/null +++ b/configs/hydra/data/main.yaml @@ -0,0 +1,18 @@ +keypoint_dim: ${keypoint_dim} + +model_pose_format: ${model_pose_format} +metric_pose_format: ${metric_pose_format} + +pose_formats: [] + +is_h36_testing: false +is_testing: false +batch_size: 16 +shuffle: true +pin_memory: false +num_workers: 4 +seq_rate: 2 # number of frames to skip between each sequence +frame_rate: 2 # number of frames to skip between each frame in a sequence +len_observed: ${obs_frames_num} +len_future: ${pred_frames_num} +random_reverse_prob: 0.5 \ No newline at end of file diff --git a/configs/hydra/evaluate.yaml b/configs/hydra/evaluate.yaml new file mode 100644 index 0000000..7792cc8 --- /dev/null +++ b/configs/hydra/evaluate.yaml @@ -0,0 +1,14 @@ +defaults: + - _self_ + - shared + - data: main + - model: + - metrics + +dataset: ??? +rounds_num: 1 +device: cuda +eval_epu: False +n_clusters: 17 +epu_model_path: ??? +dataset_name: ??? \ No newline at end of file diff --git a/configs/hydra/generate_output.yaml b/configs/hydra/generate_output.yaml new file mode 100644 index 0000000..4010e91 --- /dev/null +++ b/configs/hydra/generate_output.yaml @@ -0,0 +1,10 @@ +defaults: + - shared + - data: main + - model: + - _self_ + +dataset: ??? + +data: + is_testing: true \ No newline at end of file diff --git a/configs/hydra/metrics.yaml b/configs/hydra/metrics.yaml new file mode 100644 index 0000000..337cf8b --- /dev/null +++ b/configs/hydra/metrics.yaml @@ -0,0 +1,13 @@ +pose_metrics: + - ADE + - FDE + - F1 #new: + - F3 + - F7 + - F9 + - F13 + - F17 + - F21 + # - MSE + # - local_ade + # - local_fde diff --git a/configs/hydra/model/common.yaml b/configs/hydra/model/common.yaml new file mode 100644 index 0000000..89f3a62 --- /dev/null +++ b/configs/hydra/model/common.yaml @@ -0,0 +1,7 @@ +keypoint_dim: ${keypoint_dim} +pred_frames_num: ${pred_frames_num} +obs_frames_num: ${obs_frames_num} +keypoints_num: +mean_pose: +std_pose: +device: ${device} diff --git a/configs/hydra/model/derpof.yaml b/configs/hydra/model/derpof.yaml new file mode 100644 index 0000000..b6b196a --- /dev/null +++ b/configs/hydra/model/derpof.yaml @@ -0,0 +1,14 @@ +defaults: + - _self_ + - common + +type: derpof +embedding_dim: 8 +hidden_dim: 64 +latent_dim: 32 +dropout: 0.2 + + +loss: + type: derpof + local_loss_weight: 0.1 \ No newline at end of file diff --git a/configs/hydra/model/disentangled.yaml b/configs/hydra/model/disentangled.yaml new file mode 100644 index 0000000..2aa451a --- /dev/null +++ b/configs/hydra/model/disentangled.yaml @@ -0,0 +1,16 @@ +defaults: + - _self_ + - common + +type: disentangled + +hidden_size: 200 +hardtanh_limit: 10 +n_layers: 1 +dropout_enc: 0 +dropout_pose_dec: 0 +dropout_mask_dec: 0 + +loss: + type: mpjpe + nJ: 32 # 18 for AMASS and 3DPW, 32 for Human3.6m \ No newline at end of file diff --git a/configs/hydra/model/history_repeats_itself.yaml b/configs/hydra/model/history_repeats_itself.yaml new file mode 100644 index 0000000..0cdc4c8 --- /dev/null +++ b/configs/hydra/model/history_repeats_itself.yaml @@ -0,0 +1,26 @@ +defaults: + - _self_ + - common + +type: history_repeats_itself +kernel_size: 10 +d_model: 256 +in_features: 66 +input_n: ${obs_frames_num} +output_n: ${pred_frames_num} +num_stage: 12 +dct_n: 20 +itera: 1 +un_mode: 'default' +init_mode: 'default' +modality: 'Human36' # Can be either Human36 or AMASS + +loss: + type: his_rep_itself + 
input_n: ${obs_frames_num} + output_n: ${pred_frames_num} + itera: 1 + kernel_size: 10 + device: ${device} + un_mode: + modality: 'Human36' # Can be either Human36 or AMASS diff --git a/configs/hydra/model/msr_gcn.yaml b/configs/hydra/model/msr_gcn.yaml new file mode 100644 index 0000000..8c6d2bc --- /dev/null +++ b/configs/hydra/model/msr_gcn.yaml @@ -0,0 +1,18 @@ +defaults: + - _self_ + - common + +type: msr_gcn + +p_dropout: 0.1 +leaky_c: 0.2 +final_out_noden: 22 +input_feature: 35 +global_max: 6793.261339887178 +global_min: -6245.483729757661 +dct_used: 35 +input_n: 10 +output_n: 25 + +loss: + type: msr_gcn \ No newline at end of file diff --git a/configs/hydra/model/pgbig.yaml b/configs/hydra/model/pgbig.yaml new file mode 100644 index 0000000..d4fb9c7 --- /dev/null +++ b/configs/hydra/model/pgbig.yaml @@ -0,0 +1,31 @@ +defaults: + - _self_ + - common + +type: pgbig + +kernel_size: 10 # must be 10. Also obs_frames_num must be 10 to match the kernel size +d_model: 16 +dct_n: 35 # usually obs_frames_num + pred_frames_num +in_features: 54 # How many features are in a frame? 3 * n_joints. 66 for Human3.6M, 54 for 3DPW and AMASS +num_stage: 12 +drop_out: 0.3 + +# can be human3.6m , AMASS, 3DPW, none +pre_post_process: AMASS + +device: ${device} + +loss: + device: ${device} + pre_post_process: AMASS # Should be excatly like the one in the model + type: pgbig_loss + inner_type: ORIGINAL # PUAL # if you want to use the PUAL loss, set this to PUAL. otherwise set it to something else and not None. In that case it will use the original loss function. + tasks: STJ # use S if you want each stage be a seperate task + nT: 35 # must be obs_frames_num + pred_frames_num + nJ: 18 # 18 for AMASS/3DPW, 22 for Human3.6M + time_prior: sig5 + action_list: ["walking", "eating", "smoking", "discussion", "directions", "greeting", "phoning", "posing", "purchases", "sitting", "sittingdown", "takingphoto", "waiting", "walkingdog", "walkingtogether"] + clipMinS: -1 + clipMaxS: + init_mean: 3 # 3.5 for Human3.6M, 3 for AMASS/3DPW diff --git a/configs/hydra/model/potr.yaml b/configs/hydra/model/potr.yaml new file mode 100644 index 0000000..fe74754 --- /dev/null +++ b/configs/hydra/model/potr.yaml @@ -0,0 +1,60 @@ +defaults: + - _self_ + - common + +type: potr + +num_encoder_layers: 4 +num_decoder_layers: 4 +query_selection: &query_selection false +use_query_embedding: false +num_layers: 6 +model_dim: 128 +num_heads: 2 +dim_ffn: 16 +dropout: 0.5 +init_fn_name: xavier +pre_normalization: true +pose_embedding_type: gcn_enc +pos_enc_beta: 500 +pos_enc_alpha: 10 +use_class_token: false +predict_activity: &predict_activity true +non_autoregressive: true + +include_last_obs: &include_last_obs false +pad_decoder_inputs: &pad_decoder_inputs true + + +# data related +pose_format: &model_pose_format ${model_pose_format} +metric_pose_format: ${metric_pose_format} +future_frames_num: &pred_frames_num ${pred_frames_num} # convert to pred frames num which is in common + +n_major_joints: &n_major_joints 21 +n_joints: &n_joints 21 +n_h36m_joints: 32 +pose_dim: ${keypoint_dim} +num_activities: &num_activities 15 + +device: &device ${device} +consider_uncertainty: &consider_uncertainty false + +loss: + type: potr + activity_weight: 1.0 + uncertainty_weight: 1.0 + loss_fn: l1 + query_selection: *query_selection + predict_activity: *predict_activity + pose_format: *model_pose_format + future_frames_num: *pred_frames_num + obs_frames_num: ${obs_frames_num} #*obs_frames_num + include_last_obs: *include_last_obs + pose_dim: ${keypoint_dim} 
#*keypoint_dim + n_major_joints: *n_joints + pad_decoder_inputs: *pad_decoder_inputs + device: *device + consider_uncertainty: *consider_uncertainty + num_activities: *num_activities + #metric_pose_format: metric_pose_format diff --git a/configs/hydra/model/pv_lstm.yaml b/configs/hydra/model/pv_lstm.yaml new file mode 100644 index 0000000..6a99dc8 --- /dev/null +++ b/configs/hydra/model/pv_lstm.yaml @@ -0,0 +1,27 @@ +defaults: + - _self_ + - common + +type: pv_lstm +hidden_size: 200 +hardtanh_limit: 10 +n_layers: 1 +dropout_enc: 0 +dropout_pose_dec: 0 +dropout_mask_dec: 0 + +# loss: +# type: mse_vel +# mask_weight: 0 + +loss: + device: ${device} + type: pua_loss + tasks: TJ + nT: 25 + nJ: 18 # 32, 22 + time_prior: sig5 + action_list: ["walking", "eating", "smoking", "discussion", "directions", "greeting", "phoning", "posing", "purchases", "sitting", "sittingdown", "takingphoto", "waiting", "walkingdog", "walkingtogether"] + clipMinS: -1 + clipMaxS: + init_mean: 3.5 \ No newline at end of file diff --git a/configs/hydra/model/st_trans.yaml b/configs/hydra/model/st_trans.yaml new file mode 100644 index 0000000..efde5f1 --- /dev/null +++ b/configs/hydra/model/st_trans.yaml @@ -0,0 +1,35 @@ +defaults: + - _self_ + - common + +type: st_trans + +n_major_joints: 22 # 18 for AMASS and 3DPW and 22 for human3.6m +diff_layers: 6 +diff_channels: 64 +diff_nheads: 8 + +model_is_unconditional: 0 +model_timeemb: 128 +model_featureemb: 16 + +# can be human3.6m , AMASS, 3DPW, none +pre_post_process: human3.6m + +device: &device ${device} + +# loss: +# type: mpjpe +# nJ: 32 + +loss: + device: ${device} + type: pua_loss + tasks: TJ + nT: 25 + nJ: 32 + time_prior: sig5 + action_list: ["walking", "eating", "smoking", "discussion", "directions", "greeting", "phoning", "posing", "purchases", "sitting", "sittingdown", "takingphoto", "waiting", "walkingdog", "walkingtogether"] + clipMinS: -1 + clipMaxS: + init_mean: 3.5 diff --git a/configs/hydra/model/sts_gcn.yaml b/configs/hydra/model/sts_gcn.yaml new file mode 100644 index 0000000..7eef867 --- /dev/null +++ b/configs/hydra/model/sts_gcn.yaml @@ -0,0 +1,16 @@ +defaults: + - _self_ + - common + +type: sts_gcn +st_gcnn_dropout: 0.1 +n_txcnn_layers: 4 +txc_kernel_size: [3, 3] +txc_dropout: 0 +n_major_joints: 18 # 18 for AMASS and 3DPW, 22 for Human3.6m + +pre_post_process: AMASS # human3.6m, AMASS, 3DPW + +loss: + type: mpjpe + nJ: 18 # 18 for AMASS and 3DPW, 32 for Human3.6m \ No newline at end of file diff --git a/configs/hydra/model/zero_vel.yaml b/configs/hydra/model/zero_vel.yaml new file mode 100644 index 0000000..c85b9ff --- /dev/null +++ b/configs/hydra/model/zero_vel.yaml @@ -0,0 +1,9 @@ +defaults: + - _self_ + - common + +type: zero_vel + +loss: + type: mse_pose + mask_weight: 0 \ No newline at end of file diff --git a/configs/hydra/optimizer/adam.yaml b/configs/hydra/optimizer/adam.yaml new file mode 100644 index 0000000..41e8d9c --- /dev/null +++ b/configs/hydra/optimizer/adam.yaml @@ -0,0 +1,3 @@ +type: adam +lr: 0.005 +weight_decay: 1e-05 \ No newline at end of file diff --git a/configs/hydra/optimizer/adamw.yaml b/configs/hydra/optimizer/adamw.yaml new file mode 100644 index 0000000..83b708c --- /dev/null +++ b/configs/hydra/optimizer/adamw.yaml @@ -0,0 +1,7 @@ +type: adamw +lr: 0.0001 +betas: + - 0.9 + - 0.999 + +weight_decay: 0.00001 \ No newline at end of file diff --git a/configs/hydra/optimizer/sam.yaml b/configs/hydra/optimizer/sam.yaml new file mode 100644 index 0000000..f296e44 --- /dev/null +++ b/configs/hydra/optimizer/sam.yaml @@ -0,0 +1,5 
@@ +type: sam +base_optimizer: + type: adam + lr: 0.001 + weight_decay: 1e-05 diff --git a/configs/hydra/optimizer/sgd.yaml b/configs/hydra/optimizer/sgd.yaml new file mode 100644 index 0000000..60411cb --- /dev/null +++ b/configs/hydra/optimizer/sgd.yaml @@ -0,0 +1,6 @@ +type: sgd +lr: 0.01 +momentum: 0.9 +dampening: 0 +weight_decay: 1e-05 +nesterov: false \ No newline at end of file diff --git a/configs/hydra/preprocess.yaml b/configs/hydra/preprocess.yaml new file mode 100644 index 0000000..d3106a3 --- /dev/null +++ b/configs/hydra/preprocess.yaml @@ -0,0 +1,11 @@ +defaults: + - shared + - _self_ + +annotated_data_path: ??? +dataset: ??? +data_type: ??? +output_name: + +# This is used just for 3DPW +load_60Hz: True \ No newline at end of file diff --git a/configs/hydra/scheduler/multi_step_lr.yaml b/configs/hydra/scheduler/multi_step_lr.yaml new file mode 100644 index 0000000..6d01526 --- /dev/null +++ b/configs/hydra/scheduler/multi_step_lr.yaml @@ -0,0 +1,3 @@ +type: multi_step_lr +milestones: [1, 2, 3, 5, 7, 9] +gamma: 0.4 diff --git a/configs/hydra/scheduler/reduce_lr_on_plateau.yaml b/configs/hydra/scheduler/reduce_lr_on_plateau.yaml new file mode 100644 index 0000000..67f068d --- /dev/null +++ b/configs/hydra/scheduler/reduce_lr_on_plateau.yaml @@ -0,0 +1,6 @@ +type: reduce_lr_on_plateau +mode: min +factor: 0.8 +patience: 3 +threshold: 0.1 +verbose: true \ No newline at end of file diff --git a/configs/hydra/scheduler/step_lr.yaml b/configs/hydra/scheduler/step_lr.yaml new file mode 100644 index 0000000..505590c --- /dev/null +++ b/configs/hydra/scheduler/step_lr.yaml @@ -0,0 +1,5 @@ +type: step_lr +step_size: 2 #50 +gamma: 0.5 #0.5 +last_epoch: 10 +verbose: true \ No newline at end of file diff --git a/configs/hydra/shared.yaml b/configs/hydra/shared.yaml new file mode 100644 index 0000000..e804ad4 --- /dev/null +++ b/configs/hydra/shared.yaml @@ -0,0 +1,8 @@ +obs_frames_num: 10 +pred_frames_num: 25 +model_pose_format: xyz +metric_pose_format: +keypoint_dim: 3 +device: cuda +load_path: +save_dir: \ No newline at end of file diff --git a/configs/hydra/train.yaml b/configs/hydra/train.yaml new file mode 100644 index 0000000..96b411c --- /dev/null +++ b/configs/hydra/train.yaml @@ -0,0 +1,37 @@ +defaults: + - _self_ + - shared + - data: main + - model: ??? + - optimizer: adam + - scheduler: multi_step_lr + - metrics + +train_dataset: ??? +valid_dataset: +epochs: 15 +snapshot_interval: 1 +start_epoch: 0 +experiment_name: Default +experiment_tag: +mlflow_tracking_uri: +train_epu: False +epu_model_path: ??? +n_clusters: 17 +dataset_name: ??? +lstm: + alpha: 0.001 + optimizer: adam + scheduler: tri + lr: 0.0001 + lr_decay: 0.99 + epochs: 200 +dc: + lr: 0.0005 + lr_decay: 0.98 + weight_decay: 0.00001 + gamma: 0.6 + epochs: 30 + stop_cret: 0.001 + update_interval: 2.0 + alpha: 0.001 # equal to lstm.alpha diff --git a/configs/hydra/visualize.yaml b/configs/hydra/visualize.yaml new file mode 100644 index 0000000..ee46231 --- /dev/null +++ b/configs/hydra/visualize.yaml @@ -0,0 +1,13 @@ +defaults: + - data: main + - model: ??? + - shared + - _self_ + +dataset: ??? +dataset_type: ??? 
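+# dataset_type must be one of data_loader.DATASETS; api/visualize.py currently rejects 'amass' and '3dpw'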
+keypoint_dim: 3 +showing: future, predicted +pred_frames_num: +index: +images_dir: diff --git a/configs/logging.conf b/configs/logging.conf new file mode 100644 index 0000000..5a6eecf --- /dev/null +++ b/configs/logging.conf @@ -0,0 +1,29 @@ +[loggers] +keys=root,consoleLogger + +[handlers] +keys=consoleHandler + +[formatters] +keys=simpleFormatter, consoleFormatter + +[logger_root] +level=INFO +handlers=consoleHandler + +[logger_consoleLogger] +level=INFO +handlers=consoleHandler +qualname=consoleLogger + +[handler_consoleHandler] +class=StreamHandler +level=DEBUG +formatter=consoleFormatter +args=(sys.stdout,) + +[formatter_simpleFormatter] +format=%(asctime)s - %(name)s - %(levelname)s - %(message)s + +[formatter_consoleFormatter] +format=%(name)s - %(levelname)s - %(message)s \ No newline at end of file diff --git a/data_loader/__init__.py b/data_loader/__init__.py new file mode 100644 index 0000000..9c7ddf9 --- /dev/null +++ b/data_loader/__init__.py @@ -0,0 +1,33 @@ +from torch.utils.data import DataLoader + +from .pose_dataset import PoseDataset + +DATASETS = ['3dpw', 'human3.6m', 'amass'] +DATA_TYPES = ['train', 'validation', 'test'] +VISUALIZING_TYPES = ['observed', 'future', 'predicted', 'completed'] + + +def get_dataloader(dataset_path, args): + if dataset_path is None: + return None + + dataset = PoseDataset( + dataset_path, args.keypoint_dim, args.is_testing, args.model_pose_format, args.metric_pose_format, + args.seq_rate, args.frame_rate, args.len_observed, + args.len_future, args.is_h36_testing, args.random_reverse_prob + ) + + dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=args.shuffle, pin_memory=args.pin_memory, + num_workers=args.num_workers) + return dataloader + +def get_dataset(dataset_path, args): + if dataset_path is None: + return None + + dataset = PoseDataset( + dataset_path, args.keypoint_dim, args.is_testing, args.model_pose_format, args.metric_pose_format, + args.seq_rate, args.frame_rate, args.len_observed, + args.len_future, args.is_h36_testing, args.random_reverse_prob + ) + return dataset diff --git a/data_loader/pose_dataset.py b/data_loader/pose_dataset.py new file mode 100644 index 0000000..f6e0826 --- /dev/null +++ b/data_loader/pose_dataset.py @@ -0,0 +1,129 @@ +import json +import logging +import os + +import jsonlines +import numpy as np +import torch +from torch.utils.data import Dataset +from utils.others import find_indices_256 +from tqdm.notebook import tqdm + +from path_definition import PREPROCESSED_DATA_DIR +import random +logger = logging.getLogger(__name__) + + +class PoseDataset(Dataset): + def __init__(self, + dataset_path, + keypoint_dim, + is_testing, + model_pose_format, + metric_pose_format, + seq_rate, + frame_rate, + len_observed, + len_future, + is_h36_testing, + random_reverse_prob=0.5): + + total_len = (len_observed + len_future) * frame_rate + self.frame_rate = frame_rate + self.total_len = total_len + self.len_observed = len_observed + self.len_future = len_future + self.random_reverse_prob = random_reverse_prob + + data = list() + self.tensor_keys_to_keep = [] + self.tensor_keys_to_ignore = [] + + if not metric_pose_format: + metric_pose_format = model_pose_format + + indexes = [] + self.extra_keys_to_keep = ['video_section', 'action'] + + with jsonlines.open(dataset_path) as reader: + for seq in tqdm(reader): + + seq_tensor = {} + fps = 1 + + for k, v in seq.items(): + if k == "{}_pose".format(model_pose_format): + seq_tensor["pose"] = torch.tensor(v, dtype=torch.float32) + if k == 
"{}_pose".format(metric_pose_format): + seq_tensor["metric_pose"] = torch.tensor(v, dtype=torch.float32) + if k in self.extra_keys_to_keep: + seq_tensor[k] = v + if k == "fps": + fps = v//25 + + assert "pose" in seq_tensor, "model pose format not found in the sequence" + assert "metric_pose" in seq_tensor, "metric pose format not found in the sequence" + + if fps > 1: + seq_tensor["pose"] = seq_tensor["pose"][::fps] + seq_tensor["metric_pose"] = seq_tensor["metric_pose"][::fps] + + data.append(seq_tensor) + len_seq = seq_tensor['pose'].shape[0] + + bias = 1 if is_h36_testing else frame_rate + indexes = indexes + [(len(data) - 1, i) + for i in range(0, len_seq - total_len + bias, seq_rate)] + + if is_h36_testing: + indexes = [] + for i in range(0, len(data), 2): + len1 = (data[i]['pose'].shape[0] + frame_rate - 1) // frame_rate + len2 = (data[i + 1]['pose'].shape[0] + frame_rate - 1) // frame_rate + + idxo1, idxo2 = find_indices_256(len1, len2, + len_observed + len_future, len_observed) + indexes = indexes + [(i, j * frame_rate) for j in idxo1[:, 0]] + indexes = indexes + [(i + 1, j * frame_rate) for j in idxo2[:, 0]] + + self.obs_frames_num = self.len_observed + self.future_frames_num = self.len_future + + self.keypoints_num = int(data[0]['pose'].shape[-1] // keypoint_dim) + + self.data = data + self.indexes = indexes + self.keypoint_dim = keypoint_dim + self.is_testing = is_testing + self.is_h36_testing = is_h36_testing + + def __len__(self): + return len(self.indexes) + + def __getitem__(self, index): + data_index, seq_index = self.indexes[index] + seq = self.data[data_index] + outputs = {} + + random_reverse = random.random() < self.random_reverse_prob + if self.is_testing or self.is_h36_testing: + random_reverse = False + + output_keys = ['metric_pose', 'pose'] + + for k in output_keys: + temp_seq = seq[k][seq_index:seq_index + self.total_len] + if random_reverse: + temp_seq = torch.flip(temp_seq, [0]) + temp_seq = temp_seq[::self.frame_rate] + + + + outputs["observed_" + k] = temp_seq[:self.len_observed] + outputs["future_" + k] = temp_seq[self.len_observed:] + + for k in self.extra_keys_to_keep: + if k in seq: + outputs[k] = seq[k] + + return outputs \ No newline at end of file diff --git a/epistemic_uncertainty/__init__.py b/epistemic_uncertainty/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/epistemic_uncertainty/data/__init__.py b/epistemic_uncertainty/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/epistemic_uncertainty/data/amass_3d.py b/epistemic_uncertainty/data/amass_3d.py new file mode 100644 index 0000000..2e3827a --- /dev/null +++ b/epistemic_uncertainty/data/amass_3d.py @@ -0,0 +1,127 @@ +import os + +import numpy as np +from torch.utils.data import Dataset, DataLoader + +from .ang2joint import * +from ..utils.functions import scale +from ..utils.dataset_utils import JOINTS_TO_INCLUDE, SKIP_RATE + +''' +adapted from +https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/amass3d.py +''' + +PATH_TO_SMPL_SKELETON = '../utils/smpl_skeleton.npz' +AMASS_DIM_USED = JOINTS_TO_INCLUDE['AMASS'] +AMASS_SKIP_RATE = SKIP_RATE['AMASS'] +AMASS_SCALE_RATIO = 1 + + +class Amass(Dataset): + + def __init__(self, path_to_data, input_n, output_n, skip_rate, split=0, + apply_joints_to_include=False, ): + """ + + Args: + path_to_data: + input_n: + output_n: + skip_rate: + apply_dim_used: + split: + """ + self.path_to_data = os.path.join(path_to_data, 'AMASS/') # "D:\data\AMASS\\" + self.split = split + self.in_n = input_n + self.out_n = 
output_n + self.skip_rate = skip_rate + self.apply_joints_to_include = apply_joints_to_include + + # self.sample_rate = opt.sample_rate + self.p3d = [] + self.keys = [] + self.data_idx = [] + self.joint_used = np.arange(4, 22) # start from 4 for 17 joints, removing the non moving ones + seq_len = self.in_n + self.out_n + #TODO: DELETE SFU FROM TEST AND VAL + amass_splits = [ + ['CMU', 'MPI_Limits', 'TotalCapture', 'Eyes_Japan_Dataset', 'KIT', 'EKUT', 'TCD_handMocap', 'ACCAD'], + ['HumanEva', 'MPI_HDM05', 'SFU', 'MPI_mosh'], + ['BioMotionLab_NTroje'], + ] + + # load mean skeleton + skel = np.load(PATH_TO_SMPL_SKELETON) + p3d0 = torch.from_numpy(skel['p3d0']).float().cuda()[:, :22] + parents = skel['parents'] + parent = {} + for i in range(len(parents)): + if i > 21: + break + parent[i] = parents[i] + n = 0 + for ds in amass_splits[split]: + if not os.path.isdir(self.path_to_data + ds): + print(ds) + continue + print('>>> loading {}'.format(ds)) + for sub in os.listdir(self.path_to_data + '/' + ds): + if not os.path.isdir(self.path_to_data + ds + '/' + sub): + continue + for act in os.listdir(self.path_to_data + ds + '/' + sub): + if not act.endswith('.npz'): + continue + # if not ('walk' in act or 'jog' in act or 'run' in act or 'treadmill' in act): + # continue + pose_all = np.load(self.path_to_data + ds + '/' + sub + '/' + act) + try: + poses = pose_all['poses'] + except: + print('no poses at {}_{}_{}'.format(ds, sub, act)) + continue + frame_rate = pose_all['mocap_framerate'] + fn = poses.shape[0] + sample_rate = int(frame_rate // 25) + fidxs = range(0, fn, sample_rate) + fn = len(fidxs) + poses = poses[fidxs] + poses = torch.from_numpy(poses).float().cuda() + poses = poses.reshape([fn, -1, 3]) + # remove global rotation + poses[:, 0] = 0 + p3d0_tmp = p3d0.repeat([fn, 1, 1]) + p3d = ang2joint(p3d0_tmp, poses, parent) + # self.p3d[(ds, sub, act)] = p3d.cpu().data.numpy() + self.p3d.append(p3d.cpu().data.numpy()) + if split == 2: + valid_frames = np.arange(0, fn - seq_len + 1, self.skip_rate) + else: + valid_frames = np.arange(0, fn - seq_len + 1, self.skip_rate) + + # tmp_data_idx_1 = [(ds, sub, act)] * len(valid_frames) + self.keys.append((ds, sub, act)) + tmp_data_idx_1 = [n] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + n += 1 + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + pose = self.p3d[key][fs].reshape((-1, 66))[:, AMASS_DIM_USED] \ + if self.apply_joints_to_include \ + else self.p3d[key][fs].reshape((-1, 66)) + return scale(pose, AMASS_SCALE_RATIO), item # , key + + +if __name__ == '__main__': + a = Amass('../dataset/', 0, 25, AMASS_SKIP_RATE, split=0, apply_joints_to_include=True) + d = DataLoader(a, batch_size=256, shuffle=True, + pin_memory=True) + for data in d: + pass diff --git a/epistemic_uncertainty/data/ang2joint.py b/epistemic_uncertainty/data/ang2joint.py new file mode 100644 index 0000000..bfa0cc3 --- /dev/null +++ b/epistemic_uncertainty/data/ang2joint.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# coding: utf-8 + +import torch + +''' +https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/ang2joint.py +''' + +def ang2joint(p3d0, pose, + parent={0: -1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9, + 15: 12, 16: 13, 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21}): + """ + :param 
p3d0:[batch_size, joint_num, 3] + :param pose:[batch_size, joint_num, 3] + :param parent: + :return: + """ + # model_path = './model.npz' + # params = np.load(model_path, allow_pickle=True) + # kintree_table = params['kintree_table'] + batch_num = p3d0.shape[0] + # id_to_col = {kintree_table[1, i]: i + # for i in range(kintree_table.shape[1])} + # parent = { + # i: id_to_col[kintree_table[0, i]] + # for i in range(1, kintree_table.shape[1]) + # } + # parent = {1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9, 15: 12, 16: 13, + # 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21} + jnum = len(parent.keys()) + # v_shaped = torch.tensordot(betas, self.shapedirs, dims=([1], [2])) + self.v_template + # J = torch.matmul(self.J_regressor, v_shaped) + # face_J = v_shaped[:, [333, 2801, 6261], :] + J = p3d0 + R_cube_big = rodrigues(pose.contiguous().view(-1, 1, 3)).reshape(batch_num, -1, 3, 3) + results = [] + results.append( + with_zeros(torch.cat((R_cube_big[:, 0], torch.reshape(J[:, 0, :], (-1, 3, 1))), dim=2)) + ) + # for i in range(1, kintree_table.shape[1]): + for i in range(1, jnum): + results.append( + torch.matmul( + results[parent[i]], + with_zeros( + torch.cat( + (R_cube_big[:, i], torch.reshape(J[:, i, :] - J[:, parent[i], :], (-1, 3, 1))), + dim=2 + ) + ) + ) + ) + + stacked = torch.stack(results, dim=1) + J_transformed = stacked[:, :, :3, 3] + return J_transformed + + +# In[ ]: + + +def rodrigues(r): + """ + Rodrigues' rotation formula that turns axis-angle tensor into rotation + matrix in a batch-ed manner. + Parameter: + ---------- + r: Axis-angle rotation tensor of shape [batch_size * angle_num, 1, 3]. + Return: + ------- + Rotation matrix of shape [batch_size * angle_num, 3, 3]. + """ + eps = r.clone().normal_(std=1e-8) + theta = torch.norm(r + eps, dim=(1, 2), keepdim=True) + # theta = torch.norm(r, dim=(1, 2), keepdim=True) # dim cannot be tuple + theta_dim = theta.shape[0] + r_hat = r / theta + cos = torch.cos(theta) + z_stick = torch.zeros(theta_dim, dtype=torch.float).to(r.device) + m = torch.stack( + (z_stick, -r_hat[:, 0, 2], r_hat[:, 0, 1], r_hat[:, 0, 2], z_stick, + -r_hat[:, 0, 0], -r_hat[:, 0, 1], r_hat[:, 0, 0], z_stick), dim=1) + m = torch.reshape(m, (-1, 3, 3)) + i_cube = (torch.eye(3, dtype=torch.float).unsqueeze(dim=0) + torch.zeros((theta_dim, 3, 3), dtype=torch.float)).to(r.device) + A = r_hat.permute(0, 2, 1) + dot = torch.matmul(A, r_hat) + R = cos * i_cube + (1 - cos) * dot + torch.sin(theta) * m + return R + + +# In[ ]: + + +def with_zeros(x): + """ + Append a [0, 0, 0, 1] tensor to a [3, 4] tensor. + Parameter: + --------- + x: Tensor to be appended. + Return: + ------ + Tensor after appending of shape [4,4] + """ + ones = torch.tensor( + [[[0.0, 0.0, 0.0, 1.0]]], dtype=torch.float + ).expand(x.shape[0], -1, -1).to(x.device) + ret = torch.cat((x, ones), dim=1) + return ret + + +def pack(x): + """ + Append zero tensors of shape [4, 3] to a batch of [4, 1] shape tensor. + Parameter: + ---------- + x: A tensor of shape [batch_size, 4, 1] + Return: + ------ + A tensor of shape [batch_size, 4, 4] after appending. 
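+    Note: pack() is not referenced elsewhere in this module; it appears to be
+    carried over from the upstream HisRepItself ang2joint utilities this file
+    is adapted from.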
+ """ + zeros43 = torch.zeros( + (x.shape[0], x.shape[1], 4, 3), dtype=torch.float).to(x.device) + ret = torch.cat((zeros43, x), dim=3) + return ret + diff --git a/epistemic_uncertainty/data/dpw3.py b/epistemic_uncertainty/data/dpw3.py new file mode 100644 index 0000000..3304cb1 --- /dev/null +++ b/epistemic_uncertainty/data/dpw3.py @@ -0,0 +1,110 @@ +import os +import pickle as pkl +from os import walk + +import numpy as np +import torch +from torch.utils.data import Dataset + +from . import ang2joint +from ..utils.dataset_utils import JOINTS_TO_INCLUDE, SKIP_RATE +from ..utils.functions import scale + +DPW3_DIM_USED = JOINTS_TO_INCLUDE['AMASS'] +DPW3_SKIP_RATE = SKIP_RATE['AMASS'] +''' +adapted from +https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/dpw3d.py +''' + +PATH_TO_SMPL_SKELETON = '../utils/smpl_skeleton.npz' +SCALE_RATIO = 1 + + +class Dpw3(Dataset): + + def __init__(self, data_dir, input_n, output_n, skip_rate, apply_joints_to_include=False, split=0): + """ + + Args: + data_dir: + input_n: + output_n: + skip_rate: + apply_joints_to_include: + split: + """ + self.path_to_data = os.path.join(data_dir, '3DPW/sequenceFiles') + self.split = split + self.in_n = input_n + self.out_n = output_n + self.apply_joints_to_include = apply_joints_to_include + # self.sample_rate = opt.sample_rate + self.p3d = [] + self.keys = [] + self.data_idx = [] + self.joint_used = np.arange(4, 22) + seq_len = self.in_n + self.out_n + + if split == 0: + data_path = self.path_to_data + '/train/' + elif split == 2: + data_path = self.path_to_data + '/test/' + elif split == 1: + data_path = self.path_to_data + '/validation/' + files = [] + for (dirpath, dirnames, filenames) in walk(data_path): + files.extend(filenames) + skel = np.load(PATH_TO_SMPL_SKELETON) + p3d0 = torch.from_numpy(skel['p3d0']).float().cuda()[:, :22] + parents = skel['parents'] + parent = {} + for i in range(len(parents)): + if i > 21: + break + parent[i] = parents[i] + n = 0 + + sample_rate = int(60 // 25) + + for f in files: + with open(data_path + f, 'rb') as f: + print('>>> loading {}'.format(f)) + data = pkl.load(f, encoding='latin1') + joint_pos = data['poses_60Hz'] + for i in range(len(joint_pos)): + poses = joint_pos[i] + fn = poses.shape[0] + fidxs = range(0, fn, sample_rate) + fn = len(fidxs) + poses = poses[fidxs] + poses = torch.from_numpy(poses).float().cuda() + poses = poses.reshape([fn, -1, 3]) + poses = poses[:, :-2] + # remove global rotation + poses[:, 0] = 0 + p3d0_tmp = p3d0.repeat([fn, 1, 1]) + p3d = ang2joint.ang2joint(p3d0_tmp, poses, parent) + self.p3d.append(p3d.cpu().data.numpy()) + + if split == 2: + valid_frames = np.arange(0, fn - seq_len + 1) + else: + valid_frames = np.arange(0, fn - seq_len + 1, skip_rate) + + tmp_data_idx_1 = [n] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + n += 1 + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + pose = self.p3d[key][fs].reshape((-1, 66))[:, DPW3_DIM_USED] if self.apply_joints_to_include \ + else self.p3d[key][fs].reshape((-1, 66)) + return scale(pose, SCALE_RATIO), item # , key + + diff --git a/epistemic_uncertainty/data/human_36.py b/epistemic_uncertainty/data/human_36.py new file mode 100644 index 0000000..0bcae38 --- /dev/null +++ b/epistemic_uncertainty/data/human_36.py @@ -0,0 +1,119 @@ +import torch +from torch.utils.data import 
Dataset +import random +import os + + +from ..utils import functions as data_utils +import numpy as np +from math import * +from ..utils.dataset_utils import * + + +class Human36M(Dataset): + + def __init__(self, path_to_data: str, input_n: int, output_n: int, skip_rate: int, scale: float, actions: list, + apply_joints_to_include=False, + split=0): + """ + :param path_to_data: + :param actions: a list of ACTIONS. All ACTIONS will be considered if None + :param input_n: number of input frames + :param output_n: number of output frames + :param split: 0 train, 1 testing, 2 validation + :param skip_rate: + """ + self.path_to_data = os.path.join(path_to_data, 'h3.6m/dataset') + self.split = split + self.in_n = input_n + self.out_n = output_n + self.apply_joints_to_include = apply_joints_to_include + self.skip_rate = 2 + self.p3d = {} + self.data_idx = [] + self.scale = scale + seq_len = self.in_n + self.out_n + subs = H36M_SUBJECTS[split] + key = 0 + for subj in subs: + for index in np.arange(len(actions)): + action = actions[index] + if self.split == 0 or self.split == 1: + for sub_action in [1, 2]: # subactions + self._init_train_or_val_set(action, key, seq_len, skip_rate, sub_action, subj) + key += 1 + else: + self._init_test(action, key, seq_len, subj) + key += 2 + # ignore constant joints and joints at same position with other joints + joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + self.dimensions_to_use = np.setdiff1d(np.arange(96), dimensions_to_ignore) + + def _init_test(self, action, key, seq_len, subj): + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) + filename = self._get_file_name(action, 1, subj) + num_frames1, the_sequence1 = self._get_sequence(filename) + coordinates = self._get_3d_coordinates(num_frames1, the_sequence1) + self.p3d[key] = coordinates + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) + filename = self._get_file_name(action, 2, subj) + num_frames2, the_sequence2 = self._get_sequence(filename) + coordinates = self._get_3d_coordinates(num_frames2, the_sequence2) + self.p3d[key + 1] = coordinates + fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len, + input_n=self.in_n) + valid_frames = fs_sel1[:, 0] + tmp_data_idx_1 = [key] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + valid_frames = fs_sel2[:, 0] + tmp_data_idx_1 = [key + 1] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + def _init_train_or_val_set(self, action, key, seq_len, skip_rate, sub_action, subj): + print(f'Reading subject {subj}, action {action}, subaction {sub_action}') + filename = self._get_file_name(action, sub_action, subj) + num_frames, the_sequence = self._get_sequence(filename) + coordinates = self._get_3d_coordinates(num_frames, the_sequence) + self.p3d[key] = coordinates + valid_frames = np.arange(0, num_frames - seq_len + 1, skip_rate) + tmp_data_idx_1 = [key] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + def _get_3d_coordinates(self, num_frames, the_sequence): + self._remove_extra_params(the_sequence) + # convert exponential map format to 3D points + p3d = data_utils.expmap2xyz_torch(the_sequence) # shape: (frames, 32, 3) + return 
p3d.view(num_frames, -1).cpu().data.numpy() # shape: (frames, 96) + + def _remove_extra_params(self, the_sequence): + # remove global rotation and translation + the_sequence[:, 0:6] = 0 + + def _get_sequence(self, filename): + the_sequence = data_utils.readCSVasFloat(filename) + n, d = the_sequence.shape # n = number of frames, d = number of parameters + frames_with_skip = range(0, n, self.skip_rate) + num_frames = len(frames_with_skip) + the_sequence = np.array(the_sequence[frames_with_skip, :]) + the_sequence = torch.from_numpy(the_sequence).float() + if torch.cuda.is_available(): + the_sequence = the_sequence.cuda() + return num_frames, the_sequence + + def _get_file_name(self, action, sub_action, subj): + return '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, sub_action) + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + pose = self.p3d[key][fs] + if self.apply_joints_to_include: + pose = pose[..., np.array(JOINTS_TO_INCLUDE['Human36m'])] + return data_utils.scale(pose, self.scale), item diff --git a/epistemic_uncertainty/data/human_36_action.py b/epistemic_uncertainty/data/human_36_action.py new file mode 100644 index 0000000..75b25a1 --- /dev/null +++ b/epistemic_uncertainty/data/human_36_action.py @@ -0,0 +1,132 @@ +import torch +from torch.utils.data import Dataset +import random +import os + +import utils.functions as data_utils +import numpy as np +from math import * +from utils.dataset_utils import * + + +class Human36M(Dataset): + + def __init__(self, path_to_data: str, input_n: int, output_n: int, skip_rate: int, scale: float, actions: list, + apply_joints_to_include=False, vel=False, + split=0): + """ + :param path_to_data: + :param actions: a list of ACTIONS. 
All ACTIONS will be considered if None + :param input_n: number of input frames + :param output_n: number of output frames + :param split: 0 train, 1 testing, 2 validation + :param skip_rate: + """ + self.path_to_data = os.path.join(path_to_data, 'h3.6m/dataset') + self.split = split + self.in_n = input_n + self.out_n = output_n + self.apply_joints_to_include = apply_joints_to_include + self.skip_rate = 2 + self.p3d = {} + self.data_idx = [] + self.scale = scale + seq_len = self.in_n + self.out_n + self.vel = vel if split != 2 else False + subs = H36M_SUBJECTS[split] + key = 0 + for subj in subs: + for index in np.arange(len(actions)): + action = actions[index] + if self.split == 0 or self.split == 1: + for sub_action in [1, 2]: # subactions + self._init_train_or_val_set(action, key, seq_len, skip_rate, sub_action, subj) + key += 1 + else: + self._init_test(action, key, seq_len, subj) + key += 2 + # ignore constant joints and joints at same position with other joints + joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + self.dimensions_to_use = np.setdiff1d(np.arange(96), dimensions_to_ignore) + self.len = self.__len__() + + def _init_test(self, action, key, seq_len, subj): + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) + filename = self._get_file_name(action, 1, subj) + num_frames1, the_sequence1 = self._get_sequence(filename) + coordinates = self._get_3d_coordinates(num_frames1, the_sequence1) + self.p3d[key] = coordinates, action + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) + filename = self._get_file_name(action, 2, subj) + num_frames2, the_sequence2 = self._get_sequence(filename) + coordinates = self._get_3d_coordinates(num_frames2, the_sequence2) + self.p3d[key + 1] = coordinates, action + fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len, + input_n=self.in_n) + valid_frames = fs_sel1[:, 0] + tmp_data_idx_1 = [key] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + valid_frames = fs_sel2[:, 0] + tmp_data_idx_1 = [key + 1] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + def _init_train_or_val_set(self, action, key, seq_len, skip_rate, sub_action, subj): + print(f'Reading subject {subj}, action {action}, subaction {sub_action}') + filename = self._get_file_name(action, sub_action, subj) + num_frames, the_sequence = self._get_sequence(filename) + coordinates = self._get_3d_coordinates(num_frames, the_sequence) + if self.vel: + coordinates = coordinates[1:] - coordinates[:-1] + num_frames -= 1 + seq_len -= 1 + self.p3d[key] = coordinates, action + valid_frames = np.arange(0, num_frames - seq_len + 1, skip_rate) + tmp_data_idx_1 = [key] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + def _get_3d_coordinates(self, num_frames, the_sequence): + self._remove_extra_params(the_sequence) + # convert exponential map format to 3D points + p3d = data_utils.expmap2xyz_torch(the_sequence) # shape: (frames, 32, 3) + return p3d.view(num_frames, -1).cpu().data.numpy() # shape: (frames, 96) + + def _remove_extra_params(self, the_sequence): + # remove global rotation and translation + the_sequence[:, 0:6] = 0 + + def _get_sequence(self, filename): + the_sequence = 
data_utils.readCSVasFloat(filename) + n, d = the_sequence.shape # n = number of frames, d = number of parameters + frames_with_skip = range(0, n, self.skip_rate) + num_frames = len(frames_with_skip) + the_sequence = np.array(the_sequence[frames_with_skip, :]) + the_sequence = torch.from_numpy(the_sequence).float() + if torch.cuda.is_available(): + the_sequence = the_sequence.cuda() + return num_frames, the_sequence + + def _get_file_name(self, action, sub_action, subj): + return '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, sub_action) + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + n_key, n_start_frame = self.data_idx[min(item + 1, self.len - 1)] + seq_len = self.in_n + self.out_n + if self.vel: + seq_len -= 1 + fs = np.arange(start_frame, start_frame + seq_len) + n_fs = np.arange(n_start_frame, n_start_frame + seq_len) + pose, action = self.p3d[key] + n_pose, n_action = self.p3d[n_key] + pose, n_pose = pose[fs], n_pose[n_fs] + if self.apply_joints_to_include: + pose = pose[..., np.array(JOINTS_TO_INCLUDE['Human36m'])] + n_pose = n_pose[..., np.array(JOINTS_TO_INCLUDE['Human36m'])] + return data_utils.scale(pose, self.scale), action, data_utils.scale(n_pose, self.scale), n_action, item \ No newline at end of file diff --git a/epistemic_uncertainty/main.py b/epistemic_uncertainty/main.py new file mode 100644 index 0000000..75873e4 --- /dev/null +++ b/epistemic_uncertainty/main.py @@ -0,0 +1,21 @@ +import os +from argparse import Namespace + +from torch.utils.data import Dataset +from torch.utils.data.dataloader import DataLoader + +from .model.dc.deep_clustering import DCModel +from .model.dc.train_dc_seq import train_dc_model, cluster +from .model.lstm.lstm import LstmAutoEncoder, EncoderWrapper +from .model.lstm.train_lstm import train_lstm_model +from .utils.train_utils import save_model, save_model_results_dict +from .utils.uncertainty import * +from .utils.dataset_utils import TRAIN_K, VALID_K, TEST_K, INCLUDED_JOINTS_COUNT, SKIP_RATE, SCALE_RATIO, H36_ACTIONS, \ + DIM + + +def load_dc_model(dataset_name: str, n_clusters: int, dc_model_path: str, dev='cuda'): + lstm_ae = LstmAutoEncoder(pose_dim=INCLUDED_JOINTS_COUNT[dataset_name]).to(dev) + dc_model = DCModel(lstm_ae, n_clusters=n_clusters).to(dev) + dc_model.load_state_dict(torch.load(dc_model_path)) + return dc_model \ No newline at end of file diff --git a/epistemic_uncertainty/model/__init__.py b/epistemic_uncertainty/model/__init__.py new file mode 100644 index 0000000..e606fb9 --- /dev/null +++ b/epistemic_uncertainty/model/__init__.py @@ -0,0 +1,129 @@ +# model.py +import torch +import torch.nn as nn +import numpy as np + +class Encoder(nn.Module): + def __init__(self, pose_dim=96, h_dim=32, num_layers=1, dropout=0.2, dev='cuda'): + super(Encoder, self).__init__() + self.pose_dim = pose_dim + self.h_dim = h_dim + self.dev = dev + self.num_layers = num_layers + self.encoder = nn.LSTM( + input_size=self.pose_dim, + hidden_size=h_dim, + num_layers=num_layers, + dropout=dropout, + batch_first=True, + + ).to(dev) + + def forward(self, x): + # x is in shape of (batch, seq_len, feature_dim) + batch, seq_len, l = x.shape + state_tuple = (torch.zeros(self.num_layers, batch, self.h_dim, device=self.dev), + torch.zeros(self.num_layers, batch, self.h_dim, device=self.dev)) + x = x.contiguous() + _, state_tuple = self.encoder(x, state_tuple) + + last_frame = x[:, -1, :] # dim: (batch, pose_dim) + state_tuple = state_tuple[0][-1, :, 
:].unsqueeze(0), state_tuple[1][-1, :, :].unsqueeze(0) + return last_frame, state_tuple + + +class Decoder(nn.Module): + def __init__(self, pose_dim=16, h_dim=32, num_layers=1, dropout=0.2, seq_len=25, dev='cuda'): + super(Decoder, self).__init__() + self.pose_dim = pose_dim + self.seq_len = seq_len + self.h_dim = h_dim + self.dev = dev + self.decoder = nn.LSTM( + input_size=self.pose_dim, + hidden_size=h_dim, + num_layers=num_layers, + dropout=dropout, + batch_first=True + ).to(dev) + self.hidden_to_input_space = nn.Linear(h_dim, pose_dim).to(dev) + + def forward(self, first_input, init_state_tuple): + state_tuple = init_state_tuple + batch, _ = first_input.shape + current_input = first_input.unsqueeze(1) + pred_s_g = torch.tensor([], device=self.dev) + + for i in range(self.seq_len): + output, state_tuple = self.decoder(current_input, state_tuple) + current_input = self.hidden_to_input_space(output.view(-1, self.h_dim)) + current_input = current_input.unsqueeze(1) + pred_s_g = torch.cat((pred_s_g, current_input), dim=1) + + return pred_s_g # dim: (batch, seq_len, pos_dim) + + +class LstmAutoEncoder(nn.Module): + def __init__(self, pose_dim=96, h_dim=64, num_layers=1, dropout=0.2, seq_len=25, dev='cuda'): + super(LstmAutoEncoder, self).__init__() + + self.encoder = Encoder(pose_dim, h_dim, num_layers, dropout, dev) + self.decoder = Decoder(pose_dim, h_dim, 1, dropout, seq_len, dev) + + def forward(self, x): + last_output, decoder_init_state = self.encoder(x) + return self.decoder(last_output, decoder_init_state) + +class Cl(nn.Module): + """ + Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the + sample belonging to each cluster. The probability is calculated with student's t-distribution. + Partially ported from: https://github.com/XifengGuo/DCEC/ and https://github.com/michaal94/torch_DCEC/ + # Example + ``` + cl = ClusteringLayer(n_clusters=10) + ``` + # Arguments + n_clusters: number of clusters. + input_dim: size of input data with shape `(n_samples, n_features)` + weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers. + alpha: parameter in Student's t-distribution. Default to 1.0. + # Input shape + 2D tensor with shape: `(n_samples, n_features)`. + # Output shape + 2D tensor with shape: `(n_samples, n_clusters)`. + """ + + def __init__(self, n_clusters, input_dim, weights=None, alpha=1.0, **kwargs): + super(Cl, self).__init__() + if weights is not None: + assert weights.shape[1] == input_dim + self.n_clusters = n_clusters + self.input_dim = input_dim # (n_samples, n_features) + self.alpha = alpha + self.initial_weights = weights + self.clusters = nn.Parameter(torch.Tensor(n_clusters, int(input_dim))) + self.clusters = nn.init.xavier_uniform_(self.clusters) + if self.initial_weights is not None: + self.initial_weights = torch.from_numpy(self.initial_weights) + self.clusters = nn.Parameter(self.initial_weights) + del self.initial_weights + self.input_dim = self.clusters.size(1) + + def forward(self, x): + """ student t-distribution, as same as used in t-SNE algorithm. + q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it. + Arguments: + x: the variable containing data, shape=(n_samples, n_features) + Return: + q: student's t-distribution, or soft labels for each sample. 
shape=(n_samples, n_clusters) + """ + q_denom = (x.unsqueeze(1) - self.clusters) ** 2 + q_denom = q_denom.sum(dim=2) + q_denom /= self.alpha + q_denom += 1.0 + q = 1.0 / q_denom + q = q ** ((self.alpha + 1.0) / 2.0) + q = q.t() / q.sum(dim=1) # Div shapes [20, 1024] / [1024] + q = q.t() + return q \ No newline at end of file diff --git a/epistemic_uncertainty/model/dc/__init__.py b/epistemic_uncertainty/model/dc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/epistemic_uncertainty/model/dc/deep_clustering.py b/epistemic_uncertainty/model/dc/deep_clustering.py new file mode 100644 index 0000000..0bb61b0 --- /dev/null +++ b/epistemic_uncertainty/model/dc/deep_clustering.py @@ -0,0 +1,97 @@ +import torch.nn as nn +import torch as torch +from ..lstm.lstm import LstmAutoEncoder + + +class DCModel(nn.Module): + def __init__(self, lstm_ae: LstmAutoEncoder, input_shape=512, n_clusters=17, alpha=1.0, initial_clusters=None, + device='cuda'): + super(DCModel, self).__init__() + self.input_shape = input_shape + self.n_clusters = n_clusters + self.alpha = alpha + self.y_pred = [] + + self.ae = lstm_ae.to(device) + self.encoder = self.ae.encoder + self.decoder = self.ae.decoder + + self.clustering_layer = ClusteringLayer(self.n_clusters, input_shape, weights=initial_clusters) + + def forward(self, x, ret_z=False): + last_output, hidden_state = self.encoder(x) + cat_hidden_state = self.ae.encode(x) + x_reconstructed = self.decoder(last_output, hidden_state) + cls_softmax = self.clustering_layer(cat_hidden_state) + if ret_z: + return cls_softmax, x_reconstructed, cat_hidden_state + else: + return cls_softmax, x_reconstructed + + def predict(self, x): + z = self.ae.encode(x) + cls_softmax = self.clustering_layer(z) + cls = torch.argmax(cls_softmax , 1) + return cls + + @staticmethod + def target_distribution(q): + weight = q ** 2 / q.sum(0) + w = weight.t() / weight.sum(1) + w = w.t() + return w + + +class ClusteringLayer(nn.Module): + """ + Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the + sample belonging to each cluster. The probability is calculated with student's t-distribution. + Partially ported from: https://github.com/XifengGuo/DCEC/ and https://github.com/michaal94/torch_DCEC/ + # Example + ``` + cl = ClusteringLayer(n_clusters=10) + ``` + # Arguments + n_clusters: number of clusters. + input_dim: size of input data with shape `(n_samples, n_features)` + weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers. + alpha: parameter in Student's t-distribution. Default to 1.0. + # Input shape + 2D tensor with shape: `(n_samples, n_features)`. + # Output shape + 2D tensor with shape: `(n_samples, n_clusters)`. 
+ """ + + def __init__(self, n_clusters, input_dim, weights=None, alpha=1.0, **kwargs): + super(ClusteringLayer, self).__init__(**kwargs) + if weights is not None: + assert weights.shape[1] == input_dim + self.n_clusters = n_clusters + self.input_dim = input_dim # (n_samples, n_features) + self.alpha = alpha + self.initial_weights = weights + self.clusters = nn.Parameter(torch.Tensor(n_clusters, int(input_dim))) + self.clusters = nn.init.xavier_uniform_(self.clusters) + if self.initial_weights is not None: + self.initial_weights = torch.from_numpy(self.initial_weights) + self.clusters = nn.Parameter(self.initial_weights) + del self.initial_weights + self.input_dim = self.clusters.size(1) + + def forward(self, x): + """ student t-distribution, as same as used in t-SNE algorithm. + q_ij = 1/(1+dist(x_i, u_j)^2), then scale it. + Arguments: + x: the variable containing data, shape=(n_samples, n_features) + Return: + q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters) + """ + q_denom = (x.unsqueeze(1) - self.clusters) ** 2 + q_denom = q_denom.sum(dim=2) + q_denom /= self.alpha + q_denom += 1.0 + q = 1.0 / q_denom + q = q ** ((self.alpha + 1.0) / 2.0) + q = q.t() / q.sum(dim=1) # Div shapes [20, 1024] / [1024] + q = q.t() + return q diff --git a/epistemic_uncertainty/model/dc/train_dc.py b/epistemic_uncertainty/model/dc/train_dc.py new file mode 100644 index 0000000..034341d --- /dev/null +++ b/epistemic_uncertainty/model/dc/train_dc.py @@ -0,0 +1,174 @@ +import math +import time +from argparse import Namespace + +import numpy as np +import torch.nn as nn +# from sklearn.cluster import KMeans +from sklearn.cluster import KMeans +from torch.utils.data import DataLoader +from tqdm import tqdm + +from .deep_clustering import DCModel +from ..lstm.lstm import EncoderWrapper +from ...utils.train_utils import * + + +# TODO: Copyright after everything! 
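+# Illustrative usage sketch for this module (a hedged example, not the project's
+# canonical pipeline): it assumes `dc_cfg` exposes the fields read below
+# (lr, weight_decay, epochs, update_interval, gamma, alpha, lr_decay, stop_cret)
+# and that LstmAutoEncoder is taken from ..lstm.lstm.
+#
+#   lstm_ae = LstmAutoEncoder(pose_dim=66)                      # Step 1: pretrained AE
+#   centers = cluster(dataset, EncoderWrapper(lstm_ae),         # Step 2: k-means init
+#                     k=17, device='cuda')
+#   dc_model = DCModel(lstm_ae, n_clusters=17, initial_clusters=centers)
+#   train_dc_model(dc_cfg, dc_model, dataset, batch_size=256)   # Step 3: deep clustering
+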
+def train_dc_model(dc_cfg, dc_model: DCModel, dataset, batch_size, num_workers=8, optimizer=None, + scheduler=None, dev='cuda'): + """ + By now, the following should have been completed: + Step 1: Pretraining the AE + Step 2: Initializing Clusters with K-Means + Now: + Step 3: Deep Clustering + """ + params = list(dc_model.parameters()) + list(dc_model.clustering_layer.parameters()) + if optimizer is None: + new_lr, optimizer = get_optimizer(dc_cfg, optimizer, params) + else: + new_lr, optimizer = dc_cfg.lr, optimizer(params) + + loader_args = {'batch_size': batch_size, 'num_workers': num_workers, 'pin_memory': True, } + dataset.return_indices = True + e_loader = DataLoader(dataset, shuffle=False, **loader_args) + m_loader = DataLoader(dataset, shuffle=True, drop_last=True, **loader_args) + + reconstruction_loss_fn = nn.MSELoss(size_average=True).to(dev) + cls_loss_fn = nn.KLDivLoss(size_average=False).to(dev) + + y_pred = [] + y_pred_prev = np.copy(y_pred) + loss = [0, 0, 0] + p = None + + dc_model.train() + dc_model = dc_model.to(dev) + + optimizer.zero_grad() + stop_flag = False + + eval_epochs, eval_iterations = get_eval_epochs_iterations(dc_cfg, m_loader) + + for epoch in range(dc_cfg.epochs): + loss_list = [] + start_time = time.time() + for it, data_arr in enumerate(tqdm(m_loader, desc="Train Epoch", leave=True)): + if (epoch % eval_epochs == 0) or (epoch == 0): + if it % eval_iterations == 0: + print("Target distribution update at epoch {} iteration {}".format(epoch, it)) + p, y_pred = calc_curr_p(dc_model, e_loader) + + if epoch >= 1: + stop_flag, y_pred_prev, delta_label = eval_clustering_stop_cret(y_pred, y_pred_prev, + stop_cret=dc_cfg.stop_cret) + if epoch >= 3 and stop_flag: + print("Stop flag in epoch {}".format(epoch)) + break + else: + stop_flag = False + + if not stop_flag: + indices = data_arr[-1] + p_iter = torch.from_numpy(p[indices]).to(dev) + data = data_arr[0].to(dev) + cls_softmax, x_reconstructed = dc_model(data) + reconstruction_loss = reconstruction_loss_fn(data, x_reconstructed) + clustering_loss = cls_loss_fn(torch.log(p_iter), cls_softmax) + + reg_loss = calc_reg_loss(dc_model) + loss = reconstruction_loss + dc_cfg.gamma * clustering_loss + dc_cfg.alpha * reg_loss + + loss.backward() + optimizer.step() + optimizer.zero_grad() + loss_list.append(loss.item()) + + new_lr = adjust_lr(optimizer, epoch, new_lr, dc_cfg.lr_decay, scheduler=scheduler) + print("Epoch {} Done in {}s, loss is {}\n".format(epoch, time.time() - start_time, loss)) + if stop_flag: + break + + +def adjust_lr(optimizer, epoch, lr=None, lr_decay=None, scheduler=None): + if scheduler is not None: + scheduler.step() + new_lr = scheduler.get_lr()[0] + elif (lr is not None) and (lr_decay is not None): + new_lr = lr * (lr_decay ** epoch) + for param_group in optimizer.param_groups: + param_group['lr'] = new_lr + else: + raise ValueError('Missing parameters for LR adjustment') + return new_lr + + +def get_optimizer(args, opt, params): + if opt is None: + opt = torch.optim.Adam + + optimizer = opt(params, lr=args.lr, weight_decay=args.weight_decay) + new_lr = args.lr + return new_lr, optimizer + + +def get_eval_epochs_iterations(args, m_loader): + epoch_iterations = len(m_loader.dataset) // m_loader.batch_size + eval_frac, eval_intp = math.modf(args.update_interval) + eval_epochs = int(eval_intp) + eval_iterations = int(eval_frac * epoch_iterations) + 1 # Round up to avoid eval at last iter + if eval_epochs == 0: + eval_epochs = 1 # Eval every epoch + if eval_iterations == 1: + eval_iterations = 
epoch_iterations + 1 # Once every evaluation epoch + return eval_epochs, eval_iterations + + +def calc_curr_p(dc_model: DCModel, data_loader: DataLoader, data_ind=0, device='cuda:0'): + p = [] + y_pred = [] + for it, data_arr in enumerate(tqdm(data_loader, desc="P Calculation")): + with torch.no_grad(): + pose_data = data_arr[data_ind].to(device) + curr_q, _ = dc_model(pose_data) + curr_p = dc_model.target_distribution(curr_q) + y_pred_curr = torch.argmax(curr_q, 1) + p.append(curr_p.cpu().numpy()) + y_pred.append(y_pred_curr.cpu().numpy()) + + p = np.concatenate(p, axis=0) + y_pred = np.concatenate(y_pred, axis=0) + return p, y_pred + + +def eval_clustering_stop_cret(y_pred, y_pred_prev, stop_cret=1e-3): + stop_flag = False + delta_label = np.sum(y_pred != y_pred_prev).astype(np.float32) / y_pred.shape[0] + print('delta_label ', delta_label) + y_pred_prev = np.copy(y_pred) + if delta_label < stop_cret: + print('delta_label ', delta_label, '< tol ', stop_cret) + print('Reached tolerance threshold. Stopping training if past min epochs.') + stop_flag = True + return stop_flag, y_pred_prev, delta_label + + +def cluster(dataset, encoder: EncoderWrapper, k, device): + batch_size = 1024 + data_loader = DataLoader(dataset, batch_size, True) + encoded_data = torch.tensor([], device=device) + encoder.eval() + with torch.no_grad(): + for data in data_loader: + data = data[0] + hidden_state = encoder(data.to(device)).to(device) + encoded_data = torch.cat((encoded_data, hidden_state), dim=0) + + kmeans = KMeans(n_clusters=k, init='k-means++', n_init=10, max_iter=1000) + encoded_data = encoded_data.to('cpu').detach().numpy() + print('Initializing cluster centers with k-means...') + kmeans.fit(encoded_data) + print('Clustering finished!') + return kmeans.cluster_centers_ + diff --git a/epistemic_uncertainty/model/dc/train_dc_seq.py b/epistemic_uncertainty/model/dc/train_dc_seq.py new file mode 100644 index 0000000..66812d0 --- /dev/null +++ b/epistemic_uncertainty/model/dc/train_dc_seq.py @@ -0,0 +1,193 @@ +import copy +import math +import time +from argparse import Namespace + +import numpy as np +import torch.nn as nn +from sklearn.cluster import KMeans +from torch.utils.data import DataLoader +from tqdm import tqdm + +from .deep_clustering import DCModel +from ..lstm.lstm import EncoderWrapper +from ...utils.train_utils import * + + +def train_dc_model(args: Namespace, dc_model: DCModel, dataset, batch_size, num_workers=8, optimizer=None, + scheduler=None, dev='cuda'): + """ + By now, the following should have been completed: + Step 1: Pretraining the AE + Step 2: Initializing Clusters with K-Means + Now: + Step 3: Deep Clustering + """ + params = list(dc_model.parameters()) + list(dc_model.clustering_layer.parameters()) + if optimizer is None: + new_lr, optimizer = get_optimizer(args, optimizer, params) + else: + new_lr, optimizer = args.dc_lr, optimizer(params) + + loader_args = {'batch_size': batch_size, 'num_workers': num_workers, 'pin_memory': True, } + dataset.return_indices = True + e_loader = DataLoader(dataset, shuffle=False, **loader_args) + m_loader = DataLoader(dataset, shuffle=True, drop_last=True, **loader_args) + + reconstruction_loss_fn = nn.MSELoss(reduction='mean').to(dev) + cls_loss_fn = nn.KLDivLoss().to(dev) + seq_loss_fn = nn.MSELoss(reduction='mean').to(dev) + + y_pred = [] + y_pred_prev = np.copy(y_pred) + loss = [0, 0, 0] + p = None + + dc_model.train() + dc_model = dc_model.to(dev) + + optimizer.zero_grad() + stop_flag = False + + eval_epochs, eval_iterations = 
get_eval_epochs_iterations(args, m_loader) + b_loss = 10000 + b_model = None + + for epoch in range(args.dc_epochs): + loss_list = [] + start_time = time.time() + for it, data_arr in enumerate(tqdm(m_loader, desc="Train Epoch", leave=True)): + if (epoch % eval_epochs == 0) or (epoch == 0): + if it % eval_iterations == 0: + print("Target distribution update at epoch {} iteration {}".format(epoch, it)) + p, y_pred = calc_curr_p(dc_model, e_loader) + + if epoch >= 1: + stop_flag, y_pred_prev, delta_label = eval_clustering_stop_cret(y_pred, y_pred_prev, + stop_cret=args.dc_stop_cret) + if epoch >= 3 and stop_flag: + print("Stop flag in epoch {}".format(epoch)) + break + else: + stop_flag = False + + if not stop_flag: + indices = data_arr[-1] + p_iter = torch.from_numpy(p[indices]).to(dev) + data = data_arr[0].to(dev) + n_data = data_arr[2].to(dev) + cls_softmax, x_reconstructed, z = dc_model(data, ret_z=True) + n_cls, _, n_z = dc_model(n_data, ret_z=True) + reconstruction_loss = reconstruction_loss_fn(data, x_reconstructed) + clustering_loss = cls_loss_fn(torch.log(p_iter), cls_softmax) + actions = np.array(data_arr[1]) + n_actions = np.array(data_arr[3]) + idx = np.where(n_actions == actions)[0] + seq_loss = seq_loss_fn(n_z[idx], z[idx]) + + reg_loss = calc_reg_loss(dc_model) + loss = reconstruction_loss + args.dc_gamma * clustering_loss + args.alpha * reg_loss + args.dc_lambda * seq_loss + + loss.backward() + optimizer.step() + optimizer.zero_grad() + c_loss = loss.item() + if b_loss > c_loss: + c_loss = b_loss + b_model = copy.deepcopy(dc_model) + loss_list.append(c_loss) + + new_lr = adjust_lr(optimizer, epoch, new_lr, args.dc_lr_decay, scheduler=scheduler) + print("Epoch {} Done in {}s, loss is {}\n".format(epoch, time.time() - start_time, loss)) + if stop_flag: + break + return b_model + + +def adjust_lr(optimizer, epoch, lr=None, lr_decay=None, scheduler=None): + if scheduler is not None: + scheduler.step() + new_lr = scheduler.get_lr()[0] + elif (lr is not None) and (lr_decay is not None): + new_lr = lr * (lr_decay ** epoch) + for param_group in optimizer.param_groups: + param_group['lr'] = new_lr + else: + raise ValueError('Missing parameters for LR adjustment') + return new_lr + + +def get_optimizer(args, opt, params): + if opt is None: + opt = torch.optim.Adam + + optimizer = opt(params, lr=args.dc_lr, weight_decay=args.dc_weight_decay) + new_lr = args.dc_lr + return new_lr, optimizer + + +def get_eval_epochs_iterations(args, m_loader): + epoch_iterations = len(m_loader.dataset) // m_loader.batch_size + eval_frac, eval_intp = math.modf(args.dc_update_interval) + eval_epochs = int(eval_intp) + eval_iterations = int(eval_frac * epoch_iterations) + 1 # Round up to avoid eval at last iter + if eval_epochs == 0: + eval_epochs = 1 # Eval every epoch + if eval_iterations == 1: + eval_iterations = epoch_iterations + 1 # Once every evaluation epoch + return eval_epochs, eval_iterations + + +def calc_curr_p(dc_model: DCModel, data_loader: DataLoader, data_ind=0, device='cuda:0'): + p = [] + y_pred = [] + for it, data_arr in enumerate(tqdm(data_loader, desc="P Calculation")): + with torch.no_grad(): + pose_data = data_arr[data_ind].to(device) + curr_q, _ = dc_model(pose_data) + curr_p = dc_model.target_distribution(curr_q) + y_pred_curr = torch.argmax(curr_q, 1) + p.append(curr_p.cpu().numpy()) + y_pred.append(y_pred_curr.cpu().numpy()) + + p = np.concatenate(p, axis=0) + y_pred = np.concatenate(y_pred, axis=0) + return p, y_pred + + +def eval_clustering_stop_cret(y_pred, y_pred_prev, 
stop_cret=1e-3): + stop_flag = False + delta_label = np.sum(y_pred != y_pred_prev).astype(np.float32) / y_pred.shape[0] + print('delta_label ', delta_label) + y_pred_prev = np.copy(y_pred) + if delta_label < stop_cret: + print('delta_label ', delta_label, '< tol ', stop_cret) + print('Reached tolerance threshold. Stopping training if past min epochs.') + stop_flag = True + return stop_flag, y_pred_prev, delta_label + + +def cluster(dataset, encoder: EncoderWrapper, k, device): + batch_size = 1024 + data_loader = DataLoader( + dataset, batch_size, True + ) + encoded_data = torch.tensor([], device=device) + encoder.eval() + with torch.no_grad(): + for it, data in enumerate(data_loader): + data = data[0] + hidden_state = encoder(data.to(device)).to(device) + encoded_data = torch.cat((encoded_data, hidden_state), dim=0) + + kmeans = KMeans( + n_clusters=k, + init='k-means++', + n_init=10, + max_iter=1000 + ) + encoded_data = encoded_data.to('cpu').detach().numpy() + print('Initializing cluster centers with k-means...') + kmeans.fit(encoded_data) + print('Clustering finished!') + return kmeans.cluster_centers_ \ No newline at end of file diff --git a/epistemic_uncertainty/model/lstm/__init__.py b/epistemic_uncertainty/model/lstm/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/epistemic_uncertainty/model/lstm/lstm.py b/epistemic_uncertainty/model/lstm/lstm.py new file mode 100644 index 0000000..41c1601 --- /dev/null +++ b/epistemic_uncertainty/model/lstm/lstm.py @@ -0,0 +1,89 @@ +import torch.nn as nn +import torch as torch + + +class LstmEncoder(nn.Module): + def __init__(self, pose_dim=66, h_dim=256, num_layers=3, dropout=0.2, dev='cuda'): + super(LstmEncoder, self).__init__() + self.pose_dim = pose_dim + self.h_dim = h_dim + self.dev = dev + self.num_layers = num_layers + self.encoder = nn.LSTM( + input_size=self.pose_dim, + hidden_size=h_dim, + num_layers=num_layers, + dropout=dropout, + batch_first=True, + + ).to(dev) + + def forward(self, x): + batch, seq_len, l = x.shape + state_tuple = (torch.zeros(self.num_layers, batch, self.h_dim, device=self.dev), + torch.zeros(self.num_layers, batch, self.h_dim, device=self.dev)) + x = x.contiguous() + _, state_tuple = self.encoder(x, state_tuple) + + last_frame = x[:, -1, :] # dim: (batch, pose_dim) + state_tuple = state_tuple[0][-1, :, :].unsqueeze(0), state_tuple[1][-1, :, :].unsqueeze(0) + return last_frame, state_tuple + + +class LstmDecoder(nn.Module): + def __init__(self, pose_dim=66, h_dim=256, num_layers=1, dropout=0.2, seq_len=25, dev='cuda'): + super(LstmDecoder, self).__init__() + self.pose_dim = pose_dim + self.seq_len = seq_len + self.h_dim = h_dim + self.dev = dev + self.decoder = nn.LSTM( + input_size=self.pose_dim, + hidden_size=h_dim, + num_layers=num_layers, + dropout=dropout, + batch_first=True + ).to(dev) + self.hidden_to_input_space = nn.Linear(h_dim, pose_dim).to(dev) + + def forward(self, first_input, init_state_tuple): + state_tuple = init_state_tuple + batch, _ = first_input.shape + current_input = first_input.unsqueeze(1) + pred_s_g = torch.tensor([], device=self.dev) + + for i in range(self.seq_len): + output, state_tuple = self.decoder(current_input, state_tuple) + current_input = self.hidden_to_input_space(output.view(-1, self.h_dim)) + current_input = current_input.unsqueeze(1) + pred_s_g = torch.cat((pred_s_g, current_input), dim=1) + + return pred_s_g # dim: (batch, seq_len, pos_dim) + + +class LstmAutoEncoder(nn.Module): + def __init__(self, pose_dim=66, h_dim=256, num_layers=3, dropout=0.2, 
seq_len=25, dev='cuda'): + super(LstmAutoEncoder, self).__init__() + + self.encoder = LstmEncoder(pose_dim, h_dim, num_layers, dropout, dev) + self.decoder = LstmDecoder(pose_dim, h_dim, 1, dropout, seq_len, dev) + self.outdim = 3 + + def forward(self, x): + last_output, decoder_init_state = self.encoder(x) + return self.decoder(last_output, decoder_init_state) + + def encode(self, x): + _, hidden_state = self.encoder(x) + cat_hidden_state = torch.cat((hidden_state[0].squeeze(0), hidden_state[1].squeeze(0)), dim=1) + return cat_hidden_state + + +class EncoderWrapper(nn.Module): + def __init__(self, model): + super(EncoderWrapper, self).__init__() + self.model = model + + def forward(self, x): + z = self.model.encode(x) + return z diff --git a/epistemic_uncertainty/model/lstm/train_lstm.py b/epistemic_uncertainty/model/lstm/train_lstm.py new file mode 100644 index 0000000..a22dc1c --- /dev/null +++ b/epistemic_uncertainty/model/lstm/train_lstm.py @@ -0,0 +1,115 @@ +import time +from argparse import Namespace +from functools import partial + +import numpy as np +import torch.nn as nn +from torch import optim +from torch.utils.data import DataLoader +from tqdm import tqdm + +from .lstm import LstmAutoEncoder +from ...utils.train_utils import * + + +def lstm_train_epoch(lstm_model: LstmAutoEncoder, train_loader: DataLoader, loss_func, alpha, optimizer, dev='cuda'): + """ + Train epoch while training the LSTM model + :param lstm_model: + :param train_loader: + :param loss_func: + :param alpha: + :param optimizer: + :param dev: + :return: + """ + lstm_model.train() + loss_list = [] + for it, data_arr in enumerate(tqdm(train_loader)): + data = data_arr[0].to(dev, non_blocking=True) + output = lstm_model(data).to(dev) + reconstruction_loss = loss_func(output, data) + reg_loss = calc_reg_loss(lstm_model) + loss = reconstruction_loss + 1e-3 * alpha * reg_loss + loss.backward() + optimizer.step() + optimizer.zero_grad() + loss_list.append(loss.item()) + return loss_list + + +def lstm_eval_epoch(lstm_model: LstmAutoEncoder, valid_loader: DataLoader, loss_func, alpha, dev='cuda'): + """ + Evaluation epoch while training the LSTM model + :param lstm_model: + :param valid_loader: + :param loss_func: + :param alpha: + :param dev: + :return: + """ + lstm_model.eval() + loss_list = [] + for it, data_arr in enumerate(tqdm(valid_loader)): + data = data_arr[0].to(dev, non_blocking=True) + output = lstm_model(data).to(dev) + reconstruction_loss = loss_func(output, data) + reg_loss = calc_reg_loss(lstm_model) + loss = reconstruction_loss + 1e-3 * alpha * reg_loss + loss_list.append(loss.item()) + return loss_list + + +def train_lstm_model(lstm_cfg, lstm_model: LstmAutoEncoder, train_loader: DataLoader, valid_loader: DataLoader, dev): + optimizer = init_optimizer(lstm_model, lstm_cfg.optimizer, lr=lstm_cfg.lr) + scheduler = init_scheduler(optimizer, lstm_cfg.scheduler, lr=lstm_cfg.lr, epochs=lstm_cfg.epochs) + lstm_model.train() + lstm_model.to(dev) + loss_func = nn.MSELoss() + for epoch in range(lstm_cfg.epochs): + ep_start_time = time.time() + print("Started epoch {}".format(epoch)) + train_loss = lstm_train_epoch(lstm_model, train_loader, loss_func, lstm_cfg.alpha, optimizer, dev) + new_lr = adjust_lr(epoch, lstm_cfg.lr, lstm_cfg.lr_decay, optimizer, scheduler) + print('lr: {0:.3e}'.format(new_lr)) + eval_loss = lstm_eval_epoch(lstm_model, valid_loader, loss_func, lstm_cfg.alpha, dev) + print(f'Epoch {epoch + 1}: Training loss = {np.mean(train_loss)} - Eval loss = {np.mean(eval_loss)}, ' + f'took: {time.time() 
- ep_start_time} seconds')
+
+
+def init_optimizer(lstm_model: LstmAutoEncoder, type_str, **kwargs):
+    """
+    Initializes an optimizer for the given LSTM model.
+    :param lstm_model: model whose parameters will be optimized
+    :param type_str: optimizer name, e.g. 'adam'
+    :param kwargs: extra optimizer keyword arguments (e.g. lr)
+    :return: an initialized optimizer, or None for an unknown type_str
+    """
+    if type_str.lower() == 'adam':
+        optimizer = partial(optim.Adam, **kwargs)
+    else:
+        return None
+
+    return optimizer(lstm_model.parameters())
+
+
+def init_scheduler(optimizer, type_str, lr, epochs):
+    """
+    Initializes a scheduler for the given optimizer.
+    :param optimizer: optimizer to wrap
+    :param type_str: scheduler name ('tri' for CyclicLR)
+    :param lr: base learning rate used to derive the cyclic range
+    :param epochs: total training epochs (used to size the cycle)
+    :return: an initialized scheduler, or None if no scheduler could be built
+    """
+    scheduler = None
+    if (type_str.lower() == 'tri') and (epochs >= 8):
+        scheduler = partial(optim.lr_scheduler.CyclicLR,
+                            base_lr=lr / 10, max_lr=lr * 10,
+                            step_size_up=epochs // 8,
+                            mode='triangular2',
+                            cycle_momentum=False)
+    else:
+        print("Unable to initialize scheduler, defaulting to exp_decay")
+        return None  # no scheduler built; the caller falls back to its lr-decay path
+    return scheduler(optimizer)
diff --git a/epistemic_uncertainty/model/pgbig/BaseModel.py b/epistemic_uncertainty/model/pgbig/BaseModel.py
new file mode 100644
index 0000000..881e52b
--- /dev/null
+++ b/epistemic_uncertainty/model/pgbig/BaseModel.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import print_function
+
+import torch.nn as nn
+import torch
+from torch.nn.parameter import Parameter
+import math
+
+class GraphConvolution(nn.Module):
+    """
+    adapted from : https://github.com/tkipf/gcn/blob/92600c39797c2bfb61a508e52b88fb554df30177/gcn/layers.py#L132
+    """
+
+    def __init__(self, in_c, out_c, node_n = 22, seq_len = 35, bias=True):
+        super(GraphConvolution, self).__init__()
+        self.in_features = in_c
+        self.out_features = out_c
+        self.att = Parameter(torch.FloatTensor(node_n, node_n))
+        self.weight_seq = Parameter(torch.FloatTensor(seq_len, seq_len))
+
+        self.weight_c = Parameter(torch.FloatTensor(in_c, out_c))
+
+        if bias:
+            self.bias = Parameter(torch.FloatTensor(seq_len))
+        else:
+            self.register_parameter('bias', None)
+        self.reset_parameters()
+
+        self.support = None
+
+    def reset_parameters(self):
+        stdv = 1. 
/ math.sqrt(self.att.size(1)) + self.weight_c.data.uniform_(-stdv, stdv) + self.weight_seq.data.uniform_(-stdv, stdv) + self.att.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + + def forward(self, input): + #input [b,c,22,35] + + #先进行图卷积再进行空域卷积 + # [b,c,22,35] -> [b,35,22,c] -> [b,35,22,c] + support = torch.matmul(self.att, input.permute(0, 3, 2, 1)) + + # [b,35,22,c] -> [b,35,22,64] + output_gcn = torch.matmul(support, self.weight_c) + + + #进行空域卷积 + # [b,35,22,64] -> [b,22,64,35] + output_fc = torch.matmul(output_gcn.permute(0, 2, 3, 1), self.weight_seq).permute(0, 2, 1, 3).contiguous() + + + if self.bias is not None: + return (output_fc + self.bias) + else: + return output_fc + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + + +class GC_Block(nn.Module): + def __init__(self, channal, p_dropout, bias=True, node_n=22, seq_len = 20): + """ + Define a residual block of GCN + """ + super(GC_Block, self).__init__() + self.in_features = channal + self.out_features = channal + + self.gc1 = GraphConvolution(channal, channal, node_n=node_n, seq_len=seq_len, bias=bias) + self.bn1 = nn.BatchNorm1d(channal*node_n*seq_len) + + self.gc2 = GraphConvolution(channal, channal, node_n=node_n, seq_len=seq_len, bias=bias) + self.bn2 = nn.BatchNorm1d(channal*node_n*seq_len) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + def forward(self, x): + + y = self.gc1(x) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn1(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + y = self.gc2(y) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn2(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + return y + x + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + +class GCN(nn.Module): + def __init__(self, in_channal, out_channal, node_n=22, seq_len=20, p_dropout=0.3, num_stage=1 ): + """ + :param input_feature: num of input feature + :param hidden_feature: num of hidden feature + :param p_dropout: drop out prob. 
+ :param num_stage: number of residual blocks + :param node_n: number of nodes in graph + """ + super(GCN, self).__init__() + self.num_stage = num_stage + + self.gc1 = GraphConvolution(in_c=in_channal, out_c=out_channal, node_n=node_n, seq_len=seq_len) + self.bn1 = nn.BatchNorm1d(out_channal*node_n*seq_len) + + self.gcbs = [] + for i in range(num_stage): + self.gcbs.append(GC_Block(channal=out_channal, p_dropout=p_dropout, node_n=node_n, seq_len=seq_len)) + + self.gcbs = nn.ModuleList(self.gcbs) + self.gc7 = GraphConvolution(in_c=out_channal, out_c=in_channal, node_n=node_n, seq_len=seq_len) + self.bn2 = nn.BatchNorm1d(in_channal*node_n*seq_len) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + + def forward(self, x): + + y = self.gc1(x) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn1(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + for i in range(self.num_stage): + y = self.gcbs[i](y) + + y = self.gc7(y) + # b, n, f = y.shape + # y = self.bn2(y.view(b, -1)).view(b, n, f) + # y = self.act_f(y) + # y = self.do(y) + + return y + x + +class GCN_encoder(nn.Module): + def __init__(self, in_channal, out_channal, node_n=22, seq_len=20, p_dropout=0.3, num_stage=1 ): + """ + :param input_feature: num of input feature + :param hidden_feature: num of hidden feature + :param p_dropout: drop out prob. + :param num_stage: number of residual blocks + :param node_n: number of nodes in graph + """ + super(GCN_encoder, self).__init__() + self.num_stage = num_stage + + self.gc1 = GraphConvolution(in_c=in_channal, out_c=out_channal, node_n=node_n, seq_len=seq_len) + self.bn1 = nn.BatchNorm1d(out_channal*node_n*seq_len) + + self.gcbs = [] + for i in range(num_stage): + self.gcbs.append(GC_Block(channal=out_channal, p_dropout=p_dropout, node_n=node_n, seq_len=seq_len)) + + self.gcbs = nn.ModuleList(self.gcbs) + self.gc7 = GraphConvolution(in_c=out_channal, out_c=out_channal, node_n=node_n, seq_len=seq_len) + self.bn2 = nn.BatchNorm1d(out_channal*node_n*seq_len) + self.reshape_conv = torch.nn.Conv2d(in_channels=in_channal, out_channels=out_channal, kernel_size=(1, 1)) + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + + def forward(self, x): + + y = self.gc1(x) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn1(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + for i in range(self.num_stage): + y = self.gcbs[i](y) + + y = self.gc7(y) + # b, c, n, l = y.shape + # y = self.bn2(y.view(b, -1)).view(b, c, n, l).contiguous() + # y = self.act_f(y) + # y = self.do(y) + + return y + self.reshape_conv(x) + +class GCN_decoder(nn.Module): + def __init__(self, in_channal, out_channal, node_n=22, seq_len=20, p_dropout=0.3, num_stage=1): + """ + :param input_feature: num of input feature + :param hidden_feature: num of hidden feature + :param p_dropout: drop out prob. 
+ :param num_stage: number of residual blocks + :param node_n: number of nodes in graph + """ + super(GCN_decoder, self).__init__() + self.num_stage = num_stage + + self.gc1 = GraphConvolution(in_c=in_channal, out_c=in_channal, node_n=node_n, seq_len=seq_len) + self.bn1 = nn.BatchNorm1d(in_channal*node_n*seq_len) + + self.gcbs = [] + for i in range(num_stage): + self.gcbs.append(GC_Block(channal=in_channal, p_dropout=p_dropout, node_n=node_n, seq_len=seq_len)) + + self.gcbs = nn.ModuleList(self.gcbs) + self.gc7 = GraphConvolution(in_c=in_channal, out_c=out_channal, node_n=node_n, seq_len=seq_len) + self.bn2 = nn.BatchNorm1d(in_channal*node_n*seq_len) + + self.reshape_conv = torch.nn.Conv2d(in_channels=in_channal, out_channels=out_channal, kernel_size=(1, 1)) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + def forward(self, x): + y = self.gc1(x) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn1(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + for i in range(self.num_stage): + y = self.gcbs[i](y) + + y = self.gc7(y) + self.reshape_conv(x) + + return y + + diff --git a/epistemic_uncertainty/model/pgbig/stage_4.py b/epistemic_uncertainty/model/pgbig/stage_4.py new file mode 100644 index 0000000..118e125 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/stage_4.py @@ -0,0 +1,150 @@ +from torch.nn import Module +from torch import nn +import torch +# import model.transformer_base +import math +from model.pgbig import BaseModel as BaseBlock +import numpy as np +import torch.nn.functional as F +from torch.nn.parameter import Parameter +from .utils import util +from .utils.opt import Options +""" +在model1的基础上添加st_gcn,修改 bn +""" + +class MultiStageModel(Module): + def __init__(self, in_features, num_stages, d_model, kernel_size, drop_out, input_n, output_n, dct_n, cuda_idx): + super(MultiStageModel, self).__init__() + + self.kernel_size = kernel_size + self.d_model = d_model + # self.seq_in = seq_in + self.dct_n = dct_n + # ks = int((kernel_size + 1) / 2) + assert kernel_size == 10 + + self.in_features = in_features + self.num_stage = num_stages + self.node_n = self.in_features//3 + + self.encoder_layer_num = 1 + self.decoder_layer_num = 2 + + self.input_n = input_n + self.output_n = output_n + + self.gcn_encoder1 = BaseBlock.GCN_encoder(in_channal=3, out_channal=self.d_model, + node_n=self.node_n, + seq_len=self.dct_n, + p_dropout=drop_out, + num_stage=self.encoder_layer_num) + + self.gcn_decoder1 = BaseBlock.GCN_decoder(in_channal=self.d_model, out_channal=3, + node_n=self.node_n, + seq_len=self.dct_n*2, + p_dropout=drop_out, + num_stage=self.decoder_layer_num) + + self.gcn_encoder2 = BaseBlock.GCN_encoder(in_channal=3, out_channal=self.d_model, + node_n=self.node_n, + seq_len=self.dct_n, + p_dropout=drop_out, + num_stage=self.encoder_layer_num) + + self.gcn_decoder2 = BaseBlock.GCN_decoder(in_channal=self.d_model, out_channal=3, + node_n=self.node_n, + seq_len=self.dct_n * 2, + p_dropout=drop_out, + num_stage=self.decoder_layer_num) + + self.gcn_encoder3 = BaseBlock.GCN_encoder(in_channal=3, out_channal=self.d_model, + node_n=self.node_n, + seq_len=self.dct_n, + p_dropout=drop_out, + num_stage=self.encoder_layer_num) + + self.gcn_decoder3 = BaseBlock.GCN_decoder(in_channal=self.d_model, out_channal=3, + node_n=self.node_n, + seq_len=self.dct_n * 2, + p_dropout=drop_out, + num_stage=self.decoder_layer_num) + + self.gcn_encoder4 = BaseBlock.GCN_encoder(in_channal=3, out_channal=self.d_model, + node_n=self.node_n, + 
seq_len=self.dct_n, + p_dropout=drop_out, + num_stage=self.encoder_layer_num) + + self.gcn_decoder4 = BaseBlock.GCN_decoder(in_channal=self.d_model, out_channal=3, + node_n=self.node_n, + seq_len=self.dct_n * 2, + p_dropout=drop_out, + num_stage=self.decoder_layer_num) + + def forward(self, src, input_n=10, output_n=10, itera=1): + output_n = self.output_n + input_n = self.input_n + + bs = src.shape[0] + # [2000,512,22,20] + dct_n = self.dct_n + idx = list(range(self.kernel_size)) + [self.kernel_size -1] * output_n + # [b,20,66] + input_gcn = src[:, idx].clone() + + dct_m, idct_m = util.get_dct_matrix(input_n + output_n) + print(dct_m.shape) + dct_m = torch.from_numpy(dct_m).float().to('cuda:0') + idct_m = torch.from_numpy(idct_m).float().to('cuda:0') + + # [b,20,66] -> [b,66,20] + input_gcn_dct = torch.matmul(dct_m[:dct_n], input_gcn).permute(0, 2, 1) + + # [b,66,20]->[b,22,3,20]->[b,3,22,20]->[b,512,22,20] + input_gcn_dct = input_gcn_dct.reshape(bs, self.node_n, -1, self.dct_n).permute(0, 2, 1, 3) + + #stage1 + latent_gcn_dct = self.gcn_encoder1(input_gcn_dct) + #[b,512,22,20] -> [b, 512, 22, 40] + latent_gcn_dct = torch.cat((latent_gcn_dct, latent_gcn_dct), dim=3) + output_dct_1 = self.gcn_decoder1(latent_gcn_dct)[:, :, :, :dct_n] + + #stage2 + latent_gcn_dct = self.gcn_encoder2(output_dct_1) + # [b,512,22,20] -> [b, 512, 22, 40] + latent_gcn_dct = torch.cat((latent_gcn_dct, latent_gcn_dct), dim=3) + output_dct_2 = self.gcn_decoder2(latent_gcn_dct)[:, :, :, :dct_n] + + #stage3 + latent_gcn_dct = self.gcn_encoder3(output_dct_2) + # [b,512,22,20] -> [b, 512, 22, 40] + latent_gcn_dct = torch.cat((latent_gcn_dct, latent_gcn_dct), dim=3) + output_dct_3 = self.gcn_decoder3(latent_gcn_dct)[:, :, :, :dct_n] + + #stage4 + latent_gcn_dct = self.gcn_encoder4(output_dct_3) + # [b,512,22,20] -> [b, 512, 22, 40] + latent_gcn_dct = torch.cat((latent_gcn_dct, latent_gcn_dct), dim=3) + output_dct_4 = self.gcn_decoder4(latent_gcn_dct)[:, :, :, :dct_n] + + output_dct_1 = output_dct_1.permute(0, 2, 1, 3).reshape(bs, -1, dct_n) + output_dct_2 = output_dct_2.permute(0, 2, 1, 3).reshape(bs, -1, dct_n) + output_dct_3 = output_dct_3.permute(0, 2, 1, 3).reshape(bs, -1, dct_n) + output_dct_4 = output_dct_4.permute(0, 2, 1, 3).reshape(bs, -1, dct_n) + + # [b,20 66]->[b,20 66] + output_1 = torch.matmul(idct_m[:, :dct_n], output_dct_1.permute(0, 2, 1)) + output_2 = torch.matmul(idct_m[:, :dct_n], output_dct_2.permute(0, 2, 1)) + output_3 = torch.matmul(idct_m[:, :dct_n], output_dct_3.permute(0, 2, 1)) + output_4 = torch.matmul(idct_m[:, :dct_n], output_dct_4.permute(0, 2, 1)) + + return output_4, output_3, output_2, output_1 + +if __name__ == '__main__': + option = Options().parse() + option.d_model = 64 + model = MultiStageModel(opt=option).cuda() + print(">>> total params: {:.2f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0)) + src = torch.FloatTensor(torch.randn((32, 35, 66))).cuda() + output, att_map,zero = model(src) diff --git a/epistemic_uncertainty/model/pgbig/utils/CMU_motion_3d.py b/epistemic_uncertainty/model/pgbig/utils/CMU_motion_3d.py new file mode 100644 index 0000000..2e525c7 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/CMU_motion_3d.py @@ -0,0 +1,46 @@ +from torch.utils.data import Dataset +import numpy as np +from utils import data_utils + + +class CMU_Motion3D(Dataset): + + def __init__(self, opt, split, actions='all'): + + self.path_to_data = opt.data_dir + input_n = opt.input_n + output_n = opt.output_n + + self.split = split + is_all = actions + actions = 
data_utils.define_actions_cmu(actions) + # actions = ['walking'] + if split == 0: + path_to_data = self.path_to_data + '/train/' + is_test = False + else: + path_to_data = self.path_to_data + '/test/' + is_test = True + + + if not is_test: + all_seqs, dim_ignore, dim_use = data_utils.load_data_cmu_3d_all(opt, path_to_data, actions, + input_n, output_n, + is_test=is_test) + else: + # all_seqs, dim_ignore, dim_use = data_utils.load_data_cmu_3d_all(opt, path_to_data, actions, + # input_n, output_n, + # is_test=is_test) + + all_seqs, dim_ignore, dim_use = data_utils.load_data_cmu_3d_n(opt, path_to_data, actions, + input_n, output_n, + is_test=is_test) + + self.all_seqs = all_seqs + self.dim_used = dim_use + + def __len__(self): + return np.shape(self.all_seqs)[0] + + def __getitem__(self, item): + return self.all_seqs[item] diff --git a/epistemic_uncertainty/model/pgbig/utils/__init__.py b/epistemic_uncertainty/model/pgbig/utils/__init__.py new file mode 100644 index 0000000..713c826 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/__init__.py @@ -0,0 +1 @@ +from utils import * \ No newline at end of file diff --git a/epistemic_uncertainty/model/pgbig/utils/amass.py b/epistemic_uncertainty/model/pgbig/utils/amass.py new file mode 100644 index 0000000..ece9bc9 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/amass.py @@ -0,0 +1,142 @@ +from torch.utils.data import Dataset +import numpy as np +from h5py import File +import scipy.io as sio +from utils import data_utils +from matplotlib import pyplot as plt +import torch +import os +from utils import ang2joint + + +class Datasets(Dataset): + + def __init__(self, opt, actions=None, split=0): + """ + :param path_to_data: + :param actions: + :param input_n: + :param output_n: + :param dct_used: + :param split: 0 train, 1 testing, 2 validation + :param sample_rate: + """ + self.path_to_data = opt.data_dir + self.split = split + self.in_n = opt.input_n + self.out_n = opt.output_n + # self.sample_rate = opt.sample_rate + self.p3d = [] + self.keys = [] + self.data_idx = [] + self.joint_used = np.arange(4, 22) + seq_len = self.in_n + self.out_n + + # amass_splits = [ + # ['CMU', 'MPI_Limits', 'TotalCapture', 'Eyes_Japan_Dataset', 'KIT', 'EKUT', 'TCD_handMocap', 'ACCAD'], + # ['HumanEva', 'MPI_HDM05', 'SFU', 'MPI_mosh'], + # ['BioMotionLab_NTroje'], + # ] + + amass_splits = [ + ['ACCAD'], + ['ACCAD'], + ['ACCAD'], + ] + + #缺失了MPI_Limites,所以我们用MPI_mosh 代替 + # amass_splits = [ + # ['CMU', 'MPI_mosh', 'TotalCapture', 'Eyes_Japan_Dataset', 'KIT', 'EKUT', 'TCD_handMocap', 'ACCAD'], + # ['HumanEva', 'MPI_HDM05', 'SFU'], + # ['BioMotionLab_NTroje'], + # ] + + # amass_splits = [['BioMotionLab_NTroje'], ['HumanEva'], ['SSM_synced']] + # amass_splits = [['HumanEva'], ['HumanEva'], ['HumanEva']] + # amass_splits[0] = list( + # set(amass_splits[0]).difference(set(amass_splits[1] + amass_splits[2]))) + + # from human_body_prior.body_model.body_model import BodyModel + # from smplx import lbs + # root_path = os.path.dirname(__file__) + # bm_path = root_path[:-6] + '/body_models/smplh/neutral/model.npz' + # bm = BodyModel(bm_path=bm_path, num_betas=16, batch_size=1, model_type='smplh') + # beta_mean = np.array([0.41771687, 0.25984767, 0.20500051, 0.13503872, 0.25965645, -2.10198147, -0.11915666, + # -0.5498772, 0.30885323, 1.4813145, -0.60987528, 1.42565269, 2.45862726, 0.23001716, + # -0.64180912, 0.30231911]) + # beta_mean = torch.from_numpy(beta_mean).unsqueeze(0).float() + # # Add shape contribution + # v_shaped = bm.v_template + 
lbs.blend_shapes(beta_mean, bm.shapedirs) + # # Get the joints + # # NxJx3 array + # p3d0 = lbs.vertices2joints(bm.J_regressor, v_shaped) # [1,52,3] + # p3d0 = (p3d0 - p3d0[:, 0:1, :]).float().cuda().cpu().data.numpy() + # parents = bm.kintree_table.data.numpy()[0, :] + # np.savez_compressed('smpl_skeleton.npz', p3d0=p3d0, parents=parents) + + # load mean skeleton + # skel = np.load('./body_models/smpl_skeleton.npz') + # p3d0 = torch.from_numpy(skel['p3d0']).float().cuda() + # parents = skel['parents'] + parent = {} + # for i in range(len(parents)): + # parent[i] = parents[i] + n = 0 + for ds in amass_splits[split]: + if not os.path.isdir(self.path_to_data + ds): + print(ds) + continue + print('>>> loading {}'.format(ds)) + for sub in os.listdir(self.path_to_data + ds): + if not os.path.isdir(self.path_to_data + ds + '/' + sub): + continue + for act in os.listdir(self.path_to_data + ds + '/' + sub): + if not act.endswith('.npz'): + continue + # if not ('walk' in act or 'jog' in act or 'run' in act or 'treadmill' in act): + # continue + pose_all = np.load(self.path_to_data + ds + '/' + sub + '/' + act) + try: + poses = pose_all['poses'] + except: + print('no poses at {}_{}_{}'.format(ds, sub, act)) + continue + frame_rate = pose_all['mocap_framerate'] + # gender = pose_all['gender'] + # dmpls = pose_all['dmpls'] + # betas = pose_all['betas'] + # trans = pose_all['trans'] + fn = poses.shape[0] + sample_rate = int(frame_rate // 25) + fidxs = range(0, fn, sample_rate) + fn = len(fidxs) + poses = poses[fidxs] + poses = torch.from_numpy(poses).float().cuda() + poses = poses.reshape([fn, -1, 3]) + # remove global rotation + poses[:, 0] = 0 + # p3d0_tmp = p3d0.repeat([fn, 1, 1]) + p3d = poses + #p3d = ang2joint.ang2joint(p3d0_tmp, poses, parent) + # self.p3d[(ds, sub, act)] = p3d.cpu().data.numpy() + self.p3d.append(p3d[:,:22].reshape(-1, 22*3).cpu().data.numpy()) + #self.p3d.append(p3d.reshape(-1, 52 * 3).cpu().data.numpy()) + if split != 0: + valid_frames = np.arange(0, fn - seq_len + 1, 10) + else: + valid_frames = np.arange(0, fn - seq_len + 1, opt.skip_rate) + + # tmp_data_idx_1 = [(ds, sub, act)] * len(valid_frames) + self.keys.append((ds, sub, act)) + tmp_data_idx_1 = [n] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + n += 1 + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + return self.p3d[key][fs] # , key \ No newline at end of file diff --git a/epistemic_uncertainty/model/pgbig/utils/ang2joint.py b/epistemic_uncertainty/model/pgbig/utils/ang2joint.py new file mode 100644 index 0000000..c0d9dff --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/ang2joint.py @@ -0,0 +1,124 @@ +import torch + + +def ang2joint(p3d0, pose, + parent={0: -1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9, + 15: 12, 16: 13, 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21}): + """ + + :param p3d0:[batch_size, joint_num, 3] + :param pose:[batch_size, joint_num, 3] + :param parent: + :return: + """ + # model_path = './model.npz' + # params = np.load(model_path, allow_pickle=True) + # kintree_table = params['kintree_table'] + batch_num = p3d0.shape[0] + # id_to_col = {kintree_table[1, i]: i + # for i in range(kintree_table.shape[1])} + # parent = { + # i: id_to_col[kintree_table[0, i]] + # for i in range(1, kintree_table.shape[1]) + # } + 
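+    # The loop below composes 4x4 homogeneous transforms down the kinematic tree:
+    # each joint contributes its Rodrigues rotation plus its offset from the parent
+    # joint (via with_zeros), and the global joint positions are read from the
+    # translation column of the stacked matrices.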
# parent = {1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9, 15: 12, 16: 13, + # 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21} + jnum = len(parent.keys()) + # v_shaped = torch.tensordot(betas, self.shapedirs, dims=([1], [2])) + self.v_template + # J = torch.matmul(self.J_regressor, v_shaped) + # face_J = v_shaped[:, [333, 2801, 6261], :] + J = p3d0 + R_cube_big = rodrigues(pose.contiguous().view(-1, 1, 3)).reshape(batch_num, -1, 3, 3) + results = [] + results.append( + with_zeros(torch.cat((R_cube_big[:, 0], torch.reshape(J[:, 0, :], (-1, 3, 1))), dim=2)) + ) + # for i in range(1, kintree_table.shape[1]): + for i in range(1, jnum): + results.append( + torch.matmul( + results[parent[i]], + with_zeros( + torch.cat( + (R_cube_big[:, i], torch.reshape(J[:, i, :] - J[:, parent[i], :], (-1, 3, 1))), + dim=2 + ) + ) + ) + ) + + stacked = torch.stack(results, dim=1) + J_transformed = stacked[:, :, :3, 3] + return J_transformed + + +def rodrigues(r): + """ + Rodrigues' rotation formula that turns axis-angle tensor into rotation + matrix in a batch-ed manner. + + Parameter: + ---------- + r: Axis-angle rotation tensor of shape [batch_size * angle_num, 1, 3]. + + Return: + ------- + Rotation matrix of shape [batch_size * angle_num, 3, 3]. + + """ + eps = r.clone().normal_(std=1e-8) + theta = torch.norm(r + eps, dim=(1, 2), keepdim=True) + # theta = torch.norm(r, dim=(1, 2), keepdim=True) # dim cannot be tuple + theta_dim = theta.shape[0] + r_hat = r / theta + cos = torch.cos(theta) + z_stick = torch.zeros(theta_dim, dtype=torch.float).to(r.device) + m = torch.stack( + (z_stick, -r_hat[:, 0, 2], r_hat[:, 0, 1], r_hat[:, 0, 2], z_stick, + -r_hat[:, 0, 0], -r_hat[:, 0, 1], r_hat[:, 0, 0], z_stick), dim=1) + m = torch.reshape(m, (-1, 3, 3)) + i_cube = (torch.eye(3, dtype=torch.float).unsqueeze(dim=0) \ + + torch.zeros((theta_dim, 3, 3), dtype=torch.float)).to(r.device) + A = r_hat.permute(0, 2, 1) + dot = torch.matmul(A, r_hat) + R = cos * i_cube + (1 - cos) * dot + torch.sin(theta) * m + return R + + +def with_zeros(x): + """ + Append a [0, 0, 0, 1] tensor to a [3, 4] tensor. + + Parameter: + --------- + x: Tensor to be appended. + + Return: + ------ + Tensor after appending of shape [4,4] + + """ + ones = torch.tensor( + [[[0.0, 0.0, 0.0, 1.0]]], dtype=torch.float + ).expand(x.shape[0], -1, -1).to(x.device) + ret = torch.cat((x, ones), dim=1) + return ret + + +def pack(x): + """ + Append zero tensors of shape [4, 3] to a batch of [4, 1] shape tensor. + + Parameter: + ---------- + x: A tensor of shape [batch_size, 4, 1] + + Return: + ------ + A tensor of shape [batch_size, 4, 4] after appending. + + """ + zeros43 = torch.zeros( + (x.shape[0], x.shape[1], 4, 3), dtype=torch.float).to(x.device) + ret = torch.cat((zeros43, x), dim=3) + return ret diff --git a/epistemic_uncertainty/model/pgbig/utils/data_utils.py b/epistemic_uncertainty/model/pgbig/utils/data_utils.py new file mode 100644 index 0000000..da74323 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/data_utils.py @@ -0,0 +1,797 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import torch +# from torch.autograd.variable import Variable +import os +from . 
import forward_kinematics + + +def rotmat2euler(R): + """ + Converts a rotation matrix to Euler angles + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1 + + Args + R: a 3x3 rotation matrix + Returns + eul: a 3x1 Euler angle representation of R + """ + if R[0, 2] == 1 or R[0, 2] == -1: + # special case + E3 = 0 # set arbitrarily + dlta = np.arctan2(R[0, 1], R[0, 2]); + + if R[0, 2] == -1: + E2 = np.pi / 2; + E1 = E3 + dlta; + else: + E2 = -np.pi / 2; + E1 = -E3 + dlta; + + else: + E2 = -np.arcsin(R[0, 2]) + E1 = np.arctan2(R[1, 2] / np.cos(E2), R[2, 2] / np.cos(E2)) + E3 = np.arctan2(R[0, 1] / np.cos(E2), R[0, 0] / np.cos(E2)) + + eul = np.array([E1, E2, E3]); + return eul + + +def rotmat2quat(R): + """ + Converts a rotation matrix to a quaternion + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4 + + Args + R: 3x3 rotation matrix + Returns + q: 1x4 quaternion + """ + rotdiff = R - R.T; + + r = np.zeros(3) + r[0] = -rotdiff[1, 2] + r[1] = rotdiff[0, 2] + r[2] = -rotdiff[0, 1] + sintheta = np.linalg.norm(r) / 2; + r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps); + + costheta = (np.trace(R) - 1) / 2; + + theta = np.arctan2(sintheta, costheta); + + q = np.zeros(4) + q[0] = np.cos(theta / 2) + q[1:] = r0 * np.sin(theta / 2) + return q + + +def rotmat2expmap(R): + return quat2expmap(rotmat2quat(R)); + + +def expmap2rotmat(r): + """ + Converts an exponential map angle to a rotation matrix + Matlab port to python for evaluation purposes + I believe this is also called Rodrigues' formula + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m + + Args + r: 1x3 exponential map + Returns + R: 3x3 rotation matrix + """ + theta = np.linalg.norm(r) + r0 = np.divide(r, theta + np.finfo(np.float32).eps) + r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3, 3) + r0x = r0x - r0x.T + R = np.eye(3, 3) + np.sin(theta) * r0x + (1 - np.cos(theta)) * (r0x).dot(r0x); + return R + + +def quat2expmap(q): + """ + Converts a quaternion to an exponential map + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1 + + Args + q: 1x4 quaternion + Returns + r: 1x3 exponential map + Raises + ValueError if the l2 norm of the quaternion is not close to 1 + """ + if (np.abs(np.linalg.norm(q) - 1) > 1e-3): + raise (ValueError, "quat2expmap: input quaternion is not norm 1") + + sinhalftheta = np.linalg.norm(q[1:]) + coshalftheta = q[0] + + r0 = np.divide(q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps)); + theta = 2 * np.arctan2(sinhalftheta, coshalftheta) + theta = np.mod(theta + 2 * np.pi, 2 * np.pi) + + if theta > np.pi: + theta = 2 * np.pi - theta + r0 = -r0 + + r = r0 * theta + return r + + +def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot): + """Borrowed from SRNN code. Reads a csv file and returns a float32 matrix. 
+ https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12 + + Args + normalizedData: nxd matrix with normalized data + data_mean: vector of mean used to normalize the data + data_std: vector of standard deviation used to normalize the data + dimensions_to_ignore: vector with dimensions not used by the model + actions: list of strings with the encoded actions + one_hot: whether the data comes with one-hot encoding + Returns + origData: data originally used to + """ + T = normalizedData.shape[0] + D = data_mean.shape[0] + + origData = np.zeros((T, D), dtype=np.float32) + dimensions_to_use = [] + for i in range(D): + if i in dimensions_to_ignore: + continue + dimensions_to_use.append(i) + dimensions_to_use = np.array(dimensions_to_use) + + if one_hot: + origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)] + else: + origData[:, dimensions_to_use] = normalizedData + + # potentially ineficient, but only done once per experiment + stdMat = data_std.reshape((1, D)) + stdMat = np.repeat(stdMat, T, axis=0) + meanMat = data_mean.reshape((1, D)) + meanMat = np.repeat(meanMat, T, axis=0) + origData = np.multiply(origData, stdMat) + meanMat + return origData + + +def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot): + """ + Converts the output of the neural network to a format that is more easy to + manipulate for, e.g. conversion to other format or visualization + + Args + poses: The output from the TF model. A list with (seq_length) entries, + each with a (batch_size, dim) output + Returns + poses_out: A tensor of size (batch_size, seq_length, dim) output. Each + batch is an n-by-d sequence of poses. + """ + seq_len = len(poses) + if seq_len == 0: + return [] + + batch_size, dim = poses[0].shape + + poses_out = np.concatenate(poses) + poses_out = np.reshape(poses_out, (seq_len, batch_size, dim)) + poses_out = np.transpose(poses_out, [1, 0, 2]) + + poses_out_list = [] + for i in xrange(poses_out.shape[0]): + poses_out_list.append( + unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot)) + + return poses_out_list + + +def readCSVasFloat(filename): + """ + Borrowed from SRNN code. Reads a csv and returns a float matrix. + https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34 + + Args + filename: string. Path to the csv file + Returns + returnArray: the read data in a float32 matrix + """ + returnArray = [] + lines = open(filename).readlines() + for line in lines: + line = line.strip().split(',') + if len(line) > 0: + returnArray.append(np.array([np.float32(x) for x in line])) + + returnArray = np.array(returnArray) + return returnArray + + +def normalize_data(data, data_mean, data_std, dim_to_use, actions, one_hot): + """ + Normalize input data by removing unused dimensions, subtracting the mean and + dividing by the standard deviation + + Args + data: nx99 matrix with data to normalize + data_mean: vector of mean used to normalize the data + data_std: vector of standard deviation used to normalize the data + dim_to_use: vector with dimensions used by the model + actions: list of strings with the encoded actions + one_hot: whether the data comes with one-hot encoding + Returns + data_out: the passed data matrix, but normalized + """ + data_out = {} + nactions = len(actions) + + if not one_hot: + # No one-hot encoding... 
no need to do anything special + for key in data.keys(): + data_out[key] = np.divide((data[key] - data_mean), data_std) + data_out[key] = data_out[key][:, dim_to_use] + + else: + # TODO hard-coding 99 dimensions for un-normalized human poses + for key in data.keys(): + data_out[key] = np.divide((data[key][:, 0:99] - data_mean), data_std) + data_out[key] = data_out[key][:, dim_to_use] + data_out[key] = np.hstack((data_out[key], data[key][:, -nactions:])) + + return data_out + + +def normalization_stats(completeData): + """" + Also borrowed for SRNN code. Computes mean, stdev and dimensions to ignore. + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33 + + Args + completeData: nx99 matrix with data to normalize + Returns + data_mean: vector of mean used to normalize the data + data_std: vector of standard deviation used to normalize the data + dimensions_to_ignore: vector with dimensions not used by the model + dimensions_to_use: vector with dimensions used by the model + """ + data_mean = np.mean(completeData, axis=0) + data_std = np.std(completeData, axis=0) + + dimensions_to_ignore = [] + dimensions_to_use = [] + + dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) + dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) + + data_std[dimensions_to_ignore] = 1.0 + + return data_mean, data_std, dimensions_to_ignore, dimensions_to_use + + +def define_actions(action): + """ + Define the list of actions we are using. + + Args + action: String with the passed action. Could be "all" + Returns + actions: List of strings of actions + Raises + ValueError if the action is not included in H3.6M + """ + + actions = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] + if action in actions: + return [action] + + if action == "all": + return actions + + if action == "all_srnn": + return ["walking", "eating", "smoking", "discussion"] + + raise (ValueError, "Unrecognized action: %d" % action) + + +"""all methods above are borrowed from https://github.com/una-dinosauria/human-motion-prediction""" + + +def define_actions_cmu(action): + """ + Define the list of actions we are using. + + Args + action: String with the passed action. 
Could be "all" + Returns + actions: List of strings of actions + Raises + ValueError if the action is not included in H3.6M + """ + + actions = ["basketball", "basketball_signal", "directing_traffic", "jumping", "running", "soccer", "walking", + "washwindow"] + if action in actions: + return [action] + + if action == "all": + return actions + + raise (ValueError, "Unrecognized action: %d" % action) + + +def load_data_cmu(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False): + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + count = count + 1 + for examp_index in np.arange(count): + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, 2) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + if not is_test: + fs = np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + source_seq_len = 50 + target_seq_len = 25 + total_frames = source_seq_len + target_seq_len + batch_size = 8 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + for _ in range(batch_size): + idx = rng.randint(0, num_frames - total_frames) + seq_sel = the_sequence[ + idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] + seq_sel = np.expand_dims(seq_sel, axis=0) + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + if not is_test: + data_std = np.std(complete_seq, axis=0) + data_mean = np.mean(complete_seq, axis=0) + + dimensions_to_ignore = [] + dimensions_to_use = [] + dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) + dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) + data_std[dimensions_to_ignore] = 1.0 + data_mean[dimensions_to_ignore] = 0.0 + + return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std + +def load_data_cmu_3d_8(opt, path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False): + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + count = count + 1 + for examp_index in np.arange(count): + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + exptmps = torch.from_numpy(action_sequence).float().to(opt.cuda_idx) + xyz = expmap2xyz_torch_cmu(opt, exptmps) + xyz = xyz.view(-1, 38 * 3) + xyz = xyz.cpu().data.numpy() + action_sequence = xyz + + even_list = range(0, n, 2) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + + + if not 
is_test: + fs = np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + source_seq_len = 50 + target_seq_len = 25 + total_frames = source_seq_len + target_seq_len + batch_size = 8 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + for _ in range(batch_size): + idx = rng.randint(0, num_frames - total_frames) + seq_sel = the_sequence[ + idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] + seq_sel = np.expand_dims(seq_sel, axis=0) + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + if not is_test: + data_std = np.std(complete_seq, axis=0) + data_mean = np.mean(complete_seq, axis=0) + + joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore) + + data_std[dimensions_to_ignore] = 1.0 + data_mean[dimensions_to_ignore] = 0.0 + + return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std + +def load_data_cmu_3d_n(opt, path_to_dataset, actions, input_n, output_n, is_test=False): + test_sample_num = opt.test_sample_num + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + if 'txt' in _: + count = count + 1 + for examp_index in np.arange(count): + print('eval or test read {}'.format(action)) + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + exptmps = torch.from_numpy(action_sequence).float().to(opt.cuda_idx) + xyz = expmap2xyz_torch_cmu(opt, exptmps) + xyz = xyz.view(-1, 38 * 3) + xyz = xyz.cpu().data.numpy() + action_sequence = xyz + + save_filename = '{}/{}/{}_{}.npy'.format(path_to_dataset, action, action, examp_index + 1) + np.save(save_filename, xyz) + + even_list = range(0, n, 2) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + + + if (not is_test) or test_sample_num<0: + #如果不是测试的话就不用随机采样 + fs = np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + #这里为什么source_seq_len 被固定为50帧?因为为了和之前一个方法保持一致,实际运行过程中会根据实际的输入长度进行调整。 + source_seq_len = 50 + target_seq_len = output_n + total_frames = source_seq_len + target_seq_len + batch_size = test_sample_num + SEED = 1234567890 + rng = np.random.RandomState(SEED) + for _ in range(batch_size): + idx = rng.randint(0, num_frames - total_frames) + 
seq_sel = the_sequence[ + idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] + seq_sel = np.expand_dims(seq_sel, axis=0) + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + + joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore) + + return sampled_seq, dimensions_to_ignore, dimensions_to_use + +def load_data_cmu_3d_all(opt, path_to_dataset, actions, input_n, output_n, is_test=False): + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + if '.txt' in _: + count = count + 1 + for examp_index in np.arange(count): + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + print('read ' + filename) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + exptmps = torch.from_numpy(action_sequence).float().to(opt.cuda_idx) + xyz = expmap2xyz_torch_cmu(opt, exptmps) + xyz = xyz.view(-1, 38 * 3) + xyz = xyz.cpu().data.numpy() + + save_filename = '{}/{}/{}_{}.npy'.format(path_to_dataset, action, action, examp_index + 1) + np.save(save_filename, xyz) + action_sequence = xyz + + # 以间距为2进行采样 调整帧率 + even_list = range(0, n, 2) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + + # 采样的样本数 + fs = np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + + joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore) + + + return sampled_seq, dimensions_to_ignore, dimensions_to_use + + +def rotmat2euler_torch(opt, R): + """ + Converts a rotation matrix to euler angles + batch pytorch version ported from the corresponding numpy method above + + :param R:N*3*3 + :return: N*3 + """ + n = R.data.shape[0] + eul = torch.zeros(n, 3).float().to(opt.cuda_idx) + idx_spec1 = (R[:, 0, 2] == 1).nonzero().cpu().data.numpy().reshape(-1).tolist() + idx_spec2 = (R[:, 0, 2] == -1).nonzero().cpu().data.numpy().reshape(-1).tolist() + if len(idx_spec1) > 0: + R_spec1 = R[idx_spec1, :, :] + eul_spec1 = torch.zeros(len(idx_spec1), 3).float().to(opt.cuda_idx) + eul_spec1[:, 2] = 0 + eul_spec1[:, 1] = -np.pi / 2 + delta = torch.atan2(R_spec1[:, 0, 1], R_spec1[:, 0, 2]) + eul_spec1[:, 0] = delta + eul[idx_spec1, :] = eul_spec1 + + if len(idx_spec2) > 0: + R_spec2 = R[idx_spec2, :, :] + eul_spec2 = torch.zeros(len(idx_spec2), 3).float().to(opt.cuda_idx) + eul_spec2[:, 2] = 0 + eul_spec2[:, 1] = np.pi / 2 + delta = torch.atan2(R_spec2[:, 0, 1], R_spec2[:, 0, 2]) + 
eul_spec2[:, 0] = delta + eul[idx_spec2] = eul_spec2 + + idx_remain = np.arange(0, n) + idx_remain = np.setdiff1d(np.setdiff1d(idx_remain, idx_spec1), idx_spec2).tolist() + if len(idx_remain) > 0: + R_remain = R[idx_remain, :, :] + eul_remain = torch.zeros(len(idx_remain), 3).float().to(opt.cuda_idx) + eul_remain[:, 1] = -torch.asin(R_remain[:, 0, 2]) + eul_remain[:, 0] = torch.atan2(R_remain[:, 1, 2] / torch.cos(eul_remain[:, 1]), + R_remain[:, 2, 2] / torch.cos(eul_remain[:, 1])) + eul_remain[:, 2] = torch.atan2(R_remain[:, 0, 1] / torch.cos(eul_remain[:, 1]), + R_remain[:, 0, 0] / torch.cos(eul_remain[:, 1])) + eul[idx_remain, :] = eul_remain + + return eul + + +def rotmat2quat_torch(R): + """ + Converts a rotation matrix to quaternion + batch pytorch version ported from the corresponding numpy method above + :param R: N * 3 * 3 + :return: N * 4 + """ + rotdiff = R - R.transpose(1, 2) + r = torch.zeros_like(rotdiff[:, 0]) + r[:, 0] = -rotdiff[:, 1, 2] + r[:, 1] = rotdiff[:, 0, 2] + r[:, 2] = -rotdiff[:, 0, 1] + r_norm = torch.norm(r, dim=1) + sintheta = r_norm / 2 + r0 = torch.div(r, r_norm.unsqueeze(1).repeat(1, 3) + 0.00000001) + t1 = R[:, 0, 0] + t2 = R[:, 1, 1] + t3 = R[:, 2, 2] + costheta = (t1 + t2 + t3 - 1) / 2 + theta = torch.atan2(sintheta, costheta) + q = torch.zeros(R.shape[0], 4).float().cuda() + q[:, 0] = torch.cos(theta / 2) + q[:, 1:] = torch.mul(r0, torch.sin(theta / 2).unsqueeze(1).repeat(1, 3)) + + return q + + +def expmap2quat_torch(exp): + """ + Converts expmap to quaternion + batch pytorch version ported from the corresponding numpy method above + :param R: N*3 + :return: N*4 + """ + theta = torch.norm(exp, p=2, dim=1).unsqueeze(1) + v = torch.div(exp, theta.repeat(1, 3) + 0.0000001) + sinhalf = torch.sin(theta / 2) + coshalf = torch.cos(theta / 2) + q1 = torch.mul(v, sinhalf.repeat(1, 3)) + q = torch.cat((coshalf, q1), dim=1) + return q + + +def expmap2rotmat_torch(opt,r): + """ + Converts expmap matrix to rotation + batch pytorch version ported from the corresponding method above + :param r: N*3 + :return: N*3*3 + """ + theta = torch.norm(r, 2, 1) + r0 = torch.div(r, theta.unsqueeze(1).repeat(1, 3) + 0.0000001) + r1 = torch.zeros_like(r0).repeat(1, 3) + r1[:, 1] = -r0[:, 2] + r1[:, 2] = r0[:, 1] + r1[:, 5] = -r0[:, 0] + r1 = r1.view(-1, 3, 3) + r1 = r1 - r1.transpose(1, 2) + n = r1.data.shape[0] + R = torch.eye(3, 3).repeat(n, 1, 1).float().to(opt.cuda_idx) + torch.mul( + torch.sin(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3), r1) + torch.mul( + (1 - torch.cos(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3)), torch.matmul(r1, r1)) + return R + + +def expmap2xyz_torch(opt, expmap): + """ + convert expmaps to joint locations + :param expmap: N*99 + :return: N*32*3 + """ + parent, offset, rotInd, expmapInd = forward_kinematics._some_variables() + xyz = forward_kinematics.fkl_torch(opt, expmap, parent, offset, rotInd, expmapInd) + return xyz + +def expmap2xyz_torch_cmu(opt, expmap): + parent, offset, rotInd, expmapInd = forward_kinematics._some_variables_cmu() + xyz = forward_kinematics.fkl_torch(opt, expmap, parent, offset, rotInd, expmapInd) + + return xyz +def get_dct_matrix(N): + dct_m = np.eye(N) + for k in np.arange(N): + for i in np.arange(N): + w = np.sqrt(2 / N) + if k == 0: + w = np.sqrt(1 / N) + dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) + idct_m = np.linalg.inv(dct_m) + return dct_m, idct_m + + +def find_indices_n(frame_num1, frame_num2, seq_len, input_n=10, test_sample_num=256): + """ + Adapted from 
https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 + + which originaly from + In order to find the same action indices as in SRNN. + https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 + """ + + # Used a fixed dummy seed, following + # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + + T1 = frame_num1 - 150 + T2 = frame_num2 - 150 # seq_len + idxo1 = None + idxo2 = None + + sample_num = test_sample_num//2 + for _ in np.arange(0, sample_num): + idx_ran1 = rng.randint(16, T1) + idx_ran2 = rng.randint(16, T2) + idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) + idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) + if idxo1 is None: + idxo1 = idxs1 + idxo2 = idxs2 + else: + idxo1 = np.vstack((idxo1, idxs1)) + idxo2 = np.vstack((idxo2, idxs2)) + return idxo1, idxo2 + + +def find_indices_srnn(frame_num1, frame_num2, seq_len, input_n=10): + """ + Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 + + which originaly from + In order to find the same action indices as in SRNN. + https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 + """ + + # Used a fixed dummy seed, following + # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + + T1 = frame_num1 - 150 + T2 = frame_num2 - 150 # seq_len + idxo1 = None + idxo2 = None + for _ in np.arange(0, 4): + idx_ran1 = rng.randint(16, T1) + idx_ran2 = rng.randint(16, T2) + # print("subact1 {}".format(idx_ran1)) + # print("subact2 {}".format(idx_ran2)) + idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) + idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) + if idxo1 is None: + idxo1 = idxs1 + idxo2 = idxs2 + else: + idxo1 = np.vstack((idxo1, idxs1)) + idxo2 = np.vstack((idxo2, idxs2)) + return idxo1, idxo2 diff --git a/epistemic_uncertainty/model/pgbig/utils/dpw3_3d.py b/epistemic_uncertainty/model/pgbig/utils/dpw3_3d.py new file mode 100644 index 0000000..06b90d8 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/dpw3_3d.py @@ -0,0 +1,69 @@ +from torch.utils.data import Dataset +import pickle as pkl +import numpy as np +from os import walk +from h5py import File +import scipy.io as sio +from utils import data_utils +from matplotlib import pyplot as plt +import torch + + +class Datasets(Dataset): + + def __init__(self, opt, actions=None, split=0): + + path_to_data = opt.data_dir + input_n = opt.input_n + output_n = opt.output_n + + if split == 1: + their_input_n = 50 + else: + their_input_n = input_n + + seq_len = their_input_n + output_n + + if split == 0: + self.data_path = path_to_data + '/train/' + elif split == 1: + self.data_path = path_to_data + '/validation/' + elif split == 2: + self.data_path = path_to_data + '/test/' + all_seqs = [] + files = [] + + # load data + for (dirpath, dirnames, filenames) in walk(self.data_path): + files.extend(filenames) + for f in files: + with open(self.data_path + f, 'rb') as f: + data = pkl.load(f, encoding='latin1') + joint_pos = data['jointPositions'] + for i in range(len(joint_pos)): + seqs = joint_pos[i] + seqs = seqs - seqs[:, 0:3].repeat(24, axis=0).reshape(-1, 72) + n_frames = seqs.shape[0] + 
fs = np.arange(0, n_frames - seq_len + 1) + fs_sel = fs + for j in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + j + 1)) + fs_sel = fs_sel.transpose() + seq_sel = seqs[fs_sel, :] + if len(all_seqs) == 0: + all_seqs = seq_sel + else: + all_seqs = np.concatenate((all_seqs, seq_sel), axis=0) + + # self.all_seqs = all_seqs[:, (their_input_n - input_n):, :] + + self.dim_used = np.array(range(3, all_seqs.shape[2])) + #all_seqs = all_seqs[:, (their_input_n - input_n):, 3:] + all_seqs = all_seqs[:, (their_input_n - input_n):, :] + self.all_seqs = all_seqs * 1000 + + def __len__(self): + return np.shape(self.all_seqs)[0] + + def __getitem__(self, item): + return self.all_seqs[item] \ No newline at end of file diff --git a/epistemic_uncertainty/model/pgbig/utils/forward_kinematics.py b/epistemic_uncertainty/model/pgbig/utils/forward_kinematics.py new file mode 100644 index 0000000..05dd556 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/forward_kinematics.py @@ -0,0 +1,288 @@ +import numpy as np +import torch +from torch.autograd.variable import Variable +from utils import data_utils + + +def fkl(angles, parent, offset, rotInd, expmapInd): + """ + Convert joint angles and bone lenghts into the 3d points of a person. + + adapted from + https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L14 + + which originaly based on expmap2xyz.m, available at + https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m + Args + angles: 99-long vector with 3d position and 3d joint angles in expmap format + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + Returns + xyz: 32x3 3d points that represent a person in 3d space + """ + + assert len(angles) == 99 + + # Structure that indicates parents for each joint + njoints = 32 + xyzStruct = [dict() for x in range(njoints)] + + for i in np.arange(njoints): + + # if not rotInd[i]: # If the list is empty + # xangle, yangle, zangle = 0, 0, 0 + # else: + # xangle = angles[rotInd[i][0] - 1] + # yangle = angles[rotInd[i][1] - 1] + # zangle = angles[rotInd[i][2] - 1] + if i == 0: + xangle = angles[0] + yangle = angles[1] + zangle = angles[2] + thisPosition = np.array([xangle, yangle, zangle]) + else: + thisPosition = np.array([0, 0, 0]) + + r = angles[expmapInd[i]] + + thisRotation = data_utils.expmap2rotmat(r) + + if parent[i] == -1: # Root node + xyzStruct[i]['rotation'] = thisRotation + xyzStruct[i]['xyz'] = np.reshape(offset[i, :], (1, 3)) + thisPosition + else: + xyzStruct[i]['xyz'] = (offset[i, :] + thisPosition).dot(xyzStruct[parent[i]]['rotation']) + \ + xyzStruct[parent[i]]['xyz'] + xyzStruct[i]['rotation'] = thisRotation.dot(xyzStruct[parent[i]]['rotation']) + + xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] + xyz = np.array(xyz).squeeze() + # xyz = xyz[:, [0, 2, 1]] + # xyz = xyz[:,[2,0,1]] + + return xyz + + +def _some_variables(): + """ + borrowed from + https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L100 + + We define some variables that are useful to run the kinematic tree + + Args + None + Returns + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 
32-long list with indices into expmap angles + """ + + parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 1, 12, 13, 14, 15, 13, + 17, 18, 19, 20, 21, 20, 23, 13, 25, 26, 27, 28, 29, 28, 31]) - 1 + + offset = np.array( + [0.000000, 0.000000, 0.000000, -132.948591, 0.000000, 0.000000, 0.000000, -442.894612, 0.000000, 0.000000, + -454.206447, 0.000000, 0.000000, 0.000000, 162.767078, 0.000000, 0.000000, 74.999437, 132.948826, 0.000000, + 0.000000, 0.000000, -442.894413, 0.000000, 0.000000, -454.206590, 0.000000, 0.000000, 0.000000, 162.767426, + 0.000000, 0.000000, 74.999948, 0.000000, 0.100000, 0.000000, 0.000000, 233.383263, 0.000000, 0.000000, + 257.077681, 0.000000, 0.000000, 121.134938, 0.000000, 0.000000, 115.002227, 0.000000, 0.000000, 257.077681, + 0.000000, 0.000000, 151.034226, 0.000000, 0.000000, 278.882773, 0.000000, 0.000000, 251.733451, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999627, 0.000000, 100.000188, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 257.077681, 0.000000, 0.000000, 151.031437, 0.000000, 0.000000, 278.892924, + 0.000000, 0.000000, 251.728680, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999888, + 0.000000, 137.499922, 0.000000, 0.000000, 0.000000, 0.000000]) + offset = offset.reshape(-1, 3) + + rotInd = [[5, 6, 4], + [8, 9, 7], + [11, 12, 10], + [14, 15, 13], + [17, 18, 16], + [], + [20, 21, 19], + [23, 24, 22], + [26, 27, 25], + [29, 30, 28], + [], + [32, 33, 31], + [35, 36, 34], + [38, 39, 37], + [41, 42, 40], + [], + [44, 45, 43], + [47, 48, 46], + [50, 51, 49], + [53, 54, 52], + [56, 57, 55], + [], + [59, 60, 58], + [], + [62, 63, 61], + [65, 66, 64], + [68, 69, 67], + [71, 72, 70], + [74, 75, 73], + [], + [77, 78, 76], + []] + + expmapInd = np.split(np.arange(4, 100) - 1, 32) + + return parent, offset, rotInd, expmapInd + + +def _some_variables_cmu(): + """ + We define some variables that are useful to run the kinematic tree + + Args + None + Returns + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + """ + + parent = np.array([0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 1, 14, 15, 16, 17, 18, 19, 16, + 21, 22, 23, 24, 25, 26, 24, 28, 16, 30, 31, 32, 33, 34, 35, 33, 37]) - 1 + + offset = 70 * np.array( + [0, 0, 0, 0, 0, 0, 1.65674000000000, -1.80282000000000, 0.624770000000000, 2.59720000000000, -7.13576000000000, + 0, 2.49236000000000, -6.84770000000000, 0, 0.197040000000000, -0.541360000000000, 2.14581000000000, 0, 0, + 1.11249000000000, 0, 0, 0, -1.61070000000000, -1.80282000000000, 0.624760000000000, -2.59502000000000, + -7.12977000000000, 0, -2.46780000000000, -6.78024000000000, 0, -0.230240000000000, -0.632580000000000, + 2.13368000000000, 0, 0, 1.11569000000000, 0, 0, 0, 0.0196100000000000, 2.05450000000000, -0.141120000000000, + 0.0102100000000000, 2.06436000000000, -0.0592100000000000, 0, 0, 0, 0.00713000000000000, 1.56711000000000, + 0.149680000000000, 0.0342900000000000, 1.56041000000000, -0.100060000000000, 0.0130500000000000, + 1.62560000000000, -0.0526500000000000, 0, 0, 0, 3.54205000000000, 0.904360000000000, -0.173640000000000, + 4.86513000000000, 0, 0, 3.35554000000000, 0, 0, 0, 0, 0, 0.661170000000000, 0, 0, 0.533060000000000, 0, 0, 0, + 0, 0, 0.541200000000000, 0, 0.541200000000000, 0, 0, 0, -3.49802000000000, 0.759940000000000, + -0.326160000000000, -5.02649000000000, 0, 0, -3.36431000000000, 0, 0, 0, 0, 0, 
-0.730410000000000, 0, 0, + -0.588870000000000, 0, 0, 0, 0, 0, -0.597860000000000, 0, 0.597860000000000]) + offset = offset.reshape(-1, 3) + + rotInd = [[6, 5, 4], + [9, 8, 7], + [12, 11, 10], + [15, 14, 13], + [18, 17, 16], + [21, 20, 19], + [], + [24, 23, 22], + [27, 26, 25], + [30, 29, 28], + [33, 32, 31], + [36, 35, 34], + [], + [39, 38, 37], + [42, 41, 40], + [45, 44, 43], + [48, 47, 46], + [51, 50, 49], + [54, 53, 52], + [], + [57, 56, 55], + [60, 59, 58], + [63, 62, 61], + [66, 65, 64], + [69, 68, 67], + [72, 71, 70], + [], + [75, 74, 73], + [], + [78, 77, 76], + [81, 80, 79], + [84, 83, 82], + [87, 86, 85], + [90, 89, 88], + [93, 92, 91], + [], + [96, 95, 94], + []] + posInd = [] + for ii in np.arange(38): + if ii == 0: + posInd.append([1, 2, 3]) + else: + posInd.append([]) + + expmapInd = np.split(np.arange(4, 118) - 1, 38) + + return parent, offset, posInd, expmapInd + + +def fkl_torch(opt, angles, parent, offset, rotInd, expmapInd): + """ + pytorch version of fkl. + + convert joint angles to joint locations + batch pytorch version of the fkl() method above + :param angles: N*99 + :param parent: + :param offset: + :param rotInd: + :param expmapInd: + :return: N*joint_n*3 + """ + n = angles.data.shape[0] + j_n = offset.shape[0] + p3d = Variable(torch.from_numpy(offset)).float().to(opt.cuda_idx).unsqueeze(0).repeat(n, 1, 1) + angles = angles[:, 3:].contiguous().view(-1, 3) + R = data_utils.expmap2rotmat_torch(opt, angles).view(n, j_n, 3, 3) + for i in np.arange(1, j_n): + if parent[i] > 0: + R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone() + p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :] + return p3d + + +def main(): + # Load all the data + parent, offset, rotInd, expmapInd = _some_variables() + + # numpy implementation + # with h5py.File('samples.h5', 'r') as h5f: + # expmap_gt = h5f['expmap/gt/walking_0'][:] + # expmap_pred = h5f['expmap/preds/walking_0'][:] + expmap_pred = np.array( + [0.0000000, 0.0000000, 0.0000000, -0.0000001, -0.0000000, -0.0000002, 0.3978439, -0.4166636, 0.1027215, + -0.7767256, -0.0000000, -0.0000000, 0.1704115, 0.3078358, -0.1861640, 0.3330379, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 0.0679339, 0.2255526, 0.2394881, -0.0989492, -0.0000000, -0.0000000, + 0.0677801, -0.3607298, 0.0503249, 0.1819232, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + 0.3236777, -0.0476493, -0.0651256, -0.3150051, -0.0665669, 0.3188994, -0.5980227, -0.1190833, -0.3017127, + 1.2270271, -0.1010960, 0.2072986, -0.0000000, -0.0000000, -0.0000000, -0.2578378, -0.0125206, 2.0266378, + -0.3701521, 0.0199115, 0.5594162, -0.4625384, -0.0000000, -0.0000000, 0.1653314, -0.3952765, -0.1731570, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 2.7825687, -1.4196042, -0.0936858, -1.0348599, -2.7419815, 0.4518218, + -0.3902033, -0.0000000, -0.0000000, 0.0597317, 0.0547002, 0.0445105, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 + ]) + expmap_gt = np.array( + [0.2240568, -0.0276901, -0.7433901, 0.0004407, -0.0020624, 0.0002131, 0.3974636, -0.4157083, 0.1030248, + -0.7762963, -0.0000000, -0.0000000, 0.1697988, 0.3087364, -0.1863863, 0.3327336, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 0.0689423, 0.2282812, 0.2395958, -0.0998311, -0.0000000, -0.0000000, + 0.0672752, -0.3615943, 
0.0505299, 0.1816492, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + 0.3223563, -0.0481131, -0.0659720, -0.3145134, -0.0656419, 0.3206626, -0.5979006, -0.1181534, -0.3033383, + 1.2269648, -0.1011873, 0.2057794, -0.0000000, -0.0000000, -0.0000000, -0.2590978, -0.0141497, 2.0271597, + -0.3699318, 0.0128547, 0.5556172, -0.4714990, -0.0000000, -0.0000000, 0.1603251, -0.4157299, -0.1667608, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 2.7811005, -1.4192915, -0.0932141, -1.0294687, -2.7323222, 0.4542309, + -0.4048152, -0.0000000, -0.0000000, 0.0568960, 0.0525994, 0.0493068, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 + ]) + xyz1 = fkl(expmap_pred, parent, offset, rotInd, expmapInd) + xyz2 = fkl(expmap_gt, parent, offset, rotInd, expmapInd) + + exp1 = Variable(torch.from_numpy(np.vstack((expmap_pred, expmap_gt))).float()).cuda() + xyz = fkl_torch(exp1, parent, offset, rotInd, expmapInd) + xyz = xyz.cpu().data.numpy() + print(xyz) + + +if __name__ == '__main__': + main() diff --git a/epistemic_uncertainty/model/pgbig/utils/h36motion.py b/epistemic_uncertainty/model/pgbig/utils/h36motion.py new file mode 100644 index 0000000..5d8667d --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/h36motion.py @@ -0,0 +1,242 @@ +from torch.utils.data import Dataset +import numpy as np +from h5py import File +import scipy.io as sio +from utils import data_utils +from matplotlib import pyplot as plt +import torch + +class Datasets(Dataset): + + def __init__(self, opt, actions=None, split=0): + """ + :param path_to_data: + :param actions: + :param input_n: + :param output_n: + :param dct_used: + :param split: 0 train, 1 testing, 2 validation + :param sample_rate: + """ + #self.path_to_data = "./datasets/h3.6m/" + self.path_to_data = opt.data_dir + self.split = split + self.in_n = opt.input_n + self.out_n = opt.output_n + self.sample_rate = 2 + self.seq = {} + self.data_idx = [] + + self.dimensions_to_use = np.array( + [6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86]) + self.dimensions_to_ignore = np.array( + [[0, 1, 2, 3, 4, 5, 10, 11, 16, 17, 18, 19, 20, 25, 26, 31, 32, 33, 34, 35, 48, 49, 50, 58, + 59, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 82, 83, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, + 98]]) + + seq_len = self.in_n + self.out_n + subs = np.array([[1, 6, 7, 8, 9], [11], [5]]) + # acts = data_utils.define_actions(actions) + if actions is None: + acts = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] + else: + acts = actions + # subs = np.array([[1], [11], [5]]) + # acts = ['walking'] + + subs = subs[split] + + for subj in subs: + for action_idx in np.arange(len(acts)): + action = acts[action_idx] + if self.split <= 1 or opt.test_sample_num < 0: + for subact in [1, 2]: # subactions + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, subact) + the_sequence = data_utils.readCSVasFloat(filename) + n, d = the_sequence.shape + even_list = range(0, n, self.sample_rate) + num_frames = 
len(even_list) + the_sequence = np.array(the_sequence[even_list, :]) + # the_sequence = torch.from_numpy(the_sequence).float().cuda() + # remove global rotation and translation + the_sequence[:, 0:6] = 0 + # p3d = data_utils.expmap2xyz_torch(the_sequence) + self.seq[(subj, action, subact)] = the_sequence + + valid_frames = np.arange(0, num_frames - seq_len + 1, opt.skip_rate) + + tmp_data_idx_1 = [(subj, action, subact)] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + else: + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 1) + the_sequence1 = data_utils.readCSVasFloat(filename) + n, d = the_sequence1.shape + even_list = range(0, n, self.sample_rate) + + num_frames1 = len(even_list) + the_sequence1 = np.array(the_sequence1[even_list, :]) + # the_seq1 = torch.from_numpy(the_sequence1).float().cuda() + the_sequence1[:, 0:6] = 0 + # p3d1 = data_utils.expmap2xyz_torch(the_seq1) + self.seq[(subj, action, 1)] = the_sequence1 + + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 2) + the_sequence2 = data_utils.readCSVasFloat(filename) + n, d = the_sequence2.shape + even_list = range(0, n, self.sample_rate) + + num_frames2 = len(even_list) + the_sequence2 = np.array(the_sequence2[even_list, :]) + # the_seq2 = torch.from_numpy(the_sequence2).float().cuda() + the_sequence2[:, 0:6] = 0 + # p3d2 = data_utils.expmap2xyz_torch(the_seq2) + self.seq[(subj, action, 2)] = the_sequence2 + + # fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len, + # input_n=self.in_n) + fs_sel1, fs_sel2 = data_utils.find_indices_srnn(num_frames1, num_frames2, seq_len, + input_n=self.in_n) + + valid_frames = fs_sel1[:, 0] + tmp_data_idx_1 = [(subj, action, 1)] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + valid_frames = fs_sel2[:, 0] + tmp_data_idx_1 = [(subj, action, 2)] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + return self.seq[key][fs] + + +class Datasets(Dataset): + + def __init__(self, opt, actions=None, split=0): + """ + :param path_to_data: + :param actions: + :param input_n: + :param output_n: + :param dct_used: + :param split: 0 train, 1 testing, 2 validation + :param sample_rate: + """ + #self.path_to_data = "./datasets/h3.6m/" + self.path_to_data = opt.data_dir + self.split = split + self.in_n = opt.input_n + self.out_n = opt.output_n + self.sample_rate = 2 + self.seq = {} + self.data_idx = [] + + self.dimensions_to_use = np.array( + [6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86]) + self.dimensions_to_ignore = np.array( + [[0, 1, 2, 3, 4, 5, 10, 11, 16, 17, 18, 19, 20, 25, 26, 31, 32, 33, 34, 35, 48, 49, 50, 58, + 59, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 82, 83, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, + 98]]) + + seq_len = self.in_n + self.out_n + subs = np.array([[1, 6, 7, 8, 9], [11], [5]]) + # acts = 
data_utils.define_actions(actions) + if actions is None: + acts = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] + else: + acts = actions + # subs = np.array([[1], [11], [5]]) + # acts = ['walking'] + + subs = subs[split] + + for subj in subs: + for action_idx in np.arange(len(acts)): + action = acts[action_idx] + if self.split <= 1: + for subact in [1, 2]: # subactions + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, subact) + the_sequence = data_utils.readCSVasFloat(filename) + n, d = the_sequence.shape + even_list = range(0, n, self.sample_rate) + num_frames = len(even_list) + the_sequence = np.array(the_sequence[even_list, :]) + # the_sequence = torch.from_numpy(the_sequence).float().cuda() + # remove global rotation and translation + the_sequence[:, 0:6] = 0 + # p3d = data_utils.expmap2xyz_torch(the_sequence) + self.seq[(subj, action, subact)] = the_sequence + + valid_frames = np.arange(0, num_frames - seq_len + 1, opt.skip_rate) + + tmp_data_idx_1 = [(subj, action, subact)] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + else: + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 1) + the_sequence1 = data_utils.readCSVasFloat(filename) + n, d = the_sequence1.shape + even_list = range(0, n, self.sample_rate) + + num_frames1 = len(even_list) + the_sequence1 = np.array(the_sequence1[even_list, :]) + # the_seq1 = torch.from_numpy(the_sequence1).float().cuda() + the_sequence1[:, 0:6] = 0 + # p3d1 = data_utils.expmap2xyz_torch(the_seq1) + self.seq[(subj, action, 1)] = the_sequence1 + + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 2) + the_sequence2 = data_utils.readCSVasFloat(filename) + n, d = the_sequence2.shape + even_list = range(0, n, self.sample_rate) + + num_frames2 = len(even_list) + the_sequence2 = np.array(the_sequence2[even_list, :]) + # the_seq2 = torch.from_numpy(the_sequence2).float().cuda() + the_sequence2[:, 0:6] = 0 + # p3d2 = data_utils.expmap2xyz_torch(the_seq2) + self.seq[(subj, action, 2)] = the_sequence2 + + # fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len, + # input_n=self.in_n) + fs_sel1, fs_sel2 = data_utils.find_indices_srnn(num_frames1, num_frames2, seq_len, + input_n=self.in_n) + + valid_frames = fs_sel1[:, 0] + tmp_data_idx_1 = [(subj, action, 1)] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + valid_frames = fs_sel2[:, 0] + tmp_data_idx_1 = [(subj, action, 2)] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + return self.seq[key][fs] diff --git a/epistemic_uncertainty/model/pgbig/utils/h36motion3d.py b/epistemic_uncertainty/model/pgbig/utils/h36motion3d.py new file mode 100644 index 0000000..eee756b --- /dev/null +++ 
b/epistemic_uncertainty/model/pgbig/utils/h36motion3d.py @@ -0,0 +1,151 @@ +from torch.utils.data import Dataset +import numpy as np +from utils import data_utils +import torch +from scipy import signal +import os + +class Datasets(Dataset): + def __init__(self, opt, actions=None, split=0): + """ + :param path_to_data: + :param actions: + :param input_n: + :param output_n: + :param dct_used: + :param split: 0 train, 1 testing, 2 validation + :param sample_rate: + """ + self.opt = opt + self.path_to_data = opt.data_dir + self.split = split + self.in_n = opt.input_n + self.out_n = opt.output_n + self.sample_rate = 2 + self.p3d = {} + self.data_idx = [] + seq_len = self.in_n + self.out_n + subs = np.array([[1, 6, 7, 8, 9], [11], [5]], dtype=object) + # acts = data_utils.define_actions(actions) + if actions is None: + acts = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] + else: + acts = [actions] + # subs = np.array([[1], [11], [5]]) + # acts = ['walking'] + # 32 human3.6 joint name: + joint_name = ["Hips", "RightUpLeg", "RightLeg", "RightFoot", "RightToeBase", "Site", "LeftUpLeg", "LeftLeg", + "LeftFoot", + "LeftToeBase", "Site", "Spine", "Spine1", "Neck", "Head", "Site", "LeftShoulder", "LeftArm", + "LeftForeArm", + "LeftHand", "LeftHandThumb", "Site", "L_Wrist_End", "Site", "RightShoulder", "RightArm", + "RightForeArm", + "RightHand", "RightHandThumb", "Site", "R_Wrist_End", "Site"] + + subs = subs[split] + key = 0 + for subj in subs: + for action_idx in np.arange(len(acts)): + action = acts[action_idx] + if self.split <= 1 or opt.test_sample_num < 0: + for subact in [1, 2]: # subactions + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, subact) + the_sequence = data_utils.readCSVasFloat(filename) + n, d = the_sequence.shape + even_list = range(0, n, self.sample_rate) + num_frames = len(even_list) + the_sequence = np.array(the_sequence[even_list, :]) + the_sequence = torch.from_numpy(the_sequence).float().to(self.opt.cuda_idx) + # remove global rotation and translation + the_sequence[:, 0:6] = 0 + p3d = data_utils.expmap2xyz_torch(self.opt, the_sequence) + # self.p3d[(subj, action, subact)] = p3d.view(num_frames, -1).cpu().data.numpy() + self.p3d[key] = p3d.view(num_frames, -1).cpu().data.numpy() + valid_frames = np.arange(0, num_frames - seq_len + 1, opt.skip_rate) + # tmp_data_idx_1 = [(subj, action, subact)] * len(valid_frames) + tmp_data_idx_1 = [key] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + key += 1 + else: + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 1) + the_sequence1 = data_utils.readCSVasFloat(filename) + n, d = the_sequence1.shape + even_list = range(0, n, self.sample_rate) + + num_frames1 = len(even_list) + the_sequence1 = np.array(the_sequence1[even_list, :]) + the_seq1 = torch.from_numpy(the_sequence1).float().to(self.opt.cuda_idx) + the_seq1[:, 0:6] = 0 + p3d1 = data_utils.expmap2xyz_torch(self.opt, the_seq1) + # self.p3d[(subj, action, 1)] = p3d1.view(num_frames1, -1).cpu().data.numpy() + self.p3d[key] = p3d1.view(num_frames1, -1).cpu().data.numpy() + + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 
2)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 2) + the_sequence2 = data_utils.readCSVasFloat(filename) + n, d = the_sequence2.shape + even_list = range(0, n, self.sample_rate) + + num_frames2 = len(even_list) + the_sequence2 = np.array(the_sequence2[even_list, :]) + the_seq2 = torch.from_numpy(the_sequence2).float().to(self.opt.cuda_idx) + the_seq2[:, 0:6] = 0 + p3d2 = data_utils.expmap2xyz_torch(self.opt, the_seq2) + + # self.p3d[(subj, action, 2)] = p3d2.view(num_frames2, -1).cpu().data.numpy() + self.p3d[key + 1] = p3d2.view(num_frames2, -1).cpu().data.numpy() + #[n, 35] + fs_sel1, fs_sel2 = data_utils.find_indices_n(num_frames1, num_frames2, seq_len, + input_n=self.in_n, + test_sample_num=opt.test_sample_num) + + valid_frames = fs_sel1[:, 0] + tmp_data_idx_1 = [key] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + valid_frames = fs_sel2[:, 0] + tmp_data_idx_1 = [key + 1] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + key += 2 + + # ignore constant joints and joints at same position with other joints + joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + self.dimensions_to_use = np.setdiff1d(np.arange(96), dimensions_to_ignore) + + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + + #[20, 96] + src = self.p3d[key][fs] + return src + + +if __name__ == '__main__': + from utils.opt import Options + opt = Options().parse() + opt.test_sample_num = 8 + + data_set = Datasets(opt, split=2) + data_set_len = len(data_set) + data_list = [] + + for i in data_set: + data_list.append(i) + data_list = np.array(data_list) + np.save('./complete_data.npy', data_list) + print(1) + diff --git a/epistemic_uncertainty/model/pgbig/utils/log.py b/epistemic_uncertainty/model/pgbig/utils/log.py new file mode 100644 index 0000000..567b0e6 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/log.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import json +import os +import torch +import pandas as pd +import numpy as np + + +def save_csv_log(opt, head, value, is_create=False, file_name='test'): + if len(value.shape) < 2: + value = np.expand_dims(value, axis=0) + df = pd.DataFrame(value) + file_path = opt.ckpt + '/{}.csv'.format(file_name) + print(file_path) + if not os.path.exists(file_path) or is_create: + df.to_csv(file_path, header=head, index=False) + else: + with open(file_path, 'a') as f: + df.to_csv(f, header=False, index=False) + +def save_csv_eval_log(opt, head, value, is_create=False, file_name='test'): + if len(value.shape) < 2: + value = np.expand_dims(value, axis=0) + df = pd.DataFrame(value) + test_sample_num = opt.test_sample_num + if test_sample_num == -1: + test_sample_num = 'all' + file_path = opt.ckpt + '/{}_{}_eval.csv'.format(file_name, test_sample_num) + print(file_path) + if not os.path.exists(file_path) or is_create: + df.to_csv(file_path, header=head, index=False) + else: + with open(file_path, 'a') as f: + df.to_csv(f, header=False, index=False) + +def save_ckpt(state, is_best=True, file_name=['ckpt_best.pth.tar', 'ckpt_last.pth.tar'], opt=None): + file_path = 
os.path.join(opt.ckpt, file_name[1]) + torch.save(state, file_path) + if is_best: + file_path = os.path.join(opt.ckpt, file_name[0]) + torch.save(state, file_path) + + +def save_options(opt): + with open(opt.ckpt + '/option.json', 'w') as f: + f.write(json.dumps(vars(opt), sort_keys=False, indent=4)) diff --git a/epistemic_uncertainty/model/pgbig/utils/opt.py b/epistemic_uncertainty/model/pgbig/utils/opt.py new file mode 100644 index 0000000..be4dac1 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/opt.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import argparse +from pprint import pprint +from . import log +import sys + + +class Options: + def __init__(self): + self.parser = argparse.ArgumentParser() + self.opt = None + + def _initial(self): + # =============================================================== + # General options + # =============================================================== + self.parser.add_argument('--cuda_idx', type=str, default='cuda:0', help='cuda idx') + self.parser.add_argument('--data_dir', type=str, + default='/media/mtz/076f660b-b7de-4646-833c-0b7466f35185/data_set/h3.6m/dataset/', + help='path to dataset') + self.parser.add_argument('--rep_pose_dir', type=str, + default='./rep_pose/rep_pose.txt',help='path to dataset') + self.parser.add_argument('--exp', type=str, default='test', help='ID of experiment') + self.parser.add_argument('--is_eval', dest='is_eval', action='store_true', + help='whether it is to evaluate the model') + self.parser.add_argument('--ckpt', type=str, default='checkpoint/', help='path to save checkpoint') + self.parser.add_argument('--skip_rate', type=int, default=1, help='skip rate of samples') + self.parser.add_argument('--skip_rate_test', type=int, default=1, help='skip rate of samples for test') + self.parser.add_argument('--extra_info', type=str, default='', help='extra information') + + # =============================================================== + # Model options + # =============================================================== + # self.parser.add_argument('--input_size', type=int, default=2048, help='the input size of the neural net') + # self.parser.add_argument('--output_size', type=int, default=85, help='the output size of the neural net') + self.parser.add_argument('--in_features', type=int, default=66, help='size of each model layer') + self.parser.add_argument('--num_stage', type=int, default=12, help='size of each model layer') + self.parser.add_argument('--d_model', type=int, default=64, help='past frame number') + self.parser.add_argument('--kernel_size', type=int, default=10, help='past frame number') + self.parser.add_argument('--drop_out', type=float, default=0.3, help='drop out probability') + + # =============================================================== + # Running options + # =============================================================== + self.parser.add_argument('--encoder_n', type=int, default=6, help='encoder layer num') + self.parser.add_argument('--decoder_n', type=int, default=6, help='decoder layer num') + self.parser.add_argument('--rep_pose_size', type=int, default=2000, help='rep_pose_size') + self.parser.add_argument('--updata_rate', type=float, default=0.3, help='rep pose updata_rate') + self.parser.add_argument('--input_n', type=int, default=50, help='past frame number') + self.parser.add_argument('--output_n', type=int, default=10, help='future frame number') + self.parser.add_argument('--dct_n', type=int, default=20, help='future frame 
number') + self.parser.add_argument('--lr_now', type=float, default=0.005) + self.parser.add_argument('--max_norm', type=float, default=10000) + self.parser.add_argument('--epoch', type=int, default=100) + self.parser.add_argument('--batch_size', type=int, default=32) + self.parser.add_argument('--test_batch_size', type=int, default=32) + self.parser.add_argument('--is_load', dest='is_load', action='store_true', + help='whether to load existing model') + self.parser.add_argument('--test_sample_num', type=int, default=256, help='the num of sample, ' + 'that sampled from test dataset' + '{8,256,-1(all dataset)}') + + def _print(self): + print("\n==================Options=================") + pprint(vars(self.opt), indent=4) + print("==========================================\n") + + def parse(self, makedir=True): + self._initial() + self.opt = self.parser.parse_args() + + # if not self.opt.is_eval: + script_name = os.path.basename(sys.argv[0])[:-3] + if self.opt.test_sample_num == -1: + test_sample_num = 'all' + else: + test_sample_num = self.opt.test_sample_num + + if self.opt.test_sample_num == -2: + test_sample_num = '8_256_all' + + log_name = '{}_{}_in{}_out{}_ks{}_dctn{}_dropout_{}_lr_{}_d_model_{}_e_{}_d_{}'.format(script_name, + test_sample_num, + self.opt.input_n, + self.opt.output_n, + self.opt.kernel_size, + self.opt.dct_n, + self.opt.drop_out, + self.opt.lr_now, + self.opt.d_model, + self.opt.encoder_n, + self.opt.decoder_n, + ) + self.opt.exp = log_name + # do some pre-check + ckpt = os.path.join(self.opt.ckpt, self.opt.exp) + if makedir==True: + if not os.path.isdir(ckpt): + os.makedirs(ckpt) + log.save_options(self.opt) + self.opt.ckpt = ckpt + log.save_options(self.opt) + + self._print() + # log.save_options(self.opt) + return self.opt diff --git a/epistemic_uncertainty/model/pgbig/utils/util.py b/epistemic_uncertainty/model/pgbig/utils/util.py new file mode 100644 index 0000000..c071589 --- /dev/null +++ b/epistemic_uncertainty/model/pgbig/utils/util.py @@ -0,0 +1,68 @@ +import torch +import numpy as np + + +def lr_decay_mine(optimizer, lr_now, gamma): + lr = lr_now * gamma + for param_group in optimizer.param_groups: + param_group['lr'] = lr + return lr + + +def orth_project(cam, pts): + """ + + :param cam: b*[s,tx,ty] + :param pts: b*k*3 + :return: + """ + s = cam[:, 0:1].unsqueeze(1).repeat(1, pts.shape[1], 2) + T = cam[:, 1:].unsqueeze(1).repeat(1, pts.shape[1], 1) + + return torch.mul(s, pts[:, :, :2] + T) + + +def opt_cam(x, x_target): + """ + :param x: N K 3 or N K 2 + :param x_target: N K 3 or N K 2 + :return: + """ + if x_target.shape[2] == 2: + vis = torch.ones_like(x_target[:, :, :1]) + else: + vis = (x_target[:, :, :1] > 0).float() + vis[:, :2] = 0 + xxt = x_target[:, :, :2] + xx = x[:, :, :2] + x_vis = vis * xx + xt_vis = vis * xxt + num_vis = torch.sum(vis, dim=1, keepdim=True) + mu1 = torch.sum(x_vis, dim=1, keepdim=True) / num_vis + mu2 = torch.sum(xt_vis, dim=1, keepdim=True) / num_vis + xmu = vis * (xx - mu1) + xtmu = vis * (xxt - mu2) + + eps = 1e-6 * torch.eye(2).float().cuda() + Ainv = torch.inverse(torch.matmul(xmu.transpose(1, 2), xmu) + eps.unsqueeze(0)) + B = torch.matmul(xmu.transpose(1, 2), xtmu) + tmp_s = torch.matmul(Ainv, B) + scale = ((tmp_s[:, 0, 0] + tmp_s[:, 1, 1]) / 2.0).unsqueeze(1) + + scale = torch.clamp(scale, 0.7, 10) + trans = mu2.squeeze(1) / scale - mu1.squeeze(1) + opt_cam = torch.cat([scale, trans], dim=1) + return opt_cam + + +def get_dct_matrix(N): + dct_m = np.eye(N) + for k in np.arange(N): + for i in np.arange(N): + w = 
np.sqrt(2 / N) + if k == 0: + w = np.sqrt(1 / N) + dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) + idct_m = np.linalg.inv(dct_m) + return dct_m, idct_m + diff --git a/epistemic_uncertainty/model/sts_gcn/__init__.py b/epistemic_uncertainty/model/sts_gcn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/epistemic_uncertainty/model/sts_gcn/sts_gcn.py b/epistemic_uncertainty/model/sts_gcn/sts_gcn.py new file mode 100644 index 0000000..8157aec --- /dev/null +++ b/epistemic_uncertainty/model/sts_gcn/sts_gcn.py @@ -0,0 +1,225 @@ +import torch +import torch.nn as nn +import math + + +class ConvTemporalGraphical(nn.Module): + # Source : https://github.com/yysijie/st-gcn/blob/master/net/st_gcn.py + r"""The basic module for applying a graph convolution. + Args: + in_channels (int): Number of channels in the input sequence data + out_channels (int): Number of channels produced by the convolution + kernel_size (int): Size of the graph convolving kernel + t_kernel_size (int): Size of the temporal convolving kernel + t_stride (int, optional): Stride of the temporal convolution. Default: 1 + t_padding (int, optional): Temporal zero-padding added to both sides of + the input. Default: 0 + t_dilation (int, optional): Spacing between temporal kernel elements. + Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the output. + Default: ``True`` + Shape: + - Input: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format + - Output: Outpu graph sequence in :math:`(N, out_channels, T_{out}, V)` format + where + :math:`N` is a batch size, + :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, + :math:`T_{in}/T_{out}` is a length of input/output sequence, + :math:`V` is the number of graph nodes. + """ + + def __init__(self, + time_dim, + joints_dim + ): + super(ConvTemporalGraphical, self).__init__() + + self.A = nn.Parameter(torch.FloatTensor(time_dim, joints_dim, + joints_dim)) # learnable, graph-agnostic 3-d adjacency matrix(or edge importance matrix) + stdv = 1. / math.sqrt(self.A.size(1)) + self.A.data.uniform_(-stdv, stdv) + + self.T = nn.Parameter(torch.FloatTensor(joints_dim, time_dim, time_dim)) + stdv = 1. / math.sqrt(self.T.size(1)) + self.T.data.uniform_(-stdv, stdv) + ''' + self.prelu = nn.PReLU() + + self.Z=nn.Parameter(torch.FloatTensor(joints_dim, joints_dim, time_dim, time_dim)) + stdv = 1. / math.sqrt(self.Z.size(2)) + self.Z.data.uniform_(-stdv,stdv) + ''' + + def forward(self, x): + x = torch.einsum('nctv,vtq->ncqv', (x, self.T)) + ## x=self.prelu(x) + x = torch.einsum('nctv,tvw->nctw', (x, self.A)) + ## x = torch.einsum('nctv,wvtq->ncqw', (x, self.Z)) + return x.contiguous() + + +class ST_GCNN_layer(nn.Module): + """ + Shape: + - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format + - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format + - Output[0]: Outpu graph sequence in :math:`(N, out_channels, T_{out}, V)` format + where + :math:`N` is a batch size, + :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, + :math:`T_{in}/T_{out}` is a length of input/output sequence, + :math:`V` is the number of graph nodes. 
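+ Example (an illustrative shape sketch added for clarity; the sizes below are assumed
+ values chosen for demonstration, not defaults from the original code):
+ >>> layer = ST_GCNN_layer(in_channels=3, out_channels=64, kernel_size=[1, 1],
+ ... stride=1, time_dim=10, joints_dim=22, dropout=0.1)
+ >>> x = torch.randn(8, 3, 10, 22) # (N, C, T_in, V)
+ >>> layer(x).shape # channels change, T and V are preserved
+ torch.Size([8, 64, 10, 22])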
+ :in_channels= dimension of coordinates + : out_channels=dimension of coordinates + + + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + time_dim, + joints_dim, + dropout, + bias=True): + + super(ST_GCNN_layer, self).__init__() + self.kernel_size = kernel_size + assert self.kernel_size[0] % 2 == 1 + assert self.kernel_size[1] % 2 == 1 + padding = ((self.kernel_size[0] - 1) // 2, (self.kernel_size[1] - 1) // 2) + + self.gcn = ConvTemporalGraphical(time_dim, joints_dim) # the convolution layer + + self.tcn = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels, + (self.kernel_size[0], self.kernel_size[1]), + (stride, stride), + padding, + ), + nn.BatchNorm2d(out_channels), + nn.Dropout(dropout, inplace=True), + ) + + if stride != 1 or in_channels != out_channels: + + self.residual = nn.Sequential(nn.Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=(1, 1)), + nn.BatchNorm2d(out_channels), + ) + + + else: + self.residual = nn.Identity() + + self.prelu = nn.PReLU() + + def forward(self, x): + # assert A.shape[0] == self.kernel_size[1], print(A.shape[0],self.kernel_size) + res = self.residual(x) + x = self.gcn(x) + x = self.tcn(x) + x = x + res + x = self.prelu(x) + return x + + +class CNN_layer( + nn.Module): # This is the simple CNN layer,that performs a 2-D convolution while maintaining the dimensions of the input(except for the features dimension) + + def __init__(self, + in_channels, + out_channels, + kernel_size, + dropout, + bias=True): + super(CNN_layer, self).__init__() + self.kernel_size = kernel_size + padding = ( + (kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2) # padding so that both dimensions are maintained + assert kernel_size[0] % 2 == 1 and kernel_size[1] % 2 == 1 + + self.block = [nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding) + , nn.BatchNorm2d(out_channels), nn.Dropout(dropout, inplace=True)] + + self.block = nn.Sequential(*self.block) + + def forward(self, x): + output = self.block(x) + return output + + +# In[11]: + + +class STSGCN(nn.Module): + """ + Shape: + - Input[0]: Input sequence in :math:`(N, in_channels,T_in, V)` format + - Output[0]: Output sequence in :math:`(N,T_out,in_channels, V)` format + where + :math:`N` is a batch size, + :math:`T_{in}/T_{out}` is a length of input/output sequence, + :math:`V` is the number of graph nodes. 
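+ Example (an illustrative sketch; the constructor arguments and shapes are assumptions
+ chosen for demonstration, not values prescribed by the original code):
+ >>> model = STSGCN(input_channels=3, input_time_frame=10, output_time_frame=25,
+ ... st_gcnn_dropout=0.1, joints_to_consider=22,
+ ... n_txcnn_layers=4, txc_kernel_size=[3, 3], txc_dropout=0.1)
+ >>> x = torch.randn(8, 3, 10, 22) # (N, C, T_in, V)
+ >>> model(x).shape # time axis extrapolated from T_in=10 to T_out=25
+ torch.Size([8, 25, 3, 22])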
+ :in_channels=number of channels for the coordiantes(default=3) + + + """ + + def __init__(self, + input_channels, + input_time_frame, + output_time_frame, + st_gcnn_dropout, + joints_to_consider, + n_txcnn_layers, + txc_kernel_size, + txc_dropout, + bias=True): + + super(STSGCN, self).__init__() + self.input_time_frame = input_time_frame + self.output_time_frame = output_time_frame + self.joints_to_consider = joints_to_consider + self.st_gcnns = nn.ModuleList() + self.n_txcnn_layers = n_txcnn_layers + self.txcnns = nn.ModuleList() + + self.st_gcnns.append(ST_GCNN_layer(input_channels, 64, [1, 1], 1, input_time_frame, + joints_to_consider, st_gcnn_dropout)) + self.st_gcnns.append(ST_GCNN_layer(64, 32, [1, 1], 1, input_time_frame, + joints_to_consider, st_gcnn_dropout)) + + self.st_gcnns.append(ST_GCNN_layer(32, 64, [1, 1], 1, input_time_frame, + joints_to_consider, st_gcnn_dropout)) + + self.st_gcnns.append(ST_GCNN_layer(64, input_channels, [1, 1], 1, input_time_frame, + joints_to_consider, st_gcnn_dropout)) + + # at this point, we must permute the dimensions of the gcn network, from (N,C,T,V) into (N,T,C,V) + self.txcnns.append(CNN_layer(input_time_frame, output_time_frame, txc_kernel_size, + txc_dropout)) # with kernel_size[3,3] the dimensinons of C,V will be maintained + for i in range(1, n_txcnn_layers): + self.txcnns.append(CNN_layer(output_time_frame, output_time_frame, txc_kernel_size, txc_dropout)) + + self.prelus = nn.ModuleList() + for j in range(n_txcnn_layers): + self.prelus.append(nn.PReLU()) + + def forward(self, x): + for gcn in (self.st_gcnns): + x = gcn(x) + + x = x.permute(0, 2, 1, 3) # prepare the input for the Time-Extrapolator-CNN (NCTV->NTCV) + + x = self.prelus[0](self.txcnns[0](x)) + + for i in range(1, self.n_txcnn_layers): + x = self.prelus[i](self.txcnns[i](x)) + x # residual connection + + return x \ No newline at end of file diff --git a/epistemic_uncertainty/model/sts_gcn/utils/data_utils.py b/epistemic_uncertainty/model/sts_gcn/utils/data_utils.py new file mode 100644 index 0000000..8c6be7a --- /dev/null +++ b/epistemic_uncertainty/model/sts_gcn/utils/data_utils.py @@ -0,0 +1,658 @@ +import numpy as np +import random +from math import * +import torch as torch +from . 
import forward_kinematics +import os + +device = 'cuda' if torch.cuda.is_available() else 'cpu' + + +def rotmat2euler(R): + """ + Converts a rotation matrix to Euler angles + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1 + + Args + R: a 3x3 rotation matrix + Returns + eul: a 3x1 Euler angle representation of R + """ + if R[0, 2] == 1 or R[0, 2] == -1: + E3 = 0 + dlta = np.arctan2(R[0, 1], R[0, 2]) + + if R[0, 2] == -1: + E2 = np.pi / 2 + E1 = E3 + dlta + else: + E2 = -np.pi / 2 + E1 = -E3 + dlta + + else: + E2 = -np.arcsin(R[0, 2]) + E1 = np.arctan2(R[1, 2] / np.cos(E2), R[2, 2] / np.cos(E2)) + E3 = np.arctan2(R[0, 1] / np.cos(E2), R[0, 0] / np.cos(E2)) + + eul = np.array([E1, E2, E3]) + return eul + + +def rotmat2quat(R): + """ + Converts a rotation matrix to a quaternion + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4 + + Args + R: 3x3 rotation matrix + Returns + q: 1x4 quaternion + """ + rotdiff = R - R.T + + r = np.zeros(3) + r[0] = -rotdiff[1, 2] + r[1] = rotdiff[0, 2] + r[2] = -rotdiff[0, 1] + sintheta = np.linalg.norm(r) / 2 + r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps) + + costheta = (np.trace(R) - 1) / 2 + + theta = np.arctan2(sintheta, costheta) + + q = np.zeros(4) + q[0] = np.cos(theta / 2) + q[1:] = r0 * np.sin(theta / 2) + return q + + +def rotmat2expmap(R): + return quat2expmap(rotmat2quat(R)) + + +def expmap2rotmat(r): + """ + Converts an exponential map angle to a rotation matrix + Matlab port to python for evaluation purposes + This is also known as Rodrigues' rotation formula + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m + + Args + r: 1x3 exponential map + Returns + R: 3x3 rotation matrix + """ + theta = np.linalg.norm(r) + r0 = np.divide(r, theta + np.finfo(np.float32).eps) + r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3, 3) + r0x = r0x - r0x.T + R = np.eye(3, 3) + np.sin(theta) * r0x + (1 - np.cos(theta)) * (r0x).dot(r0x) + return R + + +def quat2expmap(q): + """ + Converts a quaternion to an exponential map + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1 + + Args + q: 1x4 quaternion + Returns + r: 1x3 exponential map + Raises + ValueError if the l2 norm of the quaternion is not close to 1 + """ + if np.abs(np.linalg.norm(q) - 1) > 1e-3: + raise ValueError("quat2expmap: input quaternion is not norm 1") + + sinhalftheta = np.linalg.norm(q[1:]) + coshalftheta = q[0] + + r0 = np.divide(q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps)) + theta = 2 * np.arctan2(sinhalftheta, coshalftheta) + theta = np.mod(theta + 2 * np.pi, 2 * np.pi) + + if theta > np.pi: + theta = 2 * np.pi - theta + r0 = -r0 + + r = r0 * theta + return r + + +def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot): + """Borrowed from SRNN code. Maps normalized data back to the original scale by re-applying the stored mean and standard deviation.
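+ + A minimal usage sketch (shapes and variable names here are illustrative only, not taken from this repository): + + normalized = np.zeros((T, len(dimensions_to_use)), dtype=np.float32) # e.g. one predicted window + original = unNormalizeData(normalized, data_mean, data_std, dimensions_to_ignore, actions=[], one_hot=False) + # original has shape (T, data_mean.shape[0]); used dimensions become x * std + mean, + # while ignored dimensions are filled with their training-set mean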
+ https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12 + + Args + normalizedData: nxd matrix with normalized data + data_mean: vector of mean used to scale the data + data_std: vector of standard deviation used to scale the data + dimensions_to_ignore: vector with dimensions not used by the model + actions: list of strings with the encoded actions + one_hot: whether the data comes with one-hot encoding + Returns + origData: data mapped back to the original, un-normalized representation + """ + T = normalizedData.shape[0] + D = data_mean.shape[0] + + origData = np.zeros((T, D), dtype=np.float32) + dimensions_to_use = [] + for i in range(D): + if i in dimensions_to_ignore: + continue + dimensions_to_use.append(i) + dimensions_to_use = np.array(dimensions_to_use) + + if one_hot: + origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)] + else: + origData[:, dimensions_to_use] = normalizedData + + stdMat = data_std.reshape((1, D)) + stdMat = np.repeat(stdMat, T, axis=0) + meanMat = data_mean.reshape((1, D)) + meanMat = np.repeat(meanMat, T, axis=0) + origData = np.multiply(origData, stdMat) + meanMat + return origData + + +def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot): + """ + Converts the output of the neural network to a format that is easier to + manipulate, e.g. for conversion to another format or for visualization + + Args + poses: The output from the prediction model. A list with (seq_length) entries, + each with a (batch_size, dim) output + Returns + poses_out: A tensor of size (batch_size, seq_length, dim). Each + batch is an n-by-d sequence of poses. + """ + seq_len = len(poses) + if seq_len == 0: + return [] + + batch_size, dim = poses[0].shape + + poses_out = np.concatenate(poses) + poses_out = np.reshape(poses_out, (seq_len, batch_size, dim)) + poses_out = np.transpose(poses_out, [1, 0, 2]) + + poses_out_list = [] + for i in range(poses_out.shape[0]): + poses_out_list.append( + unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot)) + + return poses_out_list + + +def readCSVasFloat(filename): + """ + Borrowed from SRNN code. Reads a csv and returns a float matrix. + https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34 + + Args + filename: string.
Path to the csv file + Returns + returnArray: the read data in a float32 matrix + """ + returnArray = [] + lines = open(filename).readlines() + for line in lines: + line = line.strip().split(',') + if len(line) > 0: + returnArray.append(np.array([np.float32(x) for x in line])) + + returnArray = np.array(returnArray) + return returnArray + + +def normalize_data(data, data_mean, data_std, dim_to_use, actions, one_hot): + """ + Normalize input data by removing unused dimensions, subtracting the mean and + dividing by the standard deviation + + Args + data: nx99 matrix with data to scale + data_mean: vector of mean used to scale the data + data_std: vector of standard deviation used to scale the data + dim_to_use: vector with dimensions used by the model + ACTIONS: list of strings with the encoded ACTIONS + one_hot: whether the data comes with one-hot encoding + Returns + data_out: the passed data matrix, but normalized + """ + data_out = {} + nactions = len(actions) + + if not one_hot: + for key in data.keys(): + data_out[key] = np.divide((data[key] - data_mean), data_std) + data_out[key] = data_out[key][:, dim_to_use] + + else: + for key in data.keys(): + data_out[key] = np.divide((data[key][:, 0:99] - data_mean), data_std) + data_out[key] = data_out[key][:, dim_to_use] + data_out[key] = np.hstack((data_out[key], data[key][:, -nactions:])) + + return data_out + + +def normalization_stats(completeData): + """" + Also borrowed for SRNN code. Computes mean, stdev and dimensions to ignore. + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33 + + Args + completeData: nx99 matrix with data to scale + Returns + data_mean: vector of mean used to scale the data + data_std: vector of standard deviation used to scale the data + dimensions_to_ignore: vector with dimensions not used by the model + dimensions_to_use: vector with dimensions used by the model + """ + data_mean = np.mean(completeData, axis=0) + data_std = np.std(completeData, axis=0) + + dimensions_to_ignore = [] + dimensions_to_use = [] + + dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) + dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) + + data_std[dimensions_to_ignore] = 1.0 + + return data_mean, data_std, dimensions_to_ignore, dimensions_to_use + + +def define_actions(action): + """ + Define the list of ACTIONS we are using. + + Args + action: String with the passed action. Could be "all" + Returns + ACTIONS: List of strings of ACTIONS + Raises + ValueError if the action is not included in H3.6M + """ + + actions = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] + if action in actions: + return [action] + + if action == "all": + return actions + + if action == "all_srnn": + return ["walking", "eating", "smoking", "discussion"] + + raise (ValueError, "Unrecognized action: %d" % action) + + +"""all methods above are borrowed from https://github.com/una-dinosauria/human-motion-prediction""" + + +def define_actions_cmu(action): + """ + Define the list of ACTIONS we are using. + + Args + action: String with the passed action. 
Could be "all" + Returns + ACTIONS: List of strings of ACTIONS + Raises + ValueError if the action is not included in H3.6M + """ + + actions = ["basketball", "basketball_signal", "directing_traffic", "jumping", "running", "soccer", "walking", + "washwindow"] + if action in actions: + return [action] + + if action == "all": + return actions + + raise (ValueError, "Unrecognized action: %d" % action) + + +def load_data_cmu(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False): + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + count = count + 1 + for examp_index in np.arange(count): + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, 2) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + if not is_test: + fs = np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + source_seq_len = 50 + target_seq_len = 25 + total_frames = source_seq_len + target_seq_len + batch_size = 8 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + for _ in range(batch_size): + idx = rng.randint(0, num_frames - total_frames) + seq_sel = the_sequence[ + idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] + seq_sel = np.expand_dims(seq_sel, axis=0) + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + if not is_test: + data_std = np.std(complete_seq, axis=0) + data_mean = np.mean(complete_seq, axis=0) + + dimensions_to_ignore = [] + dimensions_to_use = [] + dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) + dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) + data_std[dimensions_to_ignore] = 1.0 + data_mean[dimensions_to_ignore] = 0.0 + + return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std + + +def load_data_cmu_3d(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False): + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + count = count + 1 + for examp_index in np.arange(count): + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + exptmps = _move_to_device(torch.from_numpy(action_sequence).float()) + xyz = expmap2xyz_torch_cmu(exptmps) + xyz = xyz.view(-1, 38 * 3) + xyz = xyz.cpu().data.numpy() + action_sequence = xyz + + even_list = range(0, n, 2) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + if not is_test: + fs = 
np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + source_seq_len = 50 + target_seq_len = 25 + total_frames = source_seq_len + target_seq_len + batch_size = 8 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + for _ in range(batch_size): + idx = rng.randint(0, num_frames - total_frames) + seq_sel = the_sequence[ + idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] + seq_sel = np.expand_dims(seq_sel, axis=0) + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + if not is_test: + data_std = np.std(complete_seq, axis=0) + data_mean = np.mean(complete_seq, axis=0) + + joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore) + + data_std[dimensions_to_ignore] = 1.0 + data_mean[dimensions_to_ignore] = 0.0 + + return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std + + +def rotmat2euler_torch(R): + """ + Converts a rotation matrix to Euler angles + batch pytorch version ported from the corresponding numpy method above + + :param R: N*3*3 + :return: N*3 + """ + n = R.data.shape[0] # batch size + eul = _move_to_device(torch.zeros(n, 3).float()) + idx_spec1 = (R[:, 0, 2] == 1).nonzero().cpu().data.numpy().reshape(-1).tolist() + idx_spec2 = (R[:, 0, 2] == -1).nonzero().cpu().data.numpy().reshape(-1).tolist() + if len(idx_spec1) > 0: + R_spec1 = R[idx_spec1, :, :] + + eul_spec1 = _move_to_device(torch.zeros(len(idx_spec1), 3).float()) + eul_spec1[:, 2] = 0 + eul_spec1[:, 1] = -np.pi / 2 + delta = torch.atan2(R_spec1[:, 0, 1], R_spec1[:, 0, 2]) + eul_spec1[:, 0] = delta + eul[idx_spec1, :] = eul_spec1 + + if len(idx_spec2) > 0: + R_spec2 = R[idx_spec2, :, :] + eul_spec2 = _move_to_device(torch.zeros(len(idx_spec2), 3).float()) + eul_spec2[:, 2] = 0 + eul_spec2[:, 1] = np.pi / 2 + delta = torch.atan2(R_spec2[:, 0, 1], R_spec2[:, 0, 2]) + eul_spec2[:, 0] = delta + eul[idx_spec2] = eul_spec2 + + idx_remain = np.arange(0, n) + idx_remain = np.setdiff1d(np.setdiff1d(idx_remain, idx_spec1), idx_spec2).tolist() + if len(idx_remain) > 0: + R_remain = R[idx_remain, :, :] + eul_remain = _move_to_device(torch.zeros(len(idx_remain), 3).float()) + eul_remain[:, 1] = -torch.asin(R_remain[:, 0, 2]) + eul_remain[:, 0] = torch.atan2(R_remain[:, 1, 2] / torch.cos(eul_remain[:, 1]), + R_remain[:, 2, 2] / torch.cos(eul_remain[:, 1])) + eul_remain[:, 2] = torch.atan2(R_remain[:, 0, 1] / torch.cos(eul_remain[:, 1]), + R_remain[:, 0, 0] / torch.cos(eul_remain[:, 1])) + eul[idx_remain, :] = eul_remain + + return eul + + +def rotmat2quat_torch(R): + """ + Converts a rotation matrix to quaternion + batch pytorch version ported from the corresponding numpy method above + :param R: N * 3 * 3 + :return: N * 4 + """ + rotdiff = R - R.transpose(1, 2) + r = torch.zeros_like(rotdiff[:, 0]) + r[:, 0] = -rotdiff[:, 1, 2] + r[:, 1] = rotdiff[:, 0, 2] + r[:, 2] = -rotdiff[:,
0, 1] + r_norm = torch.norm(r, dim=1) + sintheta = r_norm / 2 + r0 = torch.div(r, r_norm.unsqueeze(1).repeat(1, 3) + 0.00000001) + t1 = R[:, 0, 0] + t2 = R[:, 1, 1] + t3 = R[:, 2, 2] + costheta = (t1 + t2 + t3 - 1) / 2 + theta = torch.atan2(sintheta, costheta) + q = _move_to_device(torch.zeros(R.shape[0], 4).float()) + q[:, 0] = torch.cos(theta / 2) + q[:, 1:] = torch.mul(r0, torch.sin(theta / 2).unsqueeze(1).repeat(1, 3)) + + return q + + +def expmap2quat_torch(exp): + """ + Converts expmap to quaternion + batch pytorch version ported from the corresponding numpy method above + :param R: N*3 + :return: N*4 + """ + theta = torch.norm(exp, p=2, dim=1).unsqueeze(1) + v = torch.div(exp, theta.repeat(1, 3) + 0.0000001) + sinhalf = torch.sin(theta / 2) + coshalf = torch.cos(theta / 2) + q1 = torch.mul(v, sinhalf.repeat(1, 3)) + q = torch.cat((coshalf, q1), dim=1) + return q + + +def expmap2rotmat_torch(r): + """ + Converts expmap matrix to rotation + batch pytorch version ported from the corresponding method above + :param r: N*3 + :return: N*3*3 + """ + theta = torch.norm(r, 2, 1) + r0 = torch.div(r, theta.unsqueeze(1).repeat(1, 3) + 0.0000001) + r1 = torch.zeros_like(r0).repeat(1, 3) + r1[:, 1] = -r0[:, 2] + r1[:, 2] = r0[:, 1] + r1[:, 5] = -r0[:, 0] + r1 = r1.view(-1, 3, 3) + r1 = r1 - r1.transpose(1, 2) + n = r1.data.shape[0] + R = _move_to_device(torch.eye(3, 3).repeat(n, 1, 1).float()) + torch.mul( + torch.sin(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3), r1) + torch.mul( + (1 - torch.cos(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3)), torch.matmul(r1, r1)) + return R + + +def expmap2xyz_torch(expmap): + """ + convert expmaps to joint locations + :param expmap: N*99 + :return: N*32*3 + """ + parent, offset, rotInd, expmapInd = forward_kinematics._some_variables() + xyz = forward_kinematics.fkl_torch(expmap, parent, offset, rotInd, expmapInd) + return xyz + + +def get_dct_matrix(N): + dct_m = np.eye(N) + for k in np.arange(N): + for i in np.arange(N): + w = np.sqrt(2 / N) + if k == 0: + w = np.sqrt(1 / N) + dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) + idct_m = np.linalg.inv(dct_m) + return dct_m, idct_m + + +def find_indices_256(frame_num1, frame_num2, seq_len, input_n=10): + """ + Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 + + which originaly from + In order to find the same action indices as in SRNN. + https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 + """ + + SEED = 1234567890 + rng = np.random.RandomState(SEED) + + T1 = frame_num1 - 150 + T2 = frame_num2 - 150 # seq_len + idxo1 = None + idxo2 = None + for _ in np.arange(0, 128): + idx_ran1 = rng.randint(16, T1) + idx_ran2 = rng.randint(16, T2) + idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) + idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) + if idxo1 is None: + idxo1 = idxs1 + idxo2 = idxs2 + else: + idxo1 = np.vstack((idxo1, idxs1)) + idxo2 = np.vstack((idxo2, idxs2)) + return idxo1, idxo2 + + +def find_indices_srnn(frame_num1, frame_num2, seq_len, input_n=10): + """ + Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 + + which originaly from + In order to find the same action indices as in SRNN. 
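+ + Rough usage sketch (the frame counts below are made-up values, not taken from this repository): + + idx1, idx2 = find_indices_srnn(1000, 1100, seq_len=35, input_n=10) + # idx1 and idx2 are each a (4, 35) array of consecutive frame indices, + # reproducible across runs because of the hard-coded seed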
+ https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 + """ + + SEED = 1234567890 + rng = np.random.RandomState(SEED) + + T1 = frame_num1 - 150 + T2 = frame_num2 - 150 # seq_len + idxo1 = None + idxo2 = None + for _ in np.arange(0, 4): + idx_ran1 = rng.randint(16, T1) + idx_ran2 = rng.randint(16, T2) + idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) + idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) + if idxo1 is None: + idxo1 = idxs1 + idxo2 = idxs2 + else: + idxo1 = np.vstack((idxo1, idxs1)) + idxo2 = np.vstack((idxo2, idxs2)) + return idxo1, idxo2 + + +def _move_to_device(obj): + if device == 'cuda': + return obj.cuda() + return obj diff --git a/epistemic_uncertainty/model/sts_gcn/utils/forward_kinematics.py b/epistemic_uncertainty/model/sts_gcn/utils/forward_kinematics.py new file mode 100644 index 0000000..8587545 --- /dev/null +++ b/epistemic_uncertainty/model/sts_gcn/utils/forward_kinematics.py @@ -0,0 +1,288 @@ +import numpy as np +import torch +from torch.autograd.variable import Variable +from . import data_utils + + +def fkl(angles, parent, offset, rotInd, expmapInd): + """ + Convert joint angles and bone lenghts into the 3d points of a person. + + adapted from + https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L14 + + which originaly based on expmap2xyz.m, available at + https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m + Args + angles: 99-long vector with 3d position and 3d joint angles in expmap format + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + Returns + xyz: 32x3 3d points that represent a person in 3d space + """ + + assert len(angles) == 99 + + # Structure that indicates parents for each joint + njoints = 32 + xyzStruct = [dict() for x in range(njoints)] + + for i in np.arange(njoints): + + # if not rotInd[i]: # If the list is empty + # xangle, yangle, zangle = 0, 0, 0 + # else: + # xangle = angles[rotInd[i][0] - 1] + # yangle = angles[rotInd[i][1] - 1] + # zangle = angles[rotInd[i][2] - 1] + if i == 0: + xangle = angles[0] + yangle = angles[1] + zangle = angles[2] + thisPosition = np.array([xangle, yangle, zangle]) + else: + thisPosition = np.array([0, 0, 0]) + + r = angles[expmapInd[i]] + + thisRotation = data_utils.expmap2rotmat(r) + + if parent[i] == -1: # Root node + xyzStruct[i]['rotation'] = thisRotation + xyzStruct[i]['xyz'] = np.reshape(offset[i, :], (1, 3)) + thisPosition + else: + xyzStruct[i]['xyz'] = (offset[i, :] + thisPosition).dot(xyzStruct[parent[i]]['rotation']) + \ + xyzStruct[parent[i]]['xyz'] + xyzStruct[i]['rotation'] = thisRotation.dot(xyzStruct[parent[i]]['rotation']) + + xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] + xyz = np.array(xyz).squeeze() + # xyz = xyz[:, [0, 2, 1]] + # xyz = xyz[:,[2,0,1]] + + return xyz + + +def _some_variables(): + """ + borrowed from + https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L100 + + We define some variables that are useful to run the kinematic tree + + Args + None + Returns + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with 
indices into angles + expmapInd: 32-long list with indices into expmap angles + """ + + parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 1, 12, 13, 14, 15, 13, + 17, 18, 19, 20, 21, 20, 23, 13, 25, 26, 27, 28, 29, 28, 31]) - 1 + + offset = np.array( + [0.000000, 0.000000, 0.000000, -132.948591, 0.000000, 0.000000, 0.000000, -442.894612, 0.000000, 0.000000, + -454.206447, 0.000000, 0.000000, 0.000000, 162.767078, 0.000000, 0.000000, 74.999437, 132.948826, 0.000000, + 0.000000, 0.000000, -442.894413, 0.000000, 0.000000, -454.206590, 0.000000, 0.000000, 0.000000, 162.767426, + 0.000000, 0.000000, 74.999948, 0.000000, 0.100000, 0.000000, 0.000000, 233.383263, 0.000000, 0.000000, + 257.077681, 0.000000, 0.000000, 121.134938, 0.000000, 0.000000, 115.002227, 0.000000, 0.000000, 257.077681, + 0.000000, 0.000000, 151.034226, 0.000000, 0.000000, 278.882773, 0.000000, 0.000000, 251.733451, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999627, 0.000000, 100.000188, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 257.077681, 0.000000, 0.000000, 151.031437, 0.000000, 0.000000, 278.892924, + 0.000000, 0.000000, 251.728680, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999888, + 0.000000, 137.499922, 0.000000, 0.000000, 0.000000, 0.000000]) + offset = offset.reshape(-1, 3) + + rotInd = [[5, 6, 4], + [8, 9, 7], + [11, 12, 10], + [14, 15, 13], + [17, 18, 16], + [], + [20, 21, 19], + [23, 24, 22], + [26, 27, 25], + [29, 30, 28], + [], + [32, 33, 31], + [35, 36, 34], + [38, 39, 37], + [41, 42, 40], + [], + [44, 45, 43], + [47, 48, 46], + [50, 51, 49], + [53, 54, 52], + [56, 57, 55], + [], + [59, 60, 58], + [], + [62, 63, 61], + [65, 66, 64], + [68, 69, 67], + [71, 72, 70], + [74, 75, 73], + [], + [77, 78, 76], + []] + + expmapInd = np.split(np.arange(4, 100) - 1, 32) + + return parent, offset, rotInd, expmapInd + + +def _some_variables_cmu(): + """ + We define some variables that are useful to run the kinematic tree + + Args + None + Returns + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + """ + + parent = np.array([0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 1, 14, 15, 16, 17, 18, 19, 16, + 21, 22, 23, 24, 25, 26, 24, 28, 16, 30, 31, 32, 33, 34, 35, 33, 37]) - 1 + + offset = 70 * np.array( + [0, 0, 0, 0, 0, 0, 1.65674000000000, -1.80282000000000, 0.624770000000000, 2.59720000000000, -7.13576000000000, + 0, 2.49236000000000, -6.84770000000000, 0, 0.197040000000000, -0.541360000000000, 2.14581000000000, 0, 0, + 1.11249000000000, 0, 0, 0, -1.61070000000000, -1.80282000000000, 0.624760000000000, -2.59502000000000, + -7.12977000000000, 0, -2.46780000000000, -6.78024000000000, 0, -0.230240000000000, -0.632580000000000, + 2.13368000000000, 0, 0, 1.11569000000000, 0, 0, 0, 0.0196100000000000, 2.05450000000000, -0.141120000000000, + 0.0102100000000000, 2.06436000000000, -0.0592100000000000, 0, 0, 0, 0.00713000000000000, 1.56711000000000, + 0.149680000000000, 0.0342900000000000, 1.56041000000000, -0.100060000000000, 0.0130500000000000, + 1.62560000000000, -0.0526500000000000, 0, 0, 0, 3.54205000000000, 0.904360000000000, -0.173640000000000, + 4.86513000000000, 0, 0, 3.35554000000000, 0, 0, 0, 0, 0, 0.661170000000000, 0, 0, 0.533060000000000, 0, 0, 0, + 0, 0, 0.541200000000000, 0, 0.541200000000000, 0, 0, 0, -3.49802000000000, 0.759940000000000, + -0.326160000000000, -5.02649000000000, 0, 0, 
-3.36431000000000, 0, 0, 0, 0, 0, -0.730410000000000, 0, 0, + -0.588870000000000, 0, 0, 0, 0, 0, -0.597860000000000, 0, 0.597860000000000]) + offset = offset.reshape(-1, 3) + + rotInd = [[6, 5, 4], + [9, 8, 7], + [12, 11, 10], + [15, 14, 13], + [18, 17, 16], + [21, 20, 19], + [], + [24, 23, 22], + [27, 26, 25], + [30, 29, 28], + [33, 32, 31], + [36, 35, 34], + [], + [39, 38, 37], + [42, 41, 40], + [45, 44, 43], + [48, 47, 46], + [51, 50, 49], + [54, 53, 52], + [], + [57, 56, 55], + [60, 59, 58], + [63, 62, 61], + [66, 65, 64], + [69, 68, 67], + [72, 71, 70], + [], + [75, 74, 73], + [], + [78, 77, 76], + [81, 80, 79], + [84, 83, 82], + [87, 86, 85], + [90, 89, 88], + [93, 92, 91], + [], + [96, 95, 94], + []] + posInd = [] + for ii in np.arange(38): + if ii == 0: + posInd.append([1, 2, 3]) + else: + posInd.append([]) + + expmapInd = np.split(np.arange(4, 118) - 1, 38) + + return parent, offset, posInd, expmapInd + + +def fkl_torch(angles, parent, offset, rotInd, expmapInd): + """ + pytorch version of fkl. + + convert joint angles to joint locations + batch pytorch version of the fkl() method above + :param angles: N*99 + :param parent: + :param offset: + :param rotInd: + :param expmapInd: + :return: N*joint_n*3 + """ + n = angles.data.shape[0] + j_n = offset.shape[0] + p3d = Variable(torch.from_numpy(offset)).float().cuda().unsqueeze(0).repeat(n, 1, 1) + angles = angles[:, 3:].contiguous().view(-1, 3) + R = data_utils.expmap2rotmat_torch(angles).view(n, j_n, 3, 3) + for i in np.arange(1, j_n): + if parent[i] > 0: + R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone() + p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :] + return p3d + + +def main(): + # Load all the data + parent, offset, rotInd, expmapInd = _some_variables() + + # numpy implementation + # with h5py.File('samples.h5', 'r') as h5f: + # expmap_gt = h5f['expmap/gt/walking_0'][:] + # expmap_pred = h5f['expmap/preds/walking_0'][:] + expmap_pred = np.array( + [0.0000000, 0.0000000, 0.0000000, -0.0000001, -0.0000000, -0.0000002, 0.3978439, -0.4166636, 0.1027215, + -0.7767256, -0.0000000, -0.0000000, 0.1704115, 0.3078358, -0.1861640, 0.3330379, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 0.0679339, 0.2255526, 0.2394881, -0.0989492, -0.0000000, -0.0000000, + 0.0677801, -0.3607298, 0.0503249, 0.1819232, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + 0.3236777, -0.0476493, -0.0651256, -0.3150051, -0.0665669, 0.3188994, -0.5980227, -0.1190833, -0.3017127, + 1.2270271, -0.1010960, 0.2072986, -0.0000000, -0.0000000, -0.0000000, -0.2578378, -0.0125206, 2.0266378, + -0.3701521, 0.0199115, 0.5594162, -0.4625384, -0.0000000, -0.0000000, 0.1653314, -0.3952765, -0.1731570, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 2.7825687, -1.4196042, -0.0936858, -1.0348599, -2.7419815, 0.4518218, + -0.3902033, -0.0000000, -0.0000000, 0.0597317, 0.0547002, 0.0445105, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 + ]) + expmap_gt = np.array( + [0.2240568, -0.0276901, -0.7433901, 0.0004407, -0.0020624, 0.0002131, 0.3974636, -0.4157083, 0.1030248, + -0.7762963, -0.0000000, -0.0000000, 0.1697988, 0.3087364, -0.1863863, 0.3327336, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 0.0689423, 0.2282812, 0.2395958, -0.0998311, -0.0000000, -0.0000000, + 0.0672752, 
-0.3615943, 0.0505299, 0.1816492, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + 0.3223563, -0.0481131, -0.0659720, -0.3145134, -0.0656419, 0.3206626, -0.5979006, -0.1181534, -0.3033383, + 1.2269648, -0.1011873, 0.2057794, -0.0000000, -0.0000000, -0.0000000, -0.2590978, -0.0141497, 2.0271597, + -0.3699318, 0.0128547, 0.5556172, -0.4714990, -0.0000000, -0.0000000, 0.1603251, -0.4157299, -0.1667608, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 2.7811005, -1.4192915, -0.0932141, -1.0294687, -2.7323222, 0.4542309, + -0.4048152, -0.0000000, -0.0000000, 0.0568960, 0.0525994, 0.0493068, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 + ]) + xyz1 = fkl(expmap_pred, parent, offset, rotInd, expmapInd) + xyz2 = fkl(expmap_gt, parent, offset, rotInd, expmapInd) + + exp1 = Variable(torch.from_numpy(np.vstack((expmap_pred, expmap_gt))).float()).cuda() + xyz = fkl_torch(exp1, parent, offset, rotInd, expmapInd) + xyz = xyz.cpu().data.numpy() + print(xyz) + + +if __name__ == '__main__': + main() diff --git a/epistemic_uncertainty/model/zerovel/zerovel.py b/epistemic_uncertainty/model/zerovel/zerovel.py new file mode 100644 index 0000000..6c31ffa --- /dev/null +++ b/epistemic_uncertainty/model/zerovel/zerovel.py @@ -0,0 +1,16 @@ +import torch +import torch.nn as nn +import numpy as np + +class Zerovel(nn.Module): + def __init__(self, seq_len=25, dev='cuda'): + super(Zerovel, self).__init__() + self.seq_len = seq_len + self.dev = dev + + def forward(self, x): + b, d, l, k = x.shape + x = x + p = x[:, :, -1, :].unsqueeze(-2) + x = p.repeat([1 for _ in range(b - 2)] + [self.seq_len, 1]).squeeze() + return x.permute(0, 2, 1, 3) diff --git a/epistemic_uncertainty/utils/__init__.py b/epistemic_uncertainty/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/epistemic_uncertainty/utils/args.py b/epistemic_uncertainty/utils/args.py new file mode 100644 index 0000000..74edca8 --- /dev/null +++ b/epistemic_uncertainty/utils/args.py @@ -0,0 +1,89 @@ +import argparse +from argparse import ArgumentParser + +ARGS_GROUP = ['main', 'dataset', 'model', 'evaluation'] + + +def get_parser() -> ArgumentParser: + """ + Creates a parser for the arguments + :return ArgumentParser: + """ + parser = argparse.ArgumentParser() + # Main Arguments + main_parser = parser.add_argument_group('main') + main_parser.add_argument('--test', type=bool, default=False, + help='If true, loads a pretrained model and test it on test set.') + main_parser.add_argument('--device', type=str, default='cuda', choices=['cuda', 'cpu'], help='Device to be used') + # Dataset Arguments + dataset_parser = parser.add_argument_group('dataset') + dataset_parser.add_argument('--dataset', type=str, default='Human36m', choices=['Human36m', 'AMASS', '3DPW'], + help='Specifies the dataset to be used.') + dataset_parser.add_argument('--batch_size', type=int, default=64, help='Batch size of the dataloader') + dataset_parser.add_argument('--num_workers', type=int, default=8, help='Number of workers of the dataloader') + dataset_parser.add_argument('--dataset_path', type=str, default='human3_6', help='Path to the dataset') + dataset_parser.add_argument('--fake_labeling', type=bool, default=False, + help='Determines if fake samples are needed to be generated. 
' + 'Only applicable to main and divided mode') + dataset_parser.add_argument('--input_n', type=int, default=10, choices=[10, 50], help='Input sequence\'s length') + dataset_parser.add_argument('--output_n', type=int, default=25, help='Output sequence\'s length') + # Model Arguments + model_parser = parser.add_argument_group('model') + model_parser.add_argument('--alpha', type=float, default=0.001, help='Alpha value for weighting L2 regularization') + model_parser.add_argument('--lstm_optimizer', type=str, default='adam', help='LSTM model\'s optimizer') + model_parser.add_argument('--lstm_scheduler', type=str, default='tri', help='LSTM model\'s scheduler') + model_parser.add_argument('--lstm_lr', type=float, default=0.0001, help='LSTM model\'s learning rate') + model_parser.add_argument('--lstm_lr_decay', type=float, default=0.99, help='Optimizer Learning Rate Decay ' + 'Parameter') + model_parser.add_argument('--lstm_epochs', type=int, default=200, help='Number of epochs for LSTM train') + model_parser.add_argument('--lstm_path', type=str, default=None, help='Path to a trained LSTM model') + model_parser.add_argument('--hidden_dim', type=int, default=512, help='Latent space dimension of the LSTM model') + model_parser.add_argument('--encoded_dim', type=int, default=32, help='Latent space dimension of the ' + 'FinalAutoEncoder model') + model_parser.add_argument('--n_clusters', type=int, default=17, help='Number of clusters for DC model. You need to ' + 'determine this with the specific experiment') + model_parser.add_argument('--fake_clusters', type=int, nargs='+', default=None, help='List of indices of the fake ' + 'clusters') + model_parser.add_argument('--k_init_batch', type=int, default=4, help='Batch size for the initial K-means for ' + 'DC model training') + model_parser.add_argument('--dc_lr', type=float, default=0.0005, help='DC model\'s learning rate') + model_parser.add_argument('--dc_lr_decay', type=float, default=0.98, help='Learning rate decay factor per epoch') + model_parser.add_argument('--dc_weight_decay', type=float, default=0.00001, help='DCEC optimizer Weight Decay ' + 'Parameter') + model_parser.add_argument('--dc_gamma', type=float, default=0.6, help='Gamma values for weighting clustering loss') + model_parser.add_argument('--dc_epochs', type=int, default=10, help='Number of epochs for DC train') + model_parser.add_argument('--dc_stop_cret', type=int, default=0.001, help='Stop criteria value for DC model\'s ' + 'train') + model_parser.add_argument('--dc_update_interval', type=float, default=2.0, + help='Update interval for target distribution P. 
Float, for fractional update') + model_parser.add_argument('--dc_path', type=str, default=None, help='Path to a trained DC model') + model_parser.add_argument('--ae_path', type=str, default=None, help='Path to a trained FinalAutoEncoder model') + model_parser.add_argument('--ae_epochs', type=int, default=30, help='Number of epochs for FinalAutoEncoder train') + # Evaluation Arguments + evaluation_parser = parser.add_argument_group('evaluation') + evaluation_parser.add_argument('--model_dict_path', type=str, default=None, + help='Path to a dictionary of outputs and ground-truths of a prediction model') + evaluation_parser.add_argument('--model_path', type=str, default=None, help='Path to a prediction model') + evaluation_parser.add_argument('--dc_model_path', type=str, default=None, help='Path to a pretrained dc_model.') + evaluation_parser.add_argument('--output_path', type=str, default='output', help='Path to the output results') + # TODO: Complete list of choices (Yashar, ) + evaluation_parser.add_argument('--pred_model', type=str, default=None, choices=['sts', ], + help='Name of the prediction model to be evaluated') + return parser + + +def get_args(parser: ArgumentParser) -> list: + """ + Returns main, dataset, model and evaluation args from a given parser. + :param parser: + :return list: + """ + args = parser.parse_args() + separated_args = [] + + for group in parser._action_groups: + if group.title not in ARGS_GROUP: + continue + group_dict = {a.dest: getattr(args, a.dest, None) for a in group._group_actions} + separated_args.append(argparse.Namespace(**group_dict)) + return separated_args diff --git a/epistemic_uncertainty/utils/dataset_utils.py b/epistemic_uncertainty/utils/dataset_utils.py new file mode 100644 index 0000000..3d0f0b1 --- /dev/null +++ b/epistemic_uncertainty/utils/dataset_utils.py @@ -0,0 +1,26 @@ +import numpy as np + +DIM = 3 +ALL_JOINTS_COUNT = {'human3.6m': 96, 'amass': 66, '3dpw': 66} +ALL_JOINTS = {'human3.6m': np.arange(0, ALL_JOINTS_COUNT['human3.6m']), + 'amass': np.arange(0, ALL_JOINTS_COUNT['amass']), + '3dpw': np.arange(0, ALL_JOINTS_COUNT['3dpw']) + } +JOINTS_TO_INCLUDE = {'human3.6m': np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]), + 'amass': np.arange(4 * DIM, 22 * DIM), '3dpw': np.arange(4 * DIM, 22 * DIM)} +INCLUDED_JOINTS_COUNT = {'human3.6m': 66, 'amass': 54, '3dpw': 54} +JOINTS_TO_IGNORE = {'human3.6m': np.array([48, 49, 50, 60, 61, 62, 69, 70, 71, 72, 73, 74, 84, 85, 86, 93, 94, 95])} +JOINTS_EQUAL = {'human3.6m': np.array([39, 40, 41, 57, 58, 59, 66, 67, 68, 39, 40, 41, 81, 82, 83, 90, 91, 92])} +H36M_SUBJECTS = np.array([[1, 6, 7, 8, 9], [11], [5]], dtype='object') +H36_ACTIONS = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] +SCALE_RATIO = {'human3.6m': 0.001, 'amass': 1, '3dpw': 1} +SKIP_RATE = {'human3.6m': 1, 'amass': 5, '3dpw': 5} +MPJPE_COEFFICIENT = {'human3.6m': 1, 'amass': 1000, '3dpw': 1000} # Multiplied to the mpjpe loss to convert them to +# millimeters +TRAIN_K, VALID_K, TEST_K = 'train', 'validation', 'test' diff --git a/epistemic_uncertainty/utils/evaluation_util.py b/epistemic_uncertainty/utils/evaluation_util.py new file mode 100644 index 
0000000..367a620 --- /dev/null +++ b/epistemic_uncertainty/utils/evaluation_util.py @@ -0,0 +1,27 @@ +import numpy as np +from torch.utils.data import DataLoader +from ..model.dc.deep_clustering import DCModel + + +def get_clustering_by_label(dc_model: DCModel, train_data_loader: DataLoader, label, dev='cuda') -> np.ndarray: + """ + Calculates the number of assignee + :param dc_model: + :param train_data_loader: + :param label: real/fake + :param dev: + :return: List of the clusters and the population assigned to them + """ + clusters_assignments = [] + for itr, data_array in enumerate(train_data_loader): + data, actions, labels, _ = data_array + labels = np.array(labels) + indices = np.argwhere(labels == label) + data = data[indices, ...].squeeze().to(dev) + if len(data.shape) == 2: + data = data.unsqueeze(0) + if len(indices) > 0: + pred = list(dc_model.predict(data).cpu().detach().numpy()) + clusters_assignments = clusters_assignments.__add__(pred) + clusters_assignments = np.array(clusters_assignments) + return clusters_assignments diff --git a/epistemic_uncertainty/utils/functions.py b/epistemic_uncertainty/utils/functions.py new file mode 100644 index 0000000..2428522 --- /dev/null +++ b/epistemic_uncertainty/utils/functions.py @@ -0,0 +1,97 @@ +import numpy as np +import torch as torch + +from ..model.sts_gcn.utils import forward_kinematics +from .dataset_utils import JOINTS_TO_IGNORE, JOINTS_EQUAL, JOINTS_TO_INCLUDE + +device = 'cuda' if torch.cuda.is_available() else 'cpu' + + +def scale(x, scale_ratio): + """ + Scales pose data with an arbitrary ratio + :param x: + :param scale_ratio: + :return: + """ + x = x * scale_ratio + return x + + +def rescale_to_original_joints_count(sequence, ground_truth, dataset_name: str): + """ + Adds ignored joints and reconstructs a pose sequence with 32x3 joints for H36M (22 * 3 for AMASS and 3DPW) + :param sequence: + :param ground_truth: + :param dataset_name: + :return: + """ + all_joints_seq = ground_truth.clone() + all_joints_seq[:, :, JOINTS_TO_INCLUDE[dataset_name]] = sequence + if dataset_name in JOINTS_TO_IGNORE: + all_joints_seq[:, :, JOINTS_TO_IGNORE[dataset_name]] = all_joints_seq[:, :, JOINTS_EQUAL[dataset_name]] + return all_joints_seq + + +def readCSVasFloat(filename): + """ + Borrowed from SRNN code. Reads a csv and returns a float matrix. + https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34 + + Args + filename: string. Path to the csv file + Returns + returnArray: the read data in a float32 matrix + """ + returnArray = [] + lines = open(filename).readlines() + for line in lines: + line = line.strip().split(',') + if len(line) > 0: + returnArray.append(np.array([np.float32(x) for x in line])) + + returnArray = np.array(returnArray) + return returnArray + + +def expmap2xyz_torch(expmap): + """ + convert expmaps to joint locations + :param expmap: N*99 + :return: N*32*3 + """ + parent, offset, rotInd, expmapInd = forward_kinematics._some_variables() + xyz = forward_kinematics.fkl_torch(expmap, parent, offset, rotInd, expmapInd) + return xyz + + +def find_indices_256(frame_num1, frame_num2, seq_len, input_n=10): + """ + Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 + + which originaly from + In order to find the same action indices as in SRNN. 
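+ + Illustrative call (the frame counts are placeholders; see the note on the fixed dummy seed below): + + idx1, idx2 = find_indices_256(1500, 1200, seq_len=35, input_n=10) + # each result stacks 128 windows of 35 consecutive frame indices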
+ https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 + """ + + # Used a fixed dummy seed, following + # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + + T1 = frame_num1 - 150 + T2 = frame_num2 - 150 # seq_len + idxo1 = None + idxo2 = None + for _ in np.arange(0, 128): + idx_ran1 = rng.randint(16, T1) + idx_ran2 = rng.randint(16, T2) + idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) + idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) + if idxo1 is None: + idxo1 = idxs1 + idxo2 = idxs2 + else: + idxo1 = np.vstack((idxo1, idxs1)) + idxo2 = np.vstack((idxo2, idxs2)) + return idxo1, idxo2 diff --git a/epistemic_uncertainty/utils/prediction_util.py b/epistemic_uncertainty/utils/prediction_util.py new file mode 100644 index 0000000..9fd707d --- /dev/null +++ b/epistemic_uncertainty/utils/prediction_util.py @@ -0,0 +1,82 @@ +import torch +from torch.utils.data import DataLoader + +from .dataset_utils import DIM, JOINTS_TO_INCLUDE, INCLUDED_JOINTS_COUNT +from .functions import rescale_to_original_joints_count +from ..model.sts_gcn.sts_gcn import STSGCN +from ..model.pgbig.stage_4 import MultiStageModel +from ..model.zerovel.zerovel import Zerovel + +IDX, INP_K, OUT_K, GT_K = 'index', 'inputs', 'outputs', 'ground_truths' +RJ, NRJ, ABS, DIFF, RT, CMP, ABS_ROC, DIFF_ROC, CLS_MAP = 'Rejected', 'Non-rejected', 'Uncertainty', 'Self-uncertainty', 'Rate', 'Comparing', 'Uncertainty AUROC', 'Self-uncertainty AUROC', 'Clusters transition map' +PRED_MODELS = {'zerovel': Zerovel, 'sts': STSGCN, 'pgbig': MultiStageModel} +PRED_MODELS_ARGS = { + 'sts': {'input_channels': DIM, 'input_time_frame': 10, 'output_time_frame': 25, 'st_gcnn_dropout': 0.1, + 'joints_to_consider': 22, 'n_txcnn_layers': 4, 'txc_kernel_size': [DIM, DIM], 'txc_dropout': 0.0}, + 'pgbig': {'in_features': 66, 'num_stages': 12, 'd_model': 16, 'kernel_size': 10, 'drop_out': 0.3, 'input_n': 10, 'output_n': 25, 'dct_n': 35, 'cuda_idx': 0} + } + + +def get_prediction_model_dict(model, data_loader: DataLoader, input_n: int, output_n: int, dataset_name: str, vel=False, + dev='cuda', dropout=False) -> dict: + prediction_dict = {INP_K: [], OUT_K: [], GT_K: []} + if dropout: + enable_dropout(model) + for _, data_arr in enumerate(data_loader): + pose = data_arr[0].to(dev) + B = pose.shape[0] + inp_seq = pose[:, :output_n, :] + if len(inp_seq) == 1: + inp_seq = inp_seq.unsqueeze(0) + inp = pose[:, output_n - input_n:output_n, JOINTS_TO_INCLUDE[dataset_name]]. 
\ + view(B, input_n, INCLUDED_JOINTS_COUNT[dataset_name] // DIM, DIM).permute(0, 3, 1, 2) + gt = pose[:, output_n:, :] + print(f'GT: {gt.shape}') + if len(gt) == 1: + gt = gt.unsqueeze(0) + with torch.no_grad(): + out = model(inp).permute(0, 1, 3, 2).contiguous().view(-1, output_n, INCLUDED_JOINTS_COUNT[dataset_name]) + out = rescale_to_original_joints_count(out, gt, dataset_name) + print(out.shape) +# prediction_dict['outputs_orig'].append(out) + if vel: + inp_seq = inp_seq[:, 1:, :] - inp_seq[:, :-1, :] + out = out[:, 1:, :] - out[:, :-1, :] +# gt = gt[:, 1:, :] - gt[:, :-1, :] + prediction_dict[GT_K].append(gt) + prediction_dict[OUT_K].append(out) + prediction_dict[INP_K].append(inp_seq) + prediction_dict[GT_K] = torch.concat(prediction_dict[GT_K], dim=0) + prediction_dict[OUT_K] = torch.concat(prediction_dict[OUT_K], dim=0) + prediction_dict[INP_K] = torch.concat(prediction_dict[INP_K], dim=0) +# prediction_dict['outputs_orig'] = torch.concat(prediction_dict['outputs_orig'], dim=0) + return prediction_dict + +def get_dataloader_dict(data_loader: DataLoader, input_n: int, output_n: int, dataset_name: str, vel=False, dev='cuda') -> dict: + prediction_dict = {GT_K: [], INP_K: []} + for _, data_arr in enumerate(data_loader): + pose = data_arr[0].to(dev) + inp_seq = pose[:, :output_n, :] + if len(inp_seq) == 1: + inp_seq = inp_seq.unsqueeze(0) + B = pose.shape[0] + if pose.shape[1] > output_n: + gt = pose[:, output_n:, :] + else: + gt = pose + if len(gt) == 1: + gt = gt.unsqueeze(0) + if vel: + inp_seq = inp_seq[:, 1:, :] - inp_seq[:, :-1, :] + gt = gt[:, 1:, :] - gt[:, :-1, :] + prediction_dict[GT_K].append(gt) + prediction_dict[INP_K].append(inp_seq) + prediction_dict[GT_K] = torch.concat(prediction_dict[GT_K], dim=0) + prediction_dict[INP_K] = torch.concat(prediction_dict[INP_K], dim=0) + return prediction_dict + +def enable_dropout(m): + for each_module in m.modules(): + if each_module.__class__.__name__.startswith('Dropout'): + print('Dropout enabled.') + each_module.train() \ No newline at end of file diff --git a/epistemic_uncertainty/utils/smpl_skeleton.npz b/epistemic_uncertainty/utils/smpl_skeleton.npz new file mode 100644 index 0000000..0ef8692 Binary files /dev/null and b/epistemic_uncertainty/utils/smpl_skeleton.npz differ diff --git a/epistemic_uncertainty/utils/train_utils.py b/epistemic_uncertainty/utils/train_utils.py new file mode 100644 index 0000000..19f5264 --- /dev/null +++ b/epistemic_uncertainty/utils/train_utils.py @@ -0,0 +1,51 @@ +from datetime import datetime +import torch as torch + + +def calc_reg_loss(model, reg_type='l2', avg=True): + reg_loss = None + parameters = list(param for name, param in model.named_parameters() if 'bias' not in name) + num_params = len(parameters) + if reg_type.lower() == 'l2': + for param in parameters: + if reg_loss is None: + reg_loss = 0.5 * torch.sum(param ** 2) + else: + reg_loss = reg_loss + 0.5 * param.norm(2) ** 2 + + if avg: + reg_loss /= num_params + return reg_loss + else: + return torch.tensor(0.0, device=model.device) + + +def adjust_lr(optimizer, epoch, lr=None, lr_decay=None, scheduler=None): + if scheduler is not None: + scheduler.step() + new_lr = scheduler.get_lr()[0] + elif (lr is not None) and (lr_decay is not None): + new_lr = lr * (lr_decay ** epoch) + for param_group in optimizer.param_groups: + param_group['lr'] = new_lr + else: + raise ValueError('Missing parameters for LR adjustment') + return new_lr + + +def save_model(model, output_path: str, best=False): + now_str = 
datetime.now().strftime("%Y_%m_%d_%H_%M") + if best: + path = f'./{output_path}/dc_best_{now_str}.pt' + print(f'The best model saved at {path}') + else: + path = f'./{output_path}/dc_{now_str}.pt' + print(f'The model saved at {path}') + torch.save(model.state_dict(), path) + + +def save_model_results_dict(model_dict, pred_model_name: str, dataset_name: str): + now_str = datetime.now().strftime("%Y_%m_%d_%H_%M") + path = f'./{pred_model_name}_{dataset_name}_{now_str}_test_results.pt' + print(f'The prediction model results on test set saved at {path}') + torch.save(model_dict, path) diff --git a/epistemic_uncertainty/utils/uncertainty.py b/epistemic_uncertainty/utils/uncertainty.py new file mode 100644 index 0000000..c89cd6d --- /dev/null +++ b/epistemic_uncertainty/utils/uncertainty.py @@ -0,0 +1,20 @@ +from .functions import * +from .dataset_utils import JOINTS_TO_INCLUDE, SCALE_RATIO + +LOSS_K, UNC_K = 'loss', 'uncertainty' + + +def entropy_uncertainty(p, as_list=False): + entropy = - p * torch.log2(p) + out = torch.nansum(entropy, dim=1) + if as_list: + return out + return torch.mean(out) + + +def calculate_pose_uncertainty(prediction_pose, dc_model, dataset_name: str): + uncertainties = 0 + with torch.no_grad(): + p, _ = dc_model(scale(prediction_pose[:, :, JOINTS_TO_INCLUDE[dataset_name]], SCALE_RATIO[dataset_name])) + uncertainties += entropy_uncertainty(p) + return uncertainties diff --git a/factory/__init__.py b/factory/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/factory/epistemic_uncertainty_evaluator.py b/factory/epistemic_uncertainty_evaluator.py new file mode 100644 index 0000000..4d4a2a9 --- /dev/null +++ b/factory/epistemic_uncertainty_evaluator.py @@ -0,0 +1,72 @@ +import logging +import time + +import torch +import numpy as np +import os +from tqdm import tqdm + +from utils.others import dict_to_device +from epistemic_uncertainty.utils.uncertainty import calculate_pose_uncertainty + +logger = logging.getLogger(__name__) + + +class UncertaintyEvaluator: + def __init__(self, args, dataloader, model, uncertainty_model, reporter): + self.args = args + self.dataloader = dataloader + self.model = model.to(args.device) + self.uncertainty_model = uncertainty_model + self.reporter = reporter + self.rounds_num = args.rounds_num + self.device = args.device + + def evaluate(self): + logger.info('Epistemic uncertainty evaluation started.') + self.model.eval() + for i in range(self.rounds_num): + logger.info('round ' + str(i + 1) + '/' + str(self.rounds_num)) + self.__evaluate() + self.reporter.print_pretty_metrics(logger, ['UNCERTAINTY']) + self.reporter.save_csv_metrics(['UNCERTAINTY'], os.path.join(self.args.save_dir, "uncertainty_eval.csv")) + logger.info("Epistemic uncertainty evaluation has been completed.") + + def __evaluate(self): + self.reporter.start_time = time.time() + pose_key = None + for data in tqdm(self.dataloader): + actions = set(data['action']) if 'action' in data.keys() else set() + actions.add("all") + if pose_key is None: + pose_key = [k for k in data.keys() if "pose" in k][0] + batch_size = data[pose_key].shape[0] + with torch.no_grad(): + # calculate uncertainty + model_outputs = self.model(dict_to_device(data, self.device)) + assert 'pred_pose' in model_outputs.keys(), 'outputs of model should include pred_pose' + + # calculate pose_metrics + report_attrs = {} + dynamic_counts = {} + + pred_metric_pose = model_outputs['pred_pose'] + + future_metric_pose = data['future_pose'] + + for action in actions: + if action == "all": + metric_value 
= calculate_pose_uncertainty(pred_metric_pose.to(self.device), + self.uncertainty_model, + self.args.dataset_name) + else: + indexes = np.where(np.asarray(data['action']) == action)[0] + metric_value = calculate_pose_uncertainty(pred_metric_pose.to(self.device)[indexes], + self.uncertainty_model, + self.args.dataset_name) + dynamic_counts[f'UNCERTAINTY_{action}'] = len(indexes) + report_attrs[f'UNCERTAINTY_{action}'] = metric_value + + self.reporter.update(report_attrs, batch_size, True, dynamic_counts) + + self.reporter.epoch_finished() diff --git a/factory/epistemic_uncertainty_trainer.py b/factory/epistemic_uncertainty_trainer.py new file mode 100644 index 0000000..01f2d40 --- /dev/null +++ b/factory/epistemic_uncertainty_trainer.py @@ -0,0 +1,49 @@ +import logging +import time +import torch +from torch.utils.tensorboard import SummaryWriter +from epistemic_uncertainty.model.dc.deep_clustering import DCModel +from epistemic_uncertainty.model.dc.train_dc import cluster, train_dc_model +from epistemic_uncertainty.model.lstm.lstm import LstmAutoEncoder, EncoderWrapper +from epistemic_uncertainty.model.lstm.train_lstm import train_lstm_model +from epistemic_uncertainty.utils.dataset_utils import INCLUDED_JOINTS_COUNT +from epistemic_uncertainty.utils.train_utils import save_model + +torch.autograd.set_detect_anomaly(True) +logger = logging.getLogger(__name__) + + +class UncertaintyTrainer: + def __init__(self, args, train_dataset, train_dataloader, valid_dataloader): + self.args = args + self.train_dataloader = train_dataloader + self.valid_dataloader = valid_dataloader + self.train_dataset = train_dataset + + def train(self): + logger.info("Training started.") + time0 = time.time() + self.__train_dc_model() + logger.info("-" * 100) + logger.info('Training is completed in %.2f seconds.' 
% (time.time() - time0)) + + def __train_dc_model(self): + dev = self.args.device + n_clusters = self.args.n_clusters + exp_name = self.args.experiment_name + lstm_ae = LstmAutoEncoder(pose_dim=INCLUDED_JOINTS_COUNT[self.args.dataset_name], dev=dev) + train_lstm_model(self.args.lstm, lstm_ae, self.train_dataloader, self.valid_dataloader, dev=dev) + torch.save(lstm_ae, f'lstm_model_{exp_name}.pt') + lstm_ae.eval() + lstm_ae.to(dev) + encoder = EncoderWrapper(lstm_ae).to(dev) + initial_clusters = cluster(self.train_dataset, encoder, n_clusters, dev) + dc_model = DCModel(lstm_ae=lstm_ae, n_clusters=n_clusters, + initial_clusters=initial_clusters, + device=dev) + best_model = train_dc_model(self.args.dc, dc_model, self.train_dataset, self.args.batch_size, + num_workers=self.args.num_workers, + dev=dev) + save_model(dc_model, f'dc_model_{exp_name}') + save_model(best_model, f'dc_model_best_{exp_name}', best=True) + return dc_model diff --git a/factory/evaluator.py b/factory/evaluator.py new file mode 100644 index 0000000..e853410 --- /dev/null +++ b/factory/evaluator.py @@ -0,0 +1,86 @@ +import logging +import time + +import torch +import numpy as np +import os +from tqdm import tqdm + +from metrics import POSE_METRICS +from utils.others import dict_to_device + +logger = logging.getLogger(__name__) + + +class Evaluator: + # evaluator = Evaluator(cfg, eval_dataloader, model, loss_module, eval_reporter) + def __init__(self, args, dataloader, model, loss_module, reporter): + self.args = args + self.dataloader = dataloader + self.model = model.to(args.device) + self.loss_module = loss_module.to(args.device) + self.reporter = reporter + self.pose_metrics = args.pose_metrics + self.rounds_num = args.rounds_num + self.device = args.device + + + def evaluate(self): + logger.info('Evaluation started.') + self.model.eval() + # self.loss_module.eval() + for i in range(self.rounds_num): + logger.info('round ' + str(i + 1) + '/' + str(self.rounds_num)) + self.__evaluate() + self.reporter.print_pretty_metrics(logger, self.pose_metrics) + self.reporter.save_csv_metrics(self.pose_metrics, os.path.join(self.args.save_dir,"eval.csv")) + logger.info("Evaluation has been completed.") + + def __evaluate(self): + self.reporter.start_time = time.time() + pose_key = None + for data in tqdm(self.dataloader): + actions = set(data['action']) if 'action' in data.keys() else set() + actions.add("all") + # TODO + if pose_key is None: + pose_key = [k for k in data.keys() if "pose" in k][0] + batch_size = data[pose_key].shape[0] + with torch.no_grad(): + # predict & calculate loss + model_outputs = self.model(dict_to_device(data, self.device)) + loss_outputs = self.loss_module(model_outputs, dict_to_device(data, self.device)) + assert 'pred_pose' in model_outputs.keys(), 'outputs of model should include pred_pose' + + # calculate pose_metrics + report_attrs = loss_outputs + dynamic_counts = {} + for metric_name in self.pose_metrics: + metric_func = POSE_METRICS[metric_name] + + pred_metric_pose = model_outputs['pred_pose'] + if 'pred_metric_pose' in model_outputs: + pred_metric_pose = model_outputs['pred_metric_pose'] + + # TODO: write write a warning =D + + future_metric_pose = data['future_pose'] + if 'future_metric_pose' in data: + future_metric_pose = data['future_metric_pose'] + + for action in actions: + if action == "all": + metric_value = metric_func(pred_metric_pose.to(self.device), + future_metric_pose.to(self.device), + self.model.args.keypoint_dim) + else: + indexes = np.where(np.asarray(data['action']) == 
action)[0] + metric_value = metric_func(pred_metric_pose.to(self.device)[indexes], + future_metric_pose.to(self.device)[indexes], + self.model.args.keypoint_dim) + dynamic_counts[f'{metric_name}_{action}']=len(indexes) + report_attrs[f'{metric_name}_{action}'] = metric_value + + self.reporter.update(report_attrs, batch_size, True, dynamic_counts) + + self.reporter.epoch_finished() diff --git a/factory/output_generator.py b/factory/output_generator.py new file mode 100644 index 0000000..18d4b1f --- /dev/null +++ b/factory/output_generator.py @@ -0,0 +1,50 @@ +import logging +import time +import pandas as pd +import torch + +from utils.save_load import save_test_results +from utils.others import dict_to_device + +logger = logging.getLogger(__name__) + + +class Output_Generator: + def __init__(self, model, dataloader, save_dir, device): + self.model = model.to(device) + self.dataloader = dataloader + self.save_dir = save_dir + self.device = device + + self.result = pd.DataFrame() + self.pred_pose = torch.Tensor().to(device) + + def generate(self): + logger.info("Generating outputs started.") + self.model.eval() + time0 = time.time() + self.__generate() + save_test_results(self.result, [self.pred_pose], self.save_dir) + logger.info('Generating outputs is completed in: %.2f' % (time.time() - time0)) + + def __generate(self): + for data in self.dataloader: + with torch.no_grad(): + # predict & calculate loss + model_outputs = self.model(dict_to_device(data, self.device)) + assert 'pred_pose' in model_outputs.keys(), 'outputs of model should include pred_pose' + pred_pose = model_outputs['pred_pose'] + + self.store_results(pred_pose) + + def store_results(self, pred_pose): + # update tensors + self.pred_pose = torch.cat((self.pred_pose, pred_pose), 0) + + # to cpu + if self.device == 'cuda': + pred_pose = pred_pose.detach().cpu() + # update dataframe + for i in range(pred_pose.shape[0]): + single_data = {'pred_pose': str(pred_pose[i].numpy().tolist())} + self.result = self.result.append(single_data, ignore_index=True) diff --git a/factory/trainer.py b/factory/trainer.py new file mode 100644 index 0000000..ab6a4b5 --- /dev/null +++ b/factory/trainer.py @@ -0,0 +1,225 @@ +import logging +import time +import torch +import gc +from torch.utils.tensorboard import SummaryWriter +import numpy as np +from tqdm import tqdm +from path_definition import * +from metrics import POSE_METRICS +from utils.others import dict_to_device +from utils.reporter import Reporter +from utils.save_load import save_snapshot +torch.autograd.set_detect_anomaly(True) +logger = logging.getLogger(__name__) +import mlflow +import mlflow.pytorch +from path_definition import * +from os.path import join + + +class Trainer: + def __init__(self, args, train_dataloader, valid_dataloader, model, loss_module, optimizer, optimizer_args, + scheduler, train_reporter, valid_reporter, train_uncertainty_evaluator=None, validation_uncertainty_evaluator=None): + self.args = args + self.train_dataloader = train_dataloader + self.valid_dataloader = valid_dataloader + self.model = model.to(args.device) + self.loss_module = loss_module.to(args.device) + self.optimizer = optimizer + self.optimizer_args = optimizer_args + self.scheduler = scheduler + self.train_reporter = train_reporter + self.valid_reporter = valid_reporter + self.tensor_board = SummaryWriter(args.save_dir) + self.use_validation = False if valid_dataloader is None else True + # self.uncertainty_evaluator = uncertainty_evaluator + + self.train_uncertainty_evaluator = 
train_uncertainty_evaluator + self.validation_uncertainty_evaluator = validation_uncertainty_evaluator + + mlflow.set_tracking_uri(join(args.mlflow_tracking_uri, 'mlruns') if args.mlflow_tracking_uri else join(ROOT_DIR, 'mlruns')) + mlflow.set_experiment(args.experiment_name if args.experiment_name else args.model.type) + + self.run = mlflow.start_run() + + config_path = os.path.join(os.getcwd(), '.hydra', 'config.yaml') + mlflow.log_artifact(config_path) + + params = { + 'model': args.model.type, + **dict(args.model), + 'optimizer': args.optimizer.type, + **dict(args.optimizer), + 'loss': args.model.loss.type, + **dict(args.model.loss), + 'scheduler': args.scheduler.type, + **dict(args.scheduler), + 'obs_frames_num': args.obs_frames_num, + 'pred_frames_num': args.pred_frames_num, + 'tag': args.experiment_tag, + **dict(args.data), + 'save_dir': args.save_dir + } + del params['type'] + + mlflow.log_params(params) + + def train(self): + logger.info("Training started.") + time0 = time.time() + self.best_loss = np.inf + self.best_epoch = -1 + for epoch in range(self.args.start_epoch, self.args.epochs): + print("epoch:", epoch) #new + self.__train() + if self.use_validation: + self.__validate() + self.scheduler.step(self.valid_reporter.history['loss'][-1]) + + if self.best_model: + save_snapshot(self.model, self.loss_module, self.optimizer, self.optimizer_args, epoch + 1, + self.train_reporter, + self.valid_reporter, self.args.save_dir, best_model=True) + self.best_model = False + if self.validation_uncertainty_evaluator is not None: + self.__validate_uncertainty(train=False) + + if (epoch + 1) % self.args.snapshot_interval == 0 or (epoch + 1) == self.args.epochs: + save_snapshot(self.model, self.loss_module, self.optimizer, self.optimizer_args, epoch + 1, + self.train_reporter, + self.valid_reporter, self.args.save_dir) + self.train_reporter.save_data(self.args.save_dir) + if self.use_validation: + self.valid_reporter.save_data(self.args.save_dir) + Reporter.save_plots(self.args.save_dir, self.train_reporter.history, + self.valid_reporter.history, self.use_validation) + # if self.use_validation and + self.tensor_board.close() + mlflow.end_run() + logger.info("-" * 100) + logger.info('Training is completed in %.2f seconds.' 
% (time.time() - time0)) + + def __train(self): + self.model.train() + self.train_reporter.start_time = time.time() + pose_key = None + for data in tqdm(self.train_dataloader): + gc.collect() + # TODO: fix later + batch_size = data['observed_pose'].shape[0] + data = dict_to_device(data, self.args.device) + # predict & calculate loss + self.model.zero_grad() + self.loss_module.zero_grad() + + model_outputs = self.model(data) + loss_outputs = self.loss_module(model_outputs, data) + + assert 'pred_pose' in model_outputs.keys(), 'outputs of model should include pred_pose' + assert 'loss' in loss_outputs.keys(), 'outputs of loss should include loss' + + # backpropagate and optimize + + loss = loss_outputs['loss'] + loss.backward() + + # print(model.para) + + if self.args.optimizer.type == 'sam': + self.optimizer.first_step(zero_grad=True) + + model_outputs = self.model(data) + loss_outputs = self.loss_module(model_outputs, data) + loss = loss_outputs['loss'] + loss.backward() + self.optimizer.second_step(zero_grad=True) + + else: + self.optimizer.step() + + loss_outputs['loss'] = loss_outputs['loss'].detach().item() + + # calculate pose_metrics + + report_attrs = loss_outputs + for metric_name in self.args.pose_metrics: + metric_func = POSE_METRICS[metric_name] + + pred_metric_pose = model_outputs['pred_pose'] + if 'pred_metric_pose' in model_outputs: + pred_metric_pose = model_outputs['pred_metric_pose'] + + # TODO: write write a warning =D + + future_metric_pose = data['future_pose'] + if 'future_metric_pose' in data: + future_metric_pose = data['future_metric_pose'] + metric_value = metric_func( + pred_metric_pose.to(self.args.device), + future_metric_pose.to(self.args.device), + self.model.args.keypoint_dim + ) + + report_attrs[metric_name] = metric_value.detach().item() + + self.train_reporter.update(report_attrs, batch_size) + + self.train_reporter.epoch_finished(self.tensor_board, mlflow) + self.train_reporter.print_values(logger) + # self.train_reporter.print_values(logger, self.model.args.use_mask) + if self.train_uncertainty_evaluator is not None: + self.__validate_uncertainty(train=True) + + def __validate(self): + self.model.eval() + self.loss_module.eval() + + self.valid_reporter.start_time = time.time() + pose_key = None + epoch_loss = 0.0 + for data in tqdm(self.valid_dataloader): + data = dict_to_device(data, self.args.device) + batch_size = data['observed_pose'].shape[0] + + with torch.no_grad(): + # predict & calculate loss + model_outputs = dict_to_device(self.model(data), self.args.device) + loss_outputs = self.loss_module(model_outputs, dict_to_device(data, self.args.device)) + epoch_loss += loss_outputs['loss'].item() + + assert 'pred_pose' in model_outputs.keys(), 'outputs of model should include pred_pose' + + # calculate pose_metrics + report_attrs = loss_outputs + for metric_name in self.args.pose_metrics: + metric_func = POSE_METRICS[metric_name] + + pred_metric_pose = model_outputs['pred_pose'] + if 'pred_metric_pose' in model_outputs: + pred_metric_pose = model_outputs['pred_metric_pose'] + + future_metric_pose = data['future_pose'] + if 'future_metric_pose' in data: + future_metric_pose = data['future_metric_pose'] + metric_value = metric_func( + pred_metric_pose.to(self.args.device), + future_metric_pose.to(self.args.device), + self.model.args.keypoint_dim + ) + report_attrs[metric_name] = metric_value + + self.valid_reporter.update(report_attrs, batch_size) + + if epoch_loss < self.best_loss: + self.best_model = True + self.best_loss = epoch_loss + + 
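+        # Editor's note (inferred from Trainer.train() above): `best_model` is only a flag here;
+        # train() checks it right after this validation pass and, when it is set, persists the
+        # current weights via save_snapshot(..., best_model=True) before clearing the flag again.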
self.valid_reporter.epoch_finished(self.tensor_board, mlflow) + self.valid_reporter.print_values(logger) + + def __validate_uncertainty(self, train=True): + if train: + self.train_uncertainty_evaluator.evaluate() + else: + self.validation_uncertainty_evaluator.evaluate() diff --git a/losses/__init__.py b/losses/__init__.py new file mode 100644 index 0000000..cdb7dfc --- /dev/null +++ b/losses/__init__.py @@ -0,0 +1,22 @@ +from .mse_vel import MSEVel +from .mse_pose import MSEPose +from .mae_vel import MAEVel +from .derpof_loss import DeRPoFLoss +from .his_rep_itself_loss import HisRepItselfLoss +from .mpjpe import MPJPE +from .msr_gcn_loss import MSRGCNLoss +from .potr_loss import POTRLoss +from .pua_loss import PUALoss +from .pgbig_loss import PGBIG_PUALoss + +LOSSES = {'mse_vel': MSEVel, + 'mse_pose': MSEPose, + 'mae_vel': MAEVel, + 'derpof': DeRPoFLoss, + 'his_rep_itself': HisRepItselfLoss, + 'mpjpe': MPJPE, + 'msr_gcn':MSRGCNLoss, + 'potr': POTRLoss, + 'pua_loss': PUALoss, + 'pgbig_loss': PGBIG_PUALoss + } diff --git a/losses/derpof_loss.py b/losses/derpof_loss.py new file mode 100644 index 0000000..4b592a8 --- /dev/null +++ b/losses/derpof_loss.py @@ -0,0 +1,44 @@ +import torch +import torch.nn as nn + + +class DeRPoFLoss(nn.Module): + + def __init__(self, args): + super().__init__() + + self.args = args + self.mse = nn.MSELoss() + self.bce = nn.BCELoss() + + def forward(self, model_outputs, input_data): + observed_pose = input_data['observed_pose'] + future_pose = input_data['future_pose'] + future_vel = torch.cat(((future_pose[..., 0, :] - observed_pose[..., -1, :]).unsqueeze(-2), + future_pose[..., 1:, :] - future_pose[..., :-1, :]), -2) + bs, frames_num, features = future_vel.shape + + # global velocity + future_vel_global = 0.5 * ( + future_vel.view(bs, frames_num, features // 3, 3)[:, :, 0] + future_vel.view( + bs, frames_num, features // 3, 3)[:, :, 1]).reshape(frames_num, bs, 1, 3) + # local velocity + + future_vel_local = ( + future_vel.view(frames_num, bs, features // 3, 3) - future_vel_global) + + loss_global = self.mse(future_vel_global, model_outputs['pred_vel_global']) + loss_local = vae_loss_function(future_vel_local, model_outputs['pred_vel_local'], model_outputs['mean'], + model_outputs['log_var']) + loss = loss_global + self.args.local_loss_weight * loss_local + + outputs = {'loss': loss, 'global_loss': loss_global, 'local_loss': loss_local} + + return outputs + + +def vae_loss_function(x, x_hat, mean, log_var): + assert x_hat.shape == x.shape + reconstruction_loss = torch.mean(torch.norm(x - x_hat, dim=len(x.shape) - 1)) + KLD = - 0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp()) + return reconstruction_loss + 0.01 * KLD diff --git a/losses/his_rep_itself_loss.py b/losses/his_rep_itself_loss.py new file mode 100644 index 0000000..363929b --- /dev/null +++ b/losses/his_rep_itself_loss.py @@ -0,0 +1,239 @@ +import numpy as np +import torch +import torch.nn as nn +from utils.others import sig5, sigstar, polyx +import re + +class HisRepItselfLoss(nn.Module): + def __init__(self, args): + super().__init__() + + self.args = args + self.output_n = args.output_n + self.input_n = args.input_n + self.itera = args.itera + self.seq_in = args.kernel_size + self.device = args.device + self.mode = args.un_mode + self.modality = args.modality + + assert args.un_mode in \ + [ + 'default', 'ATJ', 'TJ', 'AJ', 'AT', 'A', 'T', 'J', + 'sig5-T', 'sig5-TJ', + 'sig5s-T', 'sig5s-TJ', + 'sigstar-T', 'sigstar-TJ', + 'sig5r-TJ', + 'sig5shifted-T', + 'input_rel', + 'sig5-TJPrior', + 
'sig5-TJPriorSum', + 'estimate-mean', + ] or bool(re.findall(r'^poly-TJ*-\d+$', args.un_mode)) + + self.dim = 3 + if self.modality == "Human36": + self.dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) + elif self.modality == "AMASS": + pass + else: + assert False, "Unknown modality" + + self.connect = [ + (8, 9), (9, 10), (10, 11), + (9, 17), (17, 18), (18, 19), (19, 20), (20, 21), + (9, 12), (12, 13), (13, 14), (14, 15), (15, 16), + (0, 1), (1, 2), (2, 3), + (4, 5), (5, 6), (6, 7) + ] + self.S = np.array([c[0] for c in self.connect]) + self.E = np.array([c[1] for c in self.connect]) + + self.sample_rate = 2 + # joints at same loc + self.joint_to_ignore = np.array([16, 20, 23, 24, 28, 31]) + self.index_to_ignore = np.concatenate( + (self.joint_to_ignore * 3, self.joint_to_ignore * 3 + 1, self.joint_to_ignore * 3 + 2)) + self.joint_equal = np.array([13, 19, 22, 13, 27, 30]) + self.index_to_equal = np.concatenate((self.joint_equal * 3, self.joint_equal * 3 + 1, self.joint_equal * 3 + 2)) + + self.action_dict = { + "walking": 0, + "eating": 1, + "smoking": 2, + "discussion": 3, + "directions": 4, + "greeting": 5, + "phoning": 6, + "posing": 7, + "purchases": 8, + "sitting": 9, + "sittingdown": 10, + "takingphoto": 11, + "waiting": 12, + "walkingdog": 13, + "walkingtogether": 14 + } + + def un_loss(self, pred, gt, params, actions=None, mode='ATJ', pred_disp=1): + # pred, gt: B, T, J, D + # params: A, T, J ---- 16, 25, 22 + B, T, J, D = pred.shape + + if mode == 'input_rel': + return torch.mean(torch.norm((pred - gt)*params, dim=-1)) + + losses = torch.norm(pred - gt, dim=3) # B, T, J + frames_num = torch.arange(T).to(self.device) + + joints_num = torch.tensor([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4]).to(self.device) + if mode == 'ATJ': + s = params[actions] # B, T, J + elif mode == 'AT': + s = params[actions][:, :, 0].unsqueeze(-1) # B, T, 1 + elif mode == 'AJ': + s = params[actions][:, 0, :].unsqueeze(1) # B, 1, J + elif mode == 'TJ': + s = params[0].unsqueeze(0) # 1, T, J + elif mode == 'A': + s = params[actions][:, 0, 0].reshape(B, 1, 1) + elif mode == 'T': + s = params[0, :, 0].reshape(1, T, 1) + elif mode == 'J': + s = params[0, 0, :].reshape(1, 1, J) + elif 'poly-TJ-' in mode: + p = params + x = p.shape[-1] - 1 + s = polyx(p, torch.arange(0.5, 0.85, 0.01).to(self.device), x) # J, T + s = s.permute(1, 0).unsqueeze(0) # 1, T, J + elif 'poly-T-' in mode: + p = params[0,:] + x = torch.tensor(p.shape[-1] - 1).to(self.device) + s = polyx(p, torch.arange(0.5, 0.85, 0.01).to(self.device), x) # 1, T + s = s.permute(1, 0).unsqueeze(0) # 1, T, 1 + elif mode == 'sig5-T': + # params: J, 5 + s = sig5(params[0, :], frames_num) # 1, T + s = s.permute(1, 0).unsqueeze(0) # 1, T, 1 + elif mode == 'sig5-TJ': + # params: J, 5 + s = sig5(params, frames_num) # J, T + s = s.permute(1, 0).unsqueeze(0) # 1, T, J + + elif mode == 'sig5s-T': + s = sig5(params[0, :]**2, frames_num) # 1, T + s = s.permute(1, 0).unsqueeze(0) # 1, T, 1 + + elif mode == 'sig5s-TJ': + s = sig5(params**2, frames_num) # J, T + s = s.permute(1, 0).unsqueeze(0) # 1, T, J + + elif mode == 'sig5shifted-T': + s = sig5(params + 1.5, frames_num) # J, T + s = s.permute(1, 0).unsqueeze(0) # 1, T, J + + elif mode == 'sig5r-TJ': + s = sig5(torch.relu(params) + 0.1, frames_num) # J, T + s = 
s.permute(1, 0).unsqueeze(0) # 1, T, J + + elif mode == 'sigstar-T': + params = params[0, :].unsqueeze(0) # 1, 2 + params = torch.cat([params, torch.ones(1, 1).to(self.device)], dim=-1) # 1, 3 + s = sigstar(params, frames_num) # 1, T + s = s.permute(1, 0).unsqueeze(0) + + elif mode == 'sigstar-TJ': + # params : J, 2 + params = torch.cat([params, torch.ones(J, 1).to(self.device)], dim=-1) # J, 3 + s = sigstar(params, frames_num) # J, T + s = s.permute(1, 0).unsqueeze(0) + elif mode == 'sig5-TJPrior': + st = sig5(params[0], frames_num) # 1, T + st = st.unsqueeze(-1) # 1, T, 1 + + sj = sig5(params[1], joints_num) # 1, J + sj = sj.unsqueeze(1) # 1, 1, J + + s = st + sj # 1, T, J + elif mode == 'sig5-TJPriorSum': + st = sig5(params, frames_num) # J, T + st = st.permute(1, 0).unsqueeze(0) # 1, T, J + + s = torch.zeros((1, T, J)).to(self.device) + s[:, :, [0, 4, 8]] = st[:, :, [0, 4, 8]] + s[:, :, self.E] = st[:, :, self.E] + for c in self.connect: + s[:, :, c[1]] = s[:, :, c[0]] + s[:, :, c[1]] + elif mode == 'estimate-mean': + s = torch.mean(losses,dim=0).unsqueeze(0) + s = torch.log(s).detach() + else: + raise Exception('The defined uncertainry mode is not supported.') + + + loss = 1 / torch.exp(s) * losses + s + + loss = torch.mean(loss) + + return loss + + def forward(self, model_outputs, input_data): + seq1 = torch.cat((input_data['observed_pose'], input_data['future_pose']), dim=1) # B, T, J*D + p3d_h36 = seq1.reshape(seq1.shape[0], seq1.shape[1], -1) + batch_size, seq_n, joints = p3d_h36.shape + p3d_h36 = p3d_h36.float().to(self.device) + if self.modality == "Human36": + p3d_sup = p3d_h36.clone()[:, :, self.dim_used][:, -self.output_n - self.seq_in:].reshape( + [-1, self.seq_in + self.output_n, len(self.dim_used)//3, 3]) + elif self.modality == "AMASS": + p3d_sup = p3d_h36.clone()[:, -self.output_n - self.seq_in:].reshape( + [-1, self.seq_in + self.output_n, joints // 3, 3]) + p3d_src = p3d_h36.clone() + p3d_out_all = model_outputs['pred_pose'] + + pred_disp = None + + if self.mode == 'default': + if self.itera == 1: + if self.modality == "Human36": + loss_p3d = torch.mean(torch.norm(p3d_out_all[:, :, 0] - p3d_sup, dim=3)) + elif self.modality == "AMASS": + loss_p3d = torch.mean(torch.norm(p3d_out_all - p3d_sup, dim=3)) + else: + loss_p3d = torch.mean(torch.norm(p3d_out_all[:, :self.seq_in+10] - p3d_sup[:, :self.seq_in+10], dim=3)) + + elif self.mode == 'input_rel': + assert self.itera == 1 + params = model_outputs['pred_un_params'][:, :, 0] + if self.modality == "Human36": + loss_p3d = self.un_loss(pred=p3d_out_all[:, :, 0], gt=p3d_sup, params=params, actions=None, mode=self.mode, pred_disp=pred_disp) + elif self.modality == "AMASS": + loss_p3d = self.un_loss(pred=p3d_out_all, gt=p3d_sup, params=params, actions=actions, mode=self.mode, pred_disp=pred_disp) + else: + if 'A' in self.mode: + actions = torch.tensor([self.action_dict[a] for a in input_data['action']]).to(self.device) + else: + actions = None + + params = model_outputs['un_params'] + if self.itera == 1: + loss_p3d = self.un_loss(pred=p3d_out_all[:, :, 0], gt=p3d_sup, params=params, actions=actions, mode=self.mode, pred_disp=pred_disp) + else: + loss_p3d = self.un_loss(pred=p3d_out_all[:, :self.seq_in+10], gt=p3d_sup[:, :self.seq_in+10], params=params, actions=actions, mode=self.mode, pred_disp=pred_disp) + + + p3d_out = model_outputs['pred_metric_pose'] + + mpjpe_p3d_h36 = torch.mean( + torch.norm(p3d_h36[:, -self.output_n:].reshape( + [-1, self.output_n, (joints // 3), 3] + ) - p3d_out.reshape( + p3d_out.shape[0], 
p3d_out.shape[1], joints // 3, 3), dim=3 + ) + ) + outputs = {'loss': loss_p3d, 'mpjpe': mpjpe_p3d_h36} + + return outputs \ No newline at end of file diff --git a/losses/mae_vel.py b/losses/mae_vel.py new file mode 100644 index 0000000..d82a753 --- /dev/null +++ b/losses/mae_vel.py @@ -0,0 +1,26 @@ +import torch +import torch.nn as nn + + +class MAEVel(nn.Module): + + def __init__(self, args): + super().__init__() + + self.args = args + self.mae = nn.L1Loss() + self.bce = nn.BCELoss() + + def forward(self, model_outputs, input_data): + observed_pose = input_data['observed_pose'] + future_pose = input_data['future_pose'] + future_vel = torch.cat(((future_pose[..., 0, :] - observed_pose[..., -1, :]).unsqueeze(-2), + future_pose[..., 1:, :] - future_pose[..., :-1, :]), -2) + vel_loss = self.mae(model_outputs['pred_vel'], future_vel) + + loss = vel_loss + outputs = {'vel_loss': vel_loss} + + outputs['loss'] = loss + + return outputs diff --git a/losses/mpjpe.py b/losses/mpjpe.py new file mode 100644 index 0000000..f63d65e --- /dev/null +++ b/losses/mpjpe.py @@ -0,0 +1,34 @@ +import numpy as np +import torch +import torch.nn as nn + +from models.sts_gcn.utils import data_utils + + +class MPJPE(nn.Module): + + def __init__(self, args): + super().__init__() + + self.args = args + + + def forward(self, y_pred, y_true): + + y_pred = y_pred['pred_pose'] # B,T,JC + y_true = y_true['future_pose'] # B,T,JC + + B,T,JC = y_pred.shape + assert JC % self.args.nJ == 0, "Number of joints * dim of each joint is not dividable by nJ" + J = self.args.nJ + C = JC // J + + y_pred = y_pred.view(B, T, J, C) + y_true = y_true.view(B, T, J, C) + + l = torch.norm(y_pred - y_true, dim=-1) # B,T,J + l = torch.mean(l) + + return { + 'loss' : l + } diff --git a/losses/mse_pose.py b/losses/mse_pose.py new file mode 100644 index 0000000..5d3ef42 --- /dev/null +++ b/losses/mse_pose.py @@ -0,0 +1,21 @@ +import torch.nn as nn + + +class MSEPose(nn.Module): + + def __init__(self, args): + super().__init__() + + self.args = args + self.mse = nn.MSELoss() + self.bce = nn.BCELoss() + + def forward(self, model_outputs, input_data): + pose_loss = self.mse(model_outputs['pred_pose'], input_data['future_pose']) + + loss = pose_loss + outputs = {'pose_loss': pose_loss} + + outputs['loss'] = loss + + return outputs diff --git a/losses/mse_vel.py b/losses/mse_vel.py new file mode 100644 index 0000000..ed1a2ce --- /dev/null +++ b/losses/mse_vel.py @@ -0,0 +1,26 @@ +import torch +import torch.nn as nn + + +class MSEVel(nn.Module): + + def __init__(self, args): + super().__init__() + + self.args = args + self.mse = nn.MSELoss() + self.bce = nn.BCELoss() + + def forward(self, model_outputs, input_data): + observed_pose = input_data['observed_pose'] + future_pose = input_data['future_pose'] + future_vel = torch.cat(((future_pose[..., 0, :] - observed_pose[..., -1, :]).unsqueeze(-2), + future_pose[..., 1:, :] - future_pose[..., :-1, :]), -2) + vel_loss = self.mse(model_outputs['pred_vel'], future_vel) + + loss = vel_loss + outputs = {'vel_loss': vel_loss} + + outputs['loss'] = loss + + return outputs diff --git a/losses/msr_gcn_loss.py b/losses/msr_gcn_loss.py new file mode 100644 index 0000000..5ac4503 --- /dev/null +++ b/losses/msr_gcn_loss.py @@ -0,0 +1,99 @@ +import imp +import torch +import torch.nn as nn +from metrics import ADE +import numpy as np + +from models.msr_gcn.utils import data_utils + +class Proc(nn.Module): + def __init__(self, args): + super(Proc, self).__init__() + + self.args = args + + joint_to_ignore = np.array([0, 1, 
6, 11, 16, 20, 23, 24, 28, 31]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + dimensions_to_use = np.setdiff1d(np.arange(96), dimensions_to_ignore) + self.dim_used = dimensions_to_use + + self.Index2212 = [[0], [1, 2, 3], [4], [5, 6, 7], [8, 9], [10, 11], [12], [13], [14, 15, 16], [17], [18], [19, 20, 21]] + self.Index127 = [[0, 1], [2, 3], [4, 5], [6, 7], [7, 8], [9, 10], [10, 11]] + self.Index74 = [[0, 2], [1, 2], [3, 4], [5, 6]] + + def down(self, x, index): + N, features, seq_len = x.shape + my_data = x.reshape(N, -1, 3, seq_len) # x, 22, 3, 10 + da = torch.zeros((N, len(index), 3, seq_len)).to(x.device) # x, 12, 3, 10 + for i in range(len(index)): + da[:, i, :, :] = torch.mean(my_data[:, index[i], :, :], dim=1) + da = da.reshape(N, -1, seq_len) + return da + + def forward(self, x, preproc): + if preproc: + x32 = x.permute((0,2,1)) + x22 = x32[:, self.dim_used, :] + x12 = self.down(x22, self.Index2212) + x7 = self.down(x12, self.Index127) + x4 = self.down(x7, self.Index74) + + return { + "p32":x32, + "p22":x22, + "p12":x12, + "p7":x7, + "p4":x4 + } + else: + return x + +def L2NormLoss_train(gt, out): + ''' + ### (batch size,feature dim, seq len) + 等同于 mpjpe_error_p3d() + ''' + + batch_size, _, seq_len = gt.shape + gt = gt.view(batch_size, -1, 3, seq_len).permute(0, 3, 1, 2).contiguous() + out = out.view(batch_size, -1, 3, seq_len).permute(0, 3, 1, 2).contiguous() + loss = torch.mean(torch.norm(gt - out, 2, dim=-1)) + return loss + +class MSRGCNLoss(nn.Module): + + def __init__(self, args): + super().__init__() + self.proc = Proc(args) + self.args = args + + def forward(self, model_outputs, input_data): + gt = torch.cat([input_data['observed_pose'].clone(), input_data['future_pose'].clone()], dim=1) + output_size = gt.shape[1] + gt = gt.reshape((gt.shape[0], gt.shape[1], -1)) + gt = self.proc(gt, True) # batch_size * (66|36|21|12) * T + out = { + "p22":model_outputs["p22"], # batch_size * (66|36|21|12) * T + "p12":model_outputs["p12"], + "p7":model_outputs["p7"], + "p4":model_outputs["p4"] + } + losses = {} + for k in out.keys(): + losses[k] = 0 + # frames = [i for i in [11,13,17,19,23,34] if i < output_size] + + for k in out.keys(): + temp = out[k] + # if "22" in k: + # batch_size, _, seq_len = gt[k].shape + # for frame in frames: + # losses[frame]=torch.mean(torch.norm(gt[k].view(batch_size,-1,3,seq_len)[:,:,:,frame+10-1]- \ + # temp.view(batch_size, -1, 3, seq_len)[:,:,:,frame+10-1], 2, -1)) + losses[k] += L2NormLoss_train(gt[k], temp) + + final_loss = 0 + for k in out.keys(): + final_loss+= losses[k] + + return {'loss': final_loss} \ No newline at end of file diff --git a/losses/pgbig_loss.py b/losses/pgbig_loss.py new file mode 100644 index 0000000..4904047 --- /dev/null +++ b/losses/pgbig_loss.py @@ -0,0 +1,99 @@ + +import numpy as np +import torch +import torch.nn as nn +from .pua_loss import PUALoss +from models.pgbig.data_proc import Preprocess, Human36m_Preprocess, AMASS_3DPW_Preprocess + + +def smooth(src, sample_len, kernel_size): + """ + data:[bs, 60, 96] + """ + src_data = src[:, -sample_len:, :].clone() + smooth_data = src_data.clone() + for i in range(kernel_size, sample_len): + smooth_data[:, i] = torch.mean(src_data[:, kernel_size:i+1], dim=1) + return smooth_data + + +class PGBIG_PUALoss(nn.Module): + + def __init__(self, args): + super().__init__() + + self.args = args + + if args.inner_type == "PUAL": + if 'S' in args.tasks: + self.pual1 = PUALoss(args).to(args.device) + self.pual2 = 
PUALoss(args).to(args.device) + self.pual3 = PUALoss(args).to(args.device) + self.pual4 = PUALoss(args).to(args.device) + else: + self.pual = PUALoss(args).to(args.device) + + if args.pre_post_process == 'human3.6m': + self.preprocess = Human36m_Preprocess(args).to(args.device) + elif args.pre_post_process == 'AMASS' or args.pre_post_process == '3DPW': + self.preprocess = AMASS_3DPW_Preprocess(args).to(args.device) + else: + self.preprocess = Preprocess(args).to(args.device) + + for p in self.preprocess.parameters(): + p.requires_grad = False + + + def forward(self, y_pred, y_true): + p1 = y_pred['p1'] + p2 = y_pred['p2'] + p3 = y_pred['p3'] + p4 = y_pred['p4'] + + y_future = self.preprocess(y_true['future_pose'], normal=True) + y_obs = self.preprocess(y_true['observed_pose'], normal=True) + + y_full = torch.cat([y_obs, y_future], dim=1) + + B, T, JC = y_full.shape + J, C = JC // 3, 3 + _, seq_in, _ = y_obs.shape + + smooth1 = smooth(y_full, sample_len=T, kernel_size=seq_in) + smooth2 = smooth(smooth1, sample_len=T, kernel_size=seq_in) + smooth3 = smooth(smooth2, sample_len=T, kernel_size=seq_in) + + # nn.utils.clip_grad_norm_( + # list(net_pred.parameters()), max_norm=opt.max_norm) + + if self.args.inner_type == "PUAL": + if 'S' in self.args.tasks: + loss_p3d_4 = self.pual4({'pred_pose': p4}, {'future_pose': y_full})['loss'] + loss_p3d_3 = self.pual3({'pred_pose': p3}, {'future_pose': smooth1})['loss'] + loss_p3d_2 = self.pual2({'pred_pose': p2}, {'future_pose': smooth2})['loss'] + loss_p3d_1 = self.pual1({'pred_pose': p1}, {'future_pose': smooth3})['loss'] + else: + loss_p3d_4 = self.pual({'pred_pose': p4}, {'future_pose': y_full})['loss'] + loss_p3d_3 = self.pual({'pred_pose': p3}, {'future_pose': smooth1})['loss'] + loss_p3d_2 = self.pual({'pred_pose': p2}, {'future_pose': smooth2})['loss'] + loss_p3d_1 = self.pual({'pred_pose': p1}, {'future_pose': smooth3})['loss'] + + else: + p3d_sup_4 = y_full.view(B, T, J, C) + p3d_sup_3 = smooth1.view(B, T, J, C) + p3d_sup_2 = smooth2.view(B, T, J, C) + p3d_sup_1 = smooth3.view(B, T, J, C) + + p4 = p4.view(B, T, J, C) + p3 = p3.view(B, T, J, C) + p2 = p2.view(B, T, J, C) + p1 = p1.view(B, T, J, C) + + loss_p3d_4 = torch.mean(torch.norm(p4 - p3d_sup_4, dim=3)) + loss_p3d_3 = torch.mean(torch.norm(p3 - p3d_sup_3, dim=3)) + loss_p3d_2 = torch.mean(torch.norm(p2 - p3d_sup_2, dim=3)) + loss_p3d_1 = torch.mean(torch.norm(p1 - p3d_sup_1, dim=3)) + + return { + 'loss': (loss_p3d_4 + loss_p3d_3 + loss_p3d_2 + loss_p3d_1)/4 + } diff --git a/losses/potr_loss.py b/losses/potr_loss.py new file mode 100644 index 0000000..83e2e57 --- /dev/null +++ b/losses/potr_loss.py @@ -0,0 +1,149 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import os, sys +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") +from models.potr.data_process import train_preprocess + +class POTRLoss(nn.Module): + + def __init__(self, args): + super().__init__() + + self.args = args + + if self.args.loss_fn == 'mse': + self.loss_fn = self.loss_mse + elif self.args.loss_fn == 'smoothl1': + self.loss_fn = self.smooth_l1 + elif self.args.loss_fn == 'l1': + self.loss_fn = self.loss_l1 + else: + raise ValueError('Unknown loss name {}.'.format(self.args.loss_fn)) + + def smooth_l1(self, decoder_pred, decoder_gt): + l1loss = nn.SmoothL1Loss(reduction='mean') + return l1loss(decoder_pred, decoder_gt) + + def loss_l1(self, decoder_pred, decoder_gt, reduction='mean'): + return nn.L1Loss(reduction=reduction)(decoder_pred, decoder_gt) + + 
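+    # Editor's note (hedged summary of the helpers that follow): the activity-classification
+    # terms average a cross-entropy loss over the per-decoder-layer logits, i.e. class_logits
+    # is an iterable of (batch, num_activities) tensors and class_gt holds integer activity ids.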
def loss_activity(self, logits, class_gt): + """Computes entropy loss from logits between predictions and class.""" + return nn.functional.cross_entropy(logits, class_gt, reduction='mean') + + def compute_class_loss(self, class_logits, class_gt): + """Computes the class loss for each of the decoder layers predictions or memory.""" + class_loss = 0.0 + for l in range(len(class_logits)): + class_loss += self.loss_activity(class_logits[l], class_gt) + + return class_loss/len(class_logits) + + def layerwise_loss_fn(self, decoder_pred, decoder_gt, class_logits=None, class_gt=None): + """Computes layerwise loss between predictions and ground truth.""" + pose_loss = 0.0 + + for l in range(len(decoder_pred)): + pose_loss += self.loss_fn(decoder_pred[l], decoder_gt) + + pose_loss = pose_loss/len(decoder_pred) + + class_loss = None + if class_logits is not None: + class_loss = self.compute_class_loss(class_logits, class_gt) + + + return pose_loss, class_loss + + def ua_loss(self, decoder_pred, decoder_gt, class_logits, class_gt, uncertainty_matrix=None): + B = decoder_gt.shape[0] + T = decoder_gt.shape[-3] + L = len(decoder_pred) + + pose_loss = 0.0 + class_loss = None + uncertainty_loss = None + + loss_fn = nn.L1Loss(reduction='none') + if uncertainty_matrix is not None: + assert class_gt is not None + assert uncertainty_matrix.shape == (self.args.num_activities, self.args.n_major_joints) + uncertainty_vector = uncertainty_matrix[class_gt].reshape(B, 1, self.args.n_major_joints, 1).to(self.args.device) # (n_joints, ) + u_coeff = (torch.arange(1, T+1) / T).reshape(1, T, 1, 1).to(self.args.device) + else: + uncertainty_vector = 1 + u_coeff = 0 + + for l in range(L): + pose_loss += ((1 - u_coeff ** uncertainty_vector) * loss_fn(decoder_pred[l], decoder_gt)).mean() + + pose_loss = pose_loss / L + + + if class_logits is not None: + class_loss = self.compute_class_loss(class_logits, class_gt) + + if uncertainty_matrix is not None: + uncertainty_loss = torch.log(uncertainty_matrix).mean() + + + return pose_loss, class_loss, uncertainty_loss + + def compute_loss(self, inputs=None, target=None, preds=None, class_logits=None, class_gt=None): + return self.layerwise_loss_fn(preds, target, class_logits, class_gt) + + + + def forward(self, model_outputs, input_data): + input_data = train_preprocess(input_data, self.args) + + '''selection_loss = 0 + if self.args.query_selection: + prob_mat = model_outputs['mat'][-1] + selection_loss = self.compute_selection_loss( + inputs=prob_mat, + target=input_data['src_tgt_distance'] + )''' + + pred_class, gt_class = None, None + if self.args.predict_activity: + gt_class = input_data['action_ids'] + pred_class = model_outputs['out_class'] + + uncertainty_matrix = None + if self.args.consider_uncertainty: + uncertainty_matrix = model_outputs['uncertainty_matrix'] + + + pose_loss, activity_loss, uncertainty_loss = self.ua_loss( + decoder_pred=model_outputs['pred_pose'], + decoder_gt=input_data['decoder_outputs'], + class_logits=pred_class, + class_gt=gt_class, + uncertainty_matrix=uncertainty_matrix + ) + + pl = pose_loss.item() + step_loss = pose_loss #+ selection_loss + + if self.args.predict_activity: + step_loss += self.args.activity_weight*activity_loss + + if self.args.consider_uncertainty: + step_loss -= self.args.uncertainty_weight*uncertainty_loss + + outputs = { + 'loss': step_loss, + #'selection_loss': selection_loss, + 'pose_loss': pl, + } + + if self.args.predict_activity: + outputs['activity_loss'] = activity_loss.item() + + if self.args.consider_uncertainty: + 
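+            # Editor's note: reports the mean-log-uncertainty regularizer that was already
+            # subtracted from step_loss above (scaled by uncertainty_weight).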
outputs['uncertainty_loss'] = uncertainty_loss.item() + + return outputs diff --git a/losses/pua_loss.py b/losses/pua_loss.py new file mode 100644 index 0000000..a16b751 --- /dev/null +++ b/losses/pua_loss.py @@ -0,0 +1,122 @@ + +import numpy as np +import torch +import torch.nn as nn + + +class PUALoss(nn.Module): + + def __init__(self, args): + super().__init__() + ''' + args mush have: + @param init_mean : float : initialize S such that the mean of S is init_mean, 3.5 is a good default + @param tasks : str : list of tasks as a string, J for tasks over joints, T for over time(frames), A for actions. if A is used, 'action' must be in the input. + @param nT: int: number of frames to predict + @param nJ: int: number of joints + @param action_list : list(str) : name of different actions as a list of str. used in case of A present in tasks. + @param time_prior: str : what time prior to use, must be one of sig5, sig*, none + @param clipMinS, clipMaxS: float : these values are used to slip s. MinS is needed if there are tasks in the input with errors near zero. one can set them to None, resulting in no cliping. + @param device : str : device to run torch on + ''' + self.args = args + + init_mean = self.args.init_mean + self.s = torch.ones(1, 1, 1, requires_grad = True).to(self.args.device) * init_mean + # Fix tasks for joints + if 'J' in args.tasks: + self.nJ = args.nJ + self.s = self.s.repeat(1, 1, self.nJ) + else: + self.nJ = 1 + #fix tasks for time + if 'T' not in args.tasks: + self.nT = 1 + elif args.time_prior == 'sig5': + self.nT = 5 + self.s = self.s.repeat(1, 5, 1) + self.s[:, :, :] = 0 + self.s[:, 0, :] = init_mean + self.s[:, 2, :] = 1 + elif args.time_prior == 'sig*': + self.nT = 3 + self.s = self.s.repeat(1, 3, 1) + self.s[:, 0, :] = init_mean + self.s[:, 1, :] = 1 + self.s[:, 2, :] = -10 + elif args.time_prior == 'none': + self.nT = args.nT + self.s = self.s.repeat(1, self.nT, 1) + elif 'poly' in args.time_prior: + self.nT = int(args.time_prior[4:]) + 1 + self.s = self.s.repeat(1, self.nT, 1) + self.s[:, 1:, :] = 0 + else: + raise Exception("{} is not a supported prior for time axis, options are: [sig5, sig*, none].".format(args.time_prior)) + # fix tasks for action + if 'A' in args.tasks: + self.action_list = args.action_list + self.nA = len(self.action_list) + self.action_map = {self.action_list[i]: i for i in range(self.nA)} + self.s = self.s.repeat(self.nA, 1, 1) + self.sigma = nn.Embedding(self.nA, self.nT * self.nJ) + self.sigma.weight = nn.Parameter(self.s.view(-1, self.nT * self.nJ)) + else: + self.nA = None + self.sigma = nn.Parameter(self.s) + + def calc_sigma(self, y_true): + local_sigma = self.sigma + if self.nA is not None: + actions = y_true['action'] + indx = torch.tensor([self.action_map[act] for act in actions]).to(self.args.device) + local_sigma = local_sigma(indx) + local_sigma = local_sigma.view(-1, self.nT, self.nJ) + + if 'T' in self.args.tasks: + if self.args.time_prior == 'sig5': + x = torch.arange(self.args.nT).to(self.args.device).unsqueeze(1).unsqueeze(0) # 1, T, 1 + c = 2 * local_sigma[:, 3 - 1, :] * local_sigma[:, 5 - 1, :] / torch.abs(local_sigma[:, 3 - 1, :] + local_sigma[:, 5 - 1, :]) + f = 1 / (1 + torch.exp(-c * (local_sigma[:, 4 - 1, :] - x))) + g = torch.exp(local_sigma[:, 3 - 1, :] * (local_sigma[:, 4 - 1, :] - x)) + h = torch.exp(local_sigma[:, 5 - 1, :] * (local_sigma[:, 4 - 1, :] - x)) + local_sigma = local_sigma[:, 1 - 1, :] + (local_sigma[:, 2 - 1, :] / (1 + f * g + (1 - f) * h)) + + elif self.args.time_prior == 'sig*': + x = 
torch.arange(self.args.nT).to(self.args.device).unsqueeze(1).unsqueeze(0) # 1, T, 1 + local_sigma = local_sigma[:, 0:1, :] / (1 + torch.exp(local_sigma[:, 1:2, :] * (local_sigma[:, 2:3, :] - x))) + elif 'poly' in self.args.time_prior: + x = torch.arange(self.args.nT).to(self.args.device).unsqueeze(1).unsqueeze(1).unsqueeze(0) / 10 # 1, T, 1, 1 + po = torch.arange(self.nT).to(self.args.device).unsqueeze(1).unsqueeze(0).unsqueeze(0) # 1, 1, D, 1 + x = x ** po # 1, T, D, 1 + local_sigma = local_sigma.unsqueeze(1) # 1, 1, D, ? + local_sigma = (local_sigma * x).sum(dim=-2) # 1, T, ? + + + local_sigma = torch.clamp(local_sigma, min=self.args.clipMinS, max=self.args.clipMaxS) + + return local_sigma #local_sigma + + + + def forward(self, y_pred, y_true): + + sigma = self.calc_sigma(y_true) + + y_pred = y_pred['pred_pose'] # B,T,JC + y_true = y_true['future_pose'] # B,T,JC + + B,T,JC = y_pred.shape + assert T == self.args.nT and JC % self.args.nJ == 0, "Either number or predicted frames (nT) is not right, or number of joints * dim of each joint is not dividable by nJ" + J = self.args.nJ + C = JC // J + + y_pred = y_pred.view(B, T, J, C) + y_true = y_true.view(B, T, J, C) + + l = torch.norm(y_pred - y_true, dim=-1) # B,T,J + l = torch.mean(torch.exp(-sigma) * l + sigma) + + return { + 'loss' : l + } \ No newline at end of file diff --git a/metrics/__init__.py b/metrics/__init__.py new file mode 100644 index 0000000..58dffce --- /dev/null +++ b/metrics/__init__.py @@ -0,0 +1,19 @@ +from .pose_metrics import ADE, FDE, local_ade, local_fde, VIM, VAM, MSE +from .pose_metrics import F1, F3, F7, F9, F9, F13, F17, F21 #new + +POSE_METRICS = {'ADE': ADE, + 'FDE': FDE, + 'local_ade': local_ade, + 'local_fde': local_fde, + 'VIM': VIM, + 'VAM': VAM, + 'MSE': MSE, + #new: + 'F1': F1, + 'F3': F3, + 'F7': F7, + 'F9': F9, + 'F13': F13, + 'F17': F17, + 'F21': F21 + } diff --git a/metrics/pose_metrics.py b/metrics/pose_metrics.py new file mode 100644 index 0000000..d36bbe2 --- /dev/null +++ b/metrics/pose_metrics.py @@ -0,0 +1,251 @@ +import logging + +import numpy as np +import torch + +logger = logging.getLogger(__name__) + + +def ADE(pred, target, dim): + """ + Average Displacement Error + """ + keypoints_num = int(pred.shape[-1] / dim) + pred = torch.reshape(pred, pred.shape[:-1] + (keypoints_num, dim)) + target = torch.reshape(target, target.shape[:-1] + (keypoints_num, dim)) + + displacement = 0 + for d in range(dim): + displacement += (pred[..., d] - target[..., d]) ** 2 + + ade = torch.mean(torch.sqrt(displacement)) + return ade + + +def FDE(pred, target, dim): + """ + Final Displacement Error + """ + keypoints_num = int(pred.shape[-1] / dim) + pred = torch.reshape(pred, pred.shape[:-1] + (keypoints_num, dim)) + target = torch.reshape(target, target.shape[:-1] + (keypoints_num, dim)) + displacement = 0 + for d in range(dim): + displacement += (pred[..., -1, :, d] - target[..., -1, :, d]) ** 2 + fde = torch.mean(torch.sqrt(displacement)) + return fde + +def local_ade(pred, target, dim): + bs, frames, feat = pred.shape + keypoints = feat // dim + pred_pose = pred.reshape(bs, frames, keypoints, dim) + local_pred_pose = pred_pose - pred_pose[:, :, 0:1, :].repeat(1, 1, keypoints, 1) + target_pose = target.reshape(bs, frames, keypoints, dim) + local_target_pose = target_pose - target_pose[:, :, 0:1, :].repeat(1, 1, keypoints, 1) + local_pred_pose = local_pred_pose.reshape(bs, frames, feat) + local_target_pose = local_target_pose.reshape(bs, frames, feat) + return ADE(local_pred_pose, local_target_pose, dim) + + + 
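+# Editor's usage sketch (illustrative shapes only, not part of the original file): the
+# displacement metrics in this module expect flattened poses of shape
+# (batch_size, frames, keypoints_num * dim). For example, with 22 joints in 3D:
+#   pred, target = torch.rand(8, 25, 66), torch.rand(8, 25, 66)
+#   ADE(pred, target, dim=3)        # mean per-joint Euclidean error over all frames
+#   local_ade(pred, target, dim=3)  # same error after centering each pose on its first joint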
+def local_fde(pred, target, dim): + bs, frames, feat = pred.shape + keypoints = feat // dim + pred_pose = pred.reshape(bs, frames, keypoints, dim) + local_pred_pose = pred_pose - pred_pose[:, :, 0:1, :].repeat(1, 1, keypoints, 1) + target_pose = target.reshape(bs, frames, keypoints, dim) + local_target_pose = target_pose - target_pose[:, :, 0:1, :].repeat(1, 1, keypoints, 1) + local_pred_pose = local_pred_pose.reshape(bs, frames, feat) + local_target_pose = local_target_pose.reshape(bs, frames, feat) + return FDE(local_pred_pose, local_target_pose, dim) + +def MSE(pred, target, dim=None): + """ + Mean Squared Error + Arguments: + pred -- predicted sequence : (batch_size, sequence_length, pose_dim*n_joints) + + """ + # target = target.reshape(*target.shape[:-2], -1) + assert pred.shape == target.shape + B, S, D = pred.shape + mean_errors = torch.zeros((B, S)) + + # Training is done in exponential map or rotation matrix or quaternion + # but the error is reported in Euler angles, as in previous work [3,4,5] + for i in np.arange(B): + # seq_len x complete_pose_dim (H36M==99) + eulerchannels_pred = pred[i] #.numpy() + # n_seeds x seq_len x complete_pose_dim (H36M==96) + action_gt = target#srnn_gts_euler[action] + + # seq_len x complete_pose_dim (H36M==96) + gt_i = action_gt[i]#np.copy(action_gt.squeeze()[i].numpy()) + # Only remove global rotation. Global translation was removed before + gt_i[:, 0:3] = 0 + + # here [2,4,5] remove data based on the std of the batch THIS IS WEIRD! + # (seq_len, 96) - (seq_len, 96) + idx_to_use = np.where(np.std(gt_i.detach().cpu().numpy(), 0) > 1e-4)[0] + euc_error = torch.pow(gt_i[:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2) + + euc_error = torch.sum(euc_error, 1) + + euc_error = torch.sqrt(euc_error) + mean_errors[i,:] = euc_error + + mean_mean_errors = torch.mean(mean_errors, 0) + return mean_mean_errors.mean() + +def VIM(pred, target, dim, mask): + """ + Visibilty Ignored Metric + Inputs: + pred: Prediction data - array of shape (pred_len, #joint*(2D/3D)) + target: Ground truth data - array of shape (pred_len, #joint*(2D/3D)) + dim: dimension of data (2D/3D) + mask: Visibility mask of pos - array of shape (pred_len, #joint) + Output: + errorPose: + """ + assert mask is not None, 'pred_mask should not be None.' + + target_i_global = np.copy(target) + if dim == 2: + mask = np.repeat(mask, 2, axis=-1) + errorPose = np.power(target_i_global - pred, 2) * mask + # get sum on joints and remove the effect of missing joints by averaging on visible joints + errorPose = np.sqrt(np.divide(np.sum(errorPose, 1), np.sum(mask, axis=1))) + where_are_NaNs = np.isnan(errorPose) + errorPose[where_are_NaNs] = 0 + elif dim == 3: + errorPose = np.power(target_i_global - pred, 2) + errorPose = np.sum(errorPose, 1) + errorPose = np.sqrt(errorPose) + else: + msg = "Dimension of data must be either 2D or 3D." + logger.error(msg=msg) + raise Exception(msg) + return errorPose + + +def VAM(pred, target, dim, mask, occ_cutoff=100): + """ + Visibility Aware Metric + Inputs: + pred: Prediction data - array of shape (pred_len, #joint*(2D/3D)) + target: ground truth data - array of shape (pred_len, #joint*(2D/3D)) + dim: dimension of data (2D/3D) + mask: Predicted visibilities of pose, array of shape (pred_len, #joint) + occ_cutoff: Maximum error penalty + Output: + seq_err: + """ + assert mask is not None, 'pred_mask should not be None.' 
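+    # Editor's note (summarising the loop below): per frame, a joint whose predicted visibility
+    # disagrees with the ground-truth visibility contributes the full occ_cutoff penalty, a joint
+    # correctly predicted visible contributes its coordinate error capped at occ_cutoff, and a
+    # joint correctly predicted occluded is skipped.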
+ assert dim == 2 or dim == 3 + + pred_mask = np.repeat(mask, 2, axis=-1) + seq_err = [] + if type(target) is list: + target = np.array(target) + target_mask = np.where(abs(target) < 0.5, 0, 1) + for frame in range(target.shape[0]): + f_err = 0 + N = 0 + for j in range(0, target.shape[1], 2): + if target_mask[frame][j] == 0: + if pred_mask[frame][j] == 0: + dist = 0 + elif pred_mask[frame][j] == 1: + dist = occ_cutoff + N += 1 + elif target_mask[frame][j] > 0: + N += 1 + if pred_mask[frame][j] == 0: + dist = occ_cutoff + elif pred_mask[frame][j] == 1: + d = np.power(target[frame][j:j + 2] - pred[frame][j:j + 2], 2) + d = np.sum(np.sqrt(d)) + dist = min(occ_cutoff, d) + else: + msg = "Target mask must be positive values." + logger.error(msg) + raise Exception(msg) + f_err += dist + if N > 0: + seq_err.append(f_err / N) + else: + seq_err.append(f_err) + return np.array(seq_err) + + +#new: +def F1(pred, target, dim): + keypoints_num = int(pred.shape[-1] / dim) + pred = torch.reshape(pred, pred.shape[:-1] + (keypoints_num, dim)) + target = torch.reshape(target, target.shape[:-1] + (keypoints_num, dim)) + displacement = 0 + for d in range(dim): + displacement += (pred[..., 1, :, d] - target[..., 1, :, d]) ** 2 + de = torch.mean(torch.sqrt(displacement)) + return de + +def F3(pred, target, dim): + keypoints_num = int(pred.shape[-1] / dim) + pred = torch.reshape(pred, pred.shape[:-1] + (keypoints_num, dim)) + target = torch.reshape(target, target.shape[:-1] + (keypoints_num, dim)) + displacement = 0 + for d in range(dim): + displacement += (pred[..., 3, :, d] - target[..., 3, :, d]) ** 2 + de = torch.mean(torch.sqrt(displacement)) + return de + +def F7(pred, target, dim): + keypoints_num = int(pred.shape[-1] / dim) + pred = torch.reshape(pred, pred.shape[:-1] + (keypoints_num, dim)) + target = torch.reshape(target, target.shape[:-1] + (keypoints_num, dim)) + displacement = 0 + for d in range(dim): + displacement += (pred[..., 7, :, d] - target[..., 7, :, d]) ** 2 + de = torch.mean(torch.sqrt(displacement)) + return de + +def F9(pred, target, dim): + keypoints_num = int(pred.shape[-1] / dim) + pred = torch.reshape(pred, pred.shape[:-1] + (keypoints_num, dim)) + target = torch.reshape(target, target.shape[:-1] + (keypoints_num, dim)) + displacement = 0 + for d in range(dim): + displacement += (pred[..., 9, :, d] - target[..., 9, :, d]) ** 2 + de = torch.mean(torch.sqrt(displacement)) + return de + +def F13(pred, target, dim): + keypoints_num = int(pred.shape[-1] / dim) + pred = torch.reshape(pred, pred.shape[:-1] + (keypoints_num, dim)) + target = torch.reshape(target, target.shape[:-1] + (keypoints_num, dim)) + displacement = 0 + for d in range(dim): + displacement += (pred[..., 13, :, d] - target[..., 13, :, d]) ** 2 + de = torch.mean(torch.sqrt(displacement)) + return de + +def F17(pred, target, dim): + keypoints_num = int(pred.shape[-1] / dim) + pred = torch.reshape(pred, pred.shape[:-1] + (keypoints_num, dim)) + target = torch.reshape(target, target.shape[:-1] + (keypoints_num, dim)) + displacement = 0 + for d in range(dim): + displacement += (pred[..., 17, :, d] - target[..., 17, :, d]) ** 2 + de = torch.mean(torch.sqrt(displacement)) + return de + +def F21(pred, target, dim): + keypoints_num = int(pred.shape[-1] / dim) + pred = torch.reshape(pred, pred.shape[:-1] + (keypoints_num, dim)) + target = torch.reshape(target, target.shape[:-1] + (keypoints_num, dim)) + displacement = 0 + for d in range(dim): + displacement += (pred[..., 21, :, d] - target[..., 21, :, d]) ** 2 + de = 
torch.mean(torch.sqrt(displacement)) + return de \ No newline at end of file diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..2891f01 --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,22 @@ +from .zero_vel import ZeroVel +from .pv_lstm import PVLSTM +from .disentangled import Disentangled +from .derpof import DeRPoF +from models.history_repeats_itself.history_repeats_itself import HistoryRepeatsItself +from .sts_gcn.sts_gcn import STsGCN +from .msr_gcn.msrgcn import MSRGCN +from .potr.potr import POTR +from .st_trans.ST_Trans import ST_Trans +from .pgbig.pgbig import PGBIG + +MODELS = {'zero_vel': ZeroVel, + 'pv_lstm': PVLSTM, + 'disentangled': Disentangled, + 'derpof': DeRPoF, + 'history_repeats_itself': HistoryRepeatsItself, + 'potr': POTR, + 'sts_gcn': STsGCN, + 'msr_gcn': MSRGCN, + 'st_trans': ST_Trans, + 'pgbig': PGBIG , + } diff --git a/models/derpof.py b/models/derpof.py new file mode 100644 index 0000000..f7e59cb --- /dev/null +++ b/models/derpof.py @@ -0,0 +1,159 @@ +import torch +import torch.nn as nn + +from utils.others import pose_from_vel + + +class DeRPoF(nn.Module): + def __init__(self, args): + super(DeRPoF, self).__init__() + self.args = args + self.keypoints_num = self.args.keypoints_num + self.keypoint_dim = self.args.keypoint_dim + self.features_num = int(args.keypoints_num * args.keypoint_dim) + + # global + self.global_model = LSTM_g(pose_dim=self.features_num, embedding_dim=args.embedding_dim, h_dim=args.hidden_dim, + dropout=args.dropout) + + # local + encoder = Encoder(pose_dim=self.features_num, h_dim=args.hidden_dim, latent_dim=args.latent_dim, + dropout=args.dropout) + decoder = Decoder(pose_dim=self.features_num, h_dim=args.hidden_dim, latent_dim=args.latent_dim, + dropout=args.dropout) + self.local_model = VAE(Encoder=encoder, Decoder=decoder) + + def forward(self, inputs): + pose = inputs['observed_pose'] + # print(pose.dtype) + vel = (pose[..., 1:, :] - pose[..., :-1, :]).permute(1, 0, 2) + frames_num, bs, _ = vel.shape + + # global + global_vel = 0.5 * (vel.view(frames_num, bs, self.keypoints_num, self.keypoint_dim)[:, :, 0] + + vel.view(frames_num, bs, self.keypoints_num, self.keypoint_dim)[:, :, 1]) + + global_vel = global_vel + + # local + local_vel = (vel.view(frames_num, bs, self.keypoints_num, self.keypoint_dim) + - global_vel.view(frames_num, bs, 1, self.keypoint_dim)).view(frames_num, bs, self.features_num) + # predict + global_vel_out = self.global_model(global_vel, self.args.pred_frames_num).view(self.args.pred_frames_num, bs, 1, self.keypoint_dim) + local_vel_out, mean, log_var = self.local_model(local_vel, self.args.pred_frames_num) + local_vel_out = local_vel_out.view(self.args.pred_frames_num, bs, self.keypoints_num, self.keypoint_dim) + # merge local and global velocity + vel_out = (global_vel_out + local_vel_out) + pred_vel = vel_out.view(self.args.pred_frames_num, bs, self.features_num).permute(1, 0, 2) + pred_pose = pose_from_vel(pred_vel, pose[..., -1, :]) + outputs = {'pred_pose': pred_pose, 'pred_vel_global': global_vel_out, 'pred_vel_local': local_vel_out, + 'mean': mean, 'log_var': log_var} + + return outputs + + +class LSTM_g(nn.Module): + def __init__(self, pose_dim, embedding_dim=8, h_dim=16, num_layers=2, dropout=0.1): + super(LSTM_g, self).__init__() + self.pose_dim = pose_dim + self.embedding_dim = embedding_dim + self.h_dim = h_dim + self.num_layers = num_layers + + self.embedding_fn = nn.Sequential(nn.Linear(3, embedding_dim), nn.ReLU()) + self.encoder_g = nn.LSTM(embedding_dim, h_dim, 
num_layers, dropout=dropout) + self.decoder_g = nn.LSTM(embedding_dim, h_dim, num_layers, dropout=dropout) + self.hidden2g = nn.Sequential(nn.Linear(h_dim, 3)) + + def forward(self, global_s, pred_len): + seq_len, batch, l = global_s.shape + state_tuple_g = (torch.zeros(self.num_layers, batch, self.h_dim, device=global_s.device, dtype=torch.float32), + torch.zeros(self.num_layers, batch, self.h_dim, device=global_s.device, dtype=torch.float32)) + + global_s = global_s.contiguous() + + output_g, state_tuple_g = self.encoder_g( + self.embedding_fn(global_s.view(-1, 3)).view(seq_len, batch, self.embedding_dim), state_tuple_g) + + pred_s_g = torch.tensor([], device=global_s.device) + last_s_g = global_s[-1].unsqueeze(0) + for _ in range(pred_len): + output_g, state_tuple_g = self.decoder_g( + self.embedding_fn(last_s_g.view(-1, 3)).view(1, batch, self.embedding_dim), state_tuple_g) + curr_s_g = self.hidden2g(output_g.view(-1, self.h_dim)) + pred_s_g = torch.cat((pred_s_g, curr_s_g.unsqueeze(0)), dim=0) + last_s_g = curr_s_g.unsqueeze(0) + return pred_s_g + + +class VAE(nn.Module): + def __init__(self, Encoder, Decoder): + super(VAE, self).__init__() + self.Encoder = Encoder + self.Decoder = Decoder + + def reparameterization(self, mean, var): + epsilon = torch.randn_like(var) # sampling epsilon + z = mean + var * epsilon # reparameterization trick + return z + + def forward(self, obs_s, pred_len): + mean, log_var = self.Encoder(obs_s=obs_s) + z = self.reparameterization(mean, torch.exp(0.5 * log_var)) # takes exponential function (log var -> var) + preds_s = self.Decoder(obs_s=obs_s, latent=z, pred_len=pred_len) + + return preds_s, mean, log_var + + +class Encoder(nn.Module): + def __init__(self, pose_dim, h_dim=32, latent_dim=16, num_layers=2, dropout=0.2): + super(Encoder, self).__init__() + + self.pose_dim = pose_dim + self.h_dim = h_dim + self.latent_dim = latent_dim + self.num_layers = num_layers + + self.encoder = nn.LSTM(pose_dim, h_dim, num_layers, dropout=dropout) + self.FC_mean = nn.Linear(h_dim, latent_dim) + self.FC_var = nn.Linear(h_dim, latent_dim) + + def forward(self, obs_s): + batch = obs_s.size(1) + state_tuple = (torch.zeros(self.num_layers, batch, self.h_dim, device=obs_s.device, dtype=torch.float32), + torch.zeros(self.num_layers, batch, self.h_dim, device=obs_s.device, dtype=torch.float32)) + output, state_tuple = self.encoder(obs_s, state_tuple) + out = output[-1] + mean = self.FC_mean(out) + log_var = self.FC_var(out) + return mean, log_var + + +class Decoder(nn.Module): + def __init__(self, pose_dim, h_dim=32, latent_dim=16, num_layers=2, dropout=0.2): + super(Decoder, self).__init__() + self.pose_dim = pose_dim + self.h_dim = h_dim + self.latent_dim = latent_dim + self.num_layers = num_layers + + self.decoder = nn.LSTM(pose_dim, h_dim, num_layers, dropout=dropout) + self.FC = nn.Sequential(nn.Linear(latent_dim, h_dim)) + self.mlp = nn.Sequential(nn.Linear(h_dim, pose_dim)) + + def forward(self, obs_s, latent, pred_len): + batch = obs_s.size(1) + decoder_c = torch.zeros(self.num_layers, batch, self.h_dim, device=obs_s.device, dtype=torch.float32) + last_s = obs_s[-1].unsqueeze(0) + decoder_h = self.FC(latent).unsqueeze(0) + decoder_h = decoder_h.repeat(self.num_layers, 1, 1) + state_tuple = (decoder_h, decoder_c) + + preds_s = torch.tensor([], device=obs_s.device) + for _ in range(pred_len): + output, state_tuple = self.decoder(last_s, state_tuple) + curr_s = self.mlp(output.view(-1, self.h_dim)) + preds_s = torch.cat((preds_s, curr_s.unsqueeze(0)), dim=0) + last_s = 
curr_s.unsqueeze(0) + + return preds_s diff --git a/models/disentangled.py b/models/disentangled.py new file mode 100644 index 0000000..5f55bd0 --- /dev/null +++ b/models/disentangled.py @@ -0,0 +1,50 @@ +import torch +import torch.nn as nn + +from models.pv_lstm import PVLSTM +from models.zero_vel import ZeroVel +from utils.others import pose_from_vel + + +class Disentangled(nn.Module): + def __init__(self, args): + super(Disentangled, self).__init__() + self.args = args + + # global + global_args = args.copy() + global_args.keypoints_num = 1 + self.global_model = ZeroVel(global_args) + + # local + local_args = args.copy() + local_args.keypoints_num = args.keypoints_num - global_args.keypoints_num + self.local_model = PVLSTM(local_args) + + def forward(self, inputs): + pose = inputs['observed_pose'] + + # global + global_pose = pose[..., : self.args.keypoint_dim] + global_inputs = {'observed_pose': global_pose} + + # local + repeat = torch.ones(len(global_pose.shape), dtype=int) + repeat[-1] = self.local_model.args.keypoints_num + local_pose = pose[..., self.args.keypoint_dim:] - global_pose.repeat(tuple(repeat)) + local_inputs = {'observed_pose': local_pose} + + # predict + global_outputs = self.global_model(global_inputs) + local_outputs = self.local_model(local_inputs) + + # merge local and global velocity + global_vel_out = global_outputs['pred_vel'] + local_vel_out = local_outputs['pred_vel'] + repeat = torch.ones(len(global_vel_out.shape), dtype=int) + repeat[-1] = self.local_model.args.keypoints_num + pred_vel = torch.cat((global_vel_out, local_vel_out + global_vel_out.repeat(tuple(repeat))), dim=-1) + pred_pose = pose_from_vel(pred_vel, pose[..., -1, :]) + outputs = {'pred_pose': pred_pose, 'pred_vel': pred_vel} + + return outputs diff --git a/models/history_repeats_itself/history_repeats_itself.py b/models/history_repeats_itself/history_repeats_itself.py new file mode 100644 index 0000000..e52e7e6 --- /dev/null +++ b/models/history_repeats_itself/history_repeats_itself.py @@ -0,0 +1,429 @@ +import torch +from torch import nn +from torch.nn.parameter import Parameter +import numpy as np +import math +import re + +def get_dct_matrix(N): + dct_m = np.eye(N) + for k in np.arange(N): + for i in np.arange(N): + w = np.sqrt(2 / N) + if k == 0: + w = np.sqrt(1 / N) + dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) + idct_m = np.linalg.inv(dct_m) + return dct_m, idct_m +class HistoryRepeatsItself(nn.Module): + def __init__(self, args): + super(HistoryRepeatsItself, self).__init__() + args.loss.itera = args.itera + args.loss.un_mode = args.un_mode + self.modality = args.modality + args.loss.modality = self.modality + + self.args = args + self.init_mode = args.init_mode + print(args) + self.device = args.device + self.net_pred = AttModel(in_features=args.in_features, kernel_size=args.kernel_size, d_model=args.d_model, + num_stage=args.num_stage, dct_n=args.dct_n, device=self.device) + + l_p3d = 0 + + self.in_n = args.input_n + self.out_n = args.output_n + if args.un_mode == 'sig5-TJPrior': + un_params = torch.nn.Parameter(torch.zeros((args.in_features//3 + args.output_n + args.kernel_size, 5))) + elif 'sig5' in args.un_mode: + un_params = torch.nn.Parameter(torch.zeros(args.in_features//3, 5)) + elif 'sigstar' in args.un_mode: + un_params = torch.nn.Parameter(torch.zeros(args.in_features//3, 2)) + elif 'poly' in args.un_mode: + try: + params_count = int(re.findall("\d+", args.un_mode)[0]) + 2 + self.params_count = params_count + except: + assert False, "you must have a number after 
'poly'" + un_params = torch.nn.Parameter(torch.zeros(args.in_features//3, params_count)) + else: + if args.itera == 1: + un_params = torch.nn.Parameter(torch.zeros(15, self.out_n + args.kernel_size ,args.in_features//3)) + else: + un_params = torch.nn.Parameter(torch.zeros(15, 10 + args.kernel_size ,args.in_features//3)) + + self.un_params = un_params + if self.init_mode == "descending": + torch.nn.init.constant_(self.un_params[:, 0], -0.2) + torch.nn.init.constant_(self.un_params[:, 1], 3.7) + torch.nn.init.constant_(self.un_params[:, 2], -0.2) + torch.nn.init.constant_(self.un_params[:, 3], 10) + torch.nn.init.constant_(self.un_params[:, 4], -0.1) + + elif self.init_mode == "increasing": + torch.nn.init.constant_(self.un_params[:, 0], 0) + torch.nn.init.constant_(self.un_params[:, 1], 3) + torch.nn.init.constant_(self.un_params[:, 2], 0.2) + torch.nn.init.constant_(self.un_params[:, 3], 10.7) + torch.nn.init.constant_(self.un_params[:, 4], 0.1) + + elif self.init_mode == "constant-one": + torch.nn.init.constant_(self.un_params[:, 0], 1) + torch.nn.init.constant_(self.un_params[:, 1], 0) # this is not a bug :) + torch.nn.init.constant_(self.un_params[:, 2], 1) + torch.nn.init.constant_(self.un_params[:, 3], 1) + torch.nn.init.constant_(self.un_params[:, 4], 1) + + elif self.init_mode == "increasing1": + torch.nn.init.constant_(self.un_params[:, 0], 0) + torch.nn.init.constant_(self.un_params[:, 1], 7.8) + torch.nn.init.constant_(self.un_params[:, 2], 0.5) + torch.nn.init.constant_(self.un_params[:, 3], 17.8) + torch.nn.init.constant_(self.un_params[:, 4], 0.2) + + elif self.init_mode == "increasing2": + torch.nn.init.constant_(self.un_params[:, 0], 2.1) + torch.nn.init.constant_(self.un_params[:, 1], 2.6) + torch.nn.init.constant_(self.un_params[:, 2], 0.5) + torch.nn.init.constant_(self.un_params[:, 3], 17.8) + torch.nn.init.constant_(self.un_params[:, 4], 0.2) + + elif self.init_mode == "increasing3": + torch.nn.init.constant_(self.un_params[:, 0], 2.1) + torch.nn.init.constant_(self.un_params[:, 1], 6) + torch.nn.init.constant_(self.un_params[:, 2], 0.5) + torch.nn.init.constant_(self.un_params[:, 3], 17.8) + torch.nn.init.constant_(self.un_params[:, 4], 0.2) + + elif self.init_mode == "increasing4": + torch.nn.init.constant_(self.un_params[:, 0], 0.6) + torch.nn.init.constant_(self.un_params[:, 1], 4.7) + torch.nn.init.constant_(self.un_params[:, 2], 0.1) + torch.nn.init.constant_(self.un_params[:, 3], 20) + torch.nn.init.constant_(self.un_params[:, 4], 0.2) + elif self.init_mode == 'poly_decreasing': + coeff = 1 + for i in range(self.params_count): + torch.nn.init.constant_(self.un_params[:, i], coeff) + coeff /= 10 + + elif bool(re.findall(r'^default_[-+]?(?:\d*\.\d+|\d+)_[-+]?(?:\d*\.\d+|\d+)$', self.init_mode)): + mean, std = [float(n) for n in re.findall('[-+]?(?:\d*\.\d+|\d+)', self.init_mode)] + torch.nn.init.normal_(self.un_params, mean=mean, std=std) + + elif self.init_mode == "default": + mean, std = 0, 1 + torch.nn.init.normal_(self.un_params, mean=mean, std=std) + else: + raise Exception("The defined init mode is not supported.") + + print(self.un_params) + if self.modality == "Human36": + self.dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) + elif self.modality == "AMASS" or self.modality == "3DPW": + pass + else: + assert False, "The 
modality is not supported." + self.seq_in = args.kernel_size + self.sample_rate = 2 + self.joint_to_ignore = np.array([16, 20, 23, 24, 28, 31]) + self.index_to_ignore = np.concatenate( + (self.joint_to_ignore * 3, self.joint_to_ignore * 3 + 1, self.joint_to_ignore * 3 + 2)) + self.joint_equal = np.array([13, 19, 22, 13, 27, 30]) + self.index_to_equal = np.concatenate((self.joint_equal * 3, self.joint_equal * 3 + 1, self.joint_equal * 3 + 2)) + self.itera = args.itera + self.idx = np.expand_dims(np.arange(self.seq_in + self.out_n), axis=1) + ( + self.out_n - self.seq_in + np.expand_dims(np.arange(self.itera), axis=0)) + + def forward_human(self, inputs): + seq = torch.cat((inputs['observed_pose'], inputs['future_pose']), dim=1) + p3d_h36 = seq.reshape(seq.shape[0], seq.shape[1], -1) + batch_size, seq_n, _ = p3d_h36.shape + p3d_h36 = p3d_h36.float() + p3d_sup = p3d_h36.clone()[:, :, self.dim_used][:, -self.out_n - self.seq_in:].reshape( + [-1, self.seq_in + self.out_n, len(self.dim_used) // 3, 3]) + p3d_src = p3d_h36.clone()[:, :, self.dim_used] + if self.itera == 1: + p3d_out_all = self.net_pred(p3d_src, input_n=self.in_n, output_n=self.out_n, itera=self.itera) + p3d_out = p3d_h36.clone()[:, self.in_n:self.in_n + self.out_n] + p3d_out[:, :, self.dim_used] = p3d_out_all[:, self.seq_in: self.seq_in + self.out_n, 0] + p3d_out[:, :, self.index_to_ignore] = p3d_out[:, :, self.index_to_equal] + p3d_out = p3d_out.reshape([-1, self.out_n, 96]) + p3d_out_all = p3d_out_all.reshape( + [batch_size, self.seq_in + self.out_n, self.itera, len(self.dim_used) // 3, 3]) + else: + if self.training: + iterr = 1 + out_ = 10 + else: + iterr = self.itera + out_ = self.out_n + p3d_out_all = self.net_pred(p3d_src, input_n=self.in_n, output_n=10, itera=iterr) + p3d_1 = p3d_out_all[:, :self.seq_in, 0].clone() + p3d_out_all = p3d_out_all[:, self.seq_in:].transpose(1, 2).reshape([batch_size, 10 * iterr, -1])[:, :out_] + zero_ = torch.zeros_like(p3d_out_all) + if self.training: + p3d_out_all = torch.cat((p3d_out_all, zero_, zero_), dim=1)[:, :self.out_n] + p3d_out = p3d_h36.clone()[:, self.in_n:self.in_n + self.out_n] + p3d_out[:, :, self.dim_used] = p3d_out_all + p3d_out[:, :, self.index_to_ignore] = p3d_out[:, :, self.index_to_equal] + p3d_out = p3d_out.reshape([-1, self.out_n, 96]) + + p3d_h36 = p3d_h36[:, :self.in_n + out_].reshape([-1, self.in_n + out_, 32, 3]) + + p3d_out_all = torch.cat((p3d_1, p3d_out_all), dim=1) + p3d_out_all = p3d_out_all.reshape( + [batch_size, self.seq_in + self.out_n, len(self.dim_used) // 3, 3]) + return {'pred_pose': p3d_out_all, 'pred_metric_pose': p3d_out, 'un_params': self.un_params} + + def forward_amass(self,inputs): + seq = torch.cat((inputs['observed_pose'], inputs['future_pose']), dim=1) + bs, seq_n, joints = seq.shape + p3d_h36 = seq.reshape(seq.shape[0], seq.shape[1], -1) + + batch_size, seq_n, _ = p3d_h36.shape + p3d_h36 = p3d_h36.float() + + p3d_src = p3d_h36.clone() + + if self.itera == 1: + p3d_out_all = self.net_pred(p3d_src, output_n=self.out_n, input_n=self.in_n, itera=self.itera) + p3d_out = p3d_out_all[:, self.seq_in:].reshape([batch_size, self.out_n, joints]) + p3d_out_all = p3d_out_all[:, :, 0].reshape([batch_size, self.seq_in + self.out_n, joints//3, 3]) + + else: + assert False, "itera > 1 is not available for amass dataset" + return {'pred_pose': p3d_out_all, 'pred_metric_pose': p3d_out, 'un_params': self.un_params} + + def forward(self,inputs): + if self.modality == "Human36": + return self.forward_human(inputs) + elif self.modality == "AMASS" or self.modality == 
"3DPW": + return self.forward_amass(inputs) + else: + assert False, "Unknown modality" +class AttModel(nn.Module): + + def __init__(self, in_features=48, kernel_size=5, d_model=512, num_stage=2, dct_n=10, device='cpu'): + super(AttModel, self).__init__() + + self.kernel_size = kernel_size + self.d_model = d_model + self.dct_n = dct_n + self.device = device + assert kernel_size == 10 + + self.convQ = nn.Sequential(nn.Conv1d(in_channels=in_features, out_channels=d_model, kernel_size=6, + bias=False), + nn.ReLU(), + nn.Conv1d(in_channels=d_model, out_channels=d_model, kernel_size=5, + bias=False), + nn.ReLU()) + + self.convK = nn.Sequential(nn.Conv1d(in_channels=in_features, out_channels=d_model, kernel_size=6, + bias=False), + nn.ReLU(), + nn.Conv1d(in_channels=d_model, out_channels=d_model, kernel_size=5, + bias=False), + nn.ReLU()) + + self.gcn = GCN(input_feature=(dct_n) * 2, hidden_feature=d_model, p_dropout=0.3, + num_stage=num_stage, + node_n=in_features) + + def forward(self, src, output_n=25, input_n=50, itera=1): + """ + :param src: [batch_size,seq_len,feat_dim] + :param output_n: + :param input_n: + :param frame_n: + :param dct_n: + :param itera: + :return: + """ + dct_n = self.dct_n + src = src[:, :input_n] + src_tmp = src.clone() + bs = src.shape[0] + src_key_tmp = src_tmp.transpose(1, 2)[:, :, :(input_n - output_n)].clone() + src_query_tmp = src_tmp.transpose(1, 2)[:, :, -self.kernel_size:].clone() + + dct_m, idct_m = get_dct_matrix(self.kernel_size + output_n) + dct_m = torch.from_numpy(dct_m).float().to(self.device) + idct_m = torch.from_numpy(idct_m).float().to(self.device) + + vn = input_n - self.kernel_size - output_n + 1 + vl = self.kernel_size + output_n + idx = np.expand_dims(np.arange(vl), axis=0) + \ + np.expand_dims(np.arange(vn), axis=1) + src_value_tmp = src_tmp[:, idx].clone().reshape( + [bs * vn, vl, -1]) + src_value_tmp = torch.matmul(dct_m[:dct_n].unsqueeze(dim=0), src_value_tmp).reshape( + [bs, vn, dct_n, -1]).transpose(2, 3).reshape( + [bs, vn, -1]) + + idx = list(range(-self.kernel_size, 0, 1)) + [-1] * output_n + outputs = [] + + key_tmp = self.convK(src_key_tmp / 1000.0) + for i in range(itera): + query_tmp = self.convQ(src_query_tmp / 1000.0) + score_tmp = torch.matmul(query_tmp.transpose(1, 2), key_tmp) + 1e-15 + att_tmp = score_tmp / (torch.sum(score_tmp, dim=2, keepdim=True)) + dct_att_tmp = torch.matmul(att_tmp, src_value_tmp)[:, 0].reshape( + [bs, -1, dct_n]) + + input_gcn = src_tmp[:, idx] + dct_in_tmp = torch.matmul(dct_m[:dct_n].unsqueeze(dim=0), input_gcn).transpose(1, 2) + dct_in_tmp = torch.cat([dct_in_tmp, dct_att_tmp], dim=-1) + dct_out_tmp = self.gcn(dct_in_tmp) + out_gcn = torch.matmul(idct_m[:, :dct_n].unsqueeze(dim=0), + dct_out_tmp[:, :, :dct_n].transpose(1, 2)) + outputs.append(out_gcn.unsqueeze(2)) + if itera > 1: + out_tmp = out_gcn.clone()[:, 0 - output_n:] + src_tmp = torch.cat([src_tmp, out_tmp], dim=1) + + vn = 1 - 2 * self.kernel_size - output_n + vl = self.kernel_size + output_n + idx_dct = np.expand_dims(np.arange(vl), axis=0) + \ + np.expand_dims(np.arange(vn, -self.kernel_size - output_n + 1), axis=1) + + src_key_tmp = src_tmp[:, idx_dct[0, :-1]].transpose(1, 2) + key_new = self.convK(src_key_tmp / 1000.0) + key_tmp = torch.cat([key_tmp, key_new], dim=2) + + src_dct_tmp = src_tmp[:, idx_dct].clone().reshape( + [bs * self.kernel_size, vl, -1]) + src_dct_tmp = torch.matmul(dct_m[:dct_n].unsqueeze(dim=0), src_dct_tmp).reshape( + [bs, self.kernel_size, dct_n, -1]).transpose(2, 3).reshape( + [bs, self.kernel_size, -1]) + 
src_value_tmp = torch.cat([src_value_tmp, src_dct_tmp], dim=1) + + src_query_tmp = src_tmp[:, -self.kernel_size:].transpose(1, 2) + + outputs = torch.cat(outputs, dim=2) + return outputs + + +class GraphConvolution(nn.Module): + """ + adapted from : https://github.com/tkipf/gcn/blob/92600c39797c2bfb61a508e52b88fb554df30177/gcn/layers.py#L132 + """ + + def __init__(self, in_features, out_features, bias=True, node_n=48): + super(GraphConvolution, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.weight = Parameter(torch.FloatTensor(in_features, out_features)) + self.att = Parameter(torch.FloatTensor(node_n, node_n)) + if bias: + self.bias = Parameter(torch.FloatTensor(out_features)) + else: + self.register_parameter('bias', None) + self.reset_parameters() + + def reset_parameters(self): + stdv = 1. / math.sqrt(self.weight.size(1)) + self.weight.data.uniform_(-stdv, stdv) + self.att.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + + def forward(self, inputs): + support = torch.matmul(inputs, self.weight) + output = torch.matmul(self.att, support) + if self.bias is not None: + return output + self.bias + else: + return output + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + + +class GC_Block(nn.Module): + def __init__(self, in_features, p_dropout, bias=True, node_n=48): + """ + Define a residual block of GCN + """ + super(GC_Block, self).__init__() + self.in_features = in_features + self.out_features = in_features + + self.gc1 = GraphConvolution(in_features, in_features, node_n=node_n, bias=bias) + self.bn1 = nn.BatchNorm1d(node_n * in_features) + + self.gc2 = GraphConvolution(in_features, in_features, node_n=node_n, bias=bias) + self.bn2 = nn.BatchNorm1d(node_n * in_features) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + def forward(self, x): + y = self.gc1(x) + b, n, f = y.shape + y = self.bn1(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + + y = self.gc2(y) + b, n, f = y.shape + y = self.bn2(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + + return y + x + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + + +class GCN(nn.Module): + def __init__(self, input_feature, hidden_feature, p_dropout, num_stage=1, node_n=48): + """ + :param input_feature: num of input feature + :param hidden_feature: num of hidden feature + :param p_dropout: drop out prob. 
+ :param num_stage: number of residual blocks + :param node_n: number of nodes in graph + """ + super(GCN, self).__init__() + self.num_stage = num_stage + + self.gc1 = GraphConvolution(input_feature, hidden_feature, node_n=node_n) + self.bn1 = nn.BatchNorm1d(node_n * hidden_feature) + + self.gcbs = [] + for i in range(num_stage): + self.gcbs.append(GC_Block(hidden_feature, p_dropout=p_dropout, node_n=node_n)) + + self.gcbs = nn.ModuleList(self.gcbs) + + self.gc7 = GraphConvolution(hidden_feature, input_feature, node_n=node_n) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + def forward(self, x): + y = self.gc1(x) + b, n, f = y.shape + y = self.bn1(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + + for i in range(self.num_stage): + y = self.gcbs[i](y) + + y = self.gc7(y) + y = y + x + + return y diff --git a/models/msr_gcn/__init__.py b/models/msr_gcn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/msr_gcn/layers.py b/models/msr_gcn/layers.py new file mode 100644 index 0000000..0beba76 --- /dev/null +++ b/models/msr_gcn/layers.py @@ -0,0 +1,183 @@ +import torch +import torch.nn as nn +from torch.nn.parameter import Parameter +import math + +class GraphConvolution(nn.Module): + """ + adapted from : https://github.com/tkipf/gcn/blob/92600c39797c2bfb61a508e52b88fb554df30177/gcn/layers.py#L132 + """ + + def __init__(self, in_features, out_features, bias=True, node_n=48): + super(GraphConvolution, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.weight = Parameter(torch.FloatTensor(in_features, out_features)) # W + self.att = Parameter(torch.FloatTensor(node_n, node_n)) # A + if bias: + self.bias = Parameter(torch.FloatTensor(out_features)) + else: + self.register_parameter('bias', None) + self.reset_parameters() + + def reset_parameters(self): + stdv = 1. 
/ math.sqrt(self.weight.size(1)) + self.weight.data.uniform_(-stdv, stdv) + self.att.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + + def forward(self, input): + # b, n, d + support = torch.matmul(input, self.weight) + output = torch.matmul(self.att, support) + if self.bias is not None: + return output + self.bias + else: + return output + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + + +class GC_Block(nn.Module): + def __init__(self, in_features, p_dropout, bias=True, node_n=48, leaky_c=0.2): + """ + Define a residual block of GCN + """ + super(GC_Block, self).__init__() + self.in_features = in_features + self.out_features = in_features + + self.gc1 = GraphConvolution(in_features, in_features, node_n=node_n, bias=bias) + self.bn1 = nn.BatchNorm1d(node_n * in_features) + + self.gc2 = GraphConvolution(in_features, in_features, node_n=node_n, bias=bias) + self.bn2 = nn.BatchNorm1d(node_n * in_features) + + self.do = nn.Dropout(p_dropout) + # self.act_f = nn.Tanh() + self.act_f = nn.LeakyReLU(leaky_c) + + def forward(self, x): + y = self.gc1(x) + b, n, f = y.shape + y = self.bn1(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + + y = self.gc2(y) + b, n, f = y.shape + y = self.bn2(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + + y = y + x + return y + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + + +class PreGCN(nn.Module): + def __init__(self, input_feature, hidden_feature, node_n, p_dropout, leaky_c=0.2): + super(PreGCN, self).__init__() + + self.input_feature = input_feature + self.hidden_feature = hidden_feature + self.node_n = node_n + + self.gcn = GraphConvolution(input_feature, hidden_feature, node_n=node_n) + self.bn1d = nn.BatchNorm1d(node_n * hidden_feature) + # self.act_f = nn.Tanh() + self.act_f = nn.LeakyReLU(leaky_c) + + self.do = nn.Dropout(p_dropout) + + def forward(self, x): + y = self.gcn(x) + b, n, f = y.shape + y = self.bn1d(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + return y + + +class PostGCN(nn.Module): + def __init__(self, input_feature, hidden_feature, node_n): + super(PostGCN, self).__init__() + + self.input_feature = input_feature + self.hidden_feature = hidden_feature + self.node_n = node_n + + self.gcn = GraphConvolution(input_feature, hidden_feature, node_n=node_n) + # self.act_f = nn.Sigmoid() + # self.act_f = nn.LeakyReLU(option.leaky_c) # 最后一层加激活不确定对不对 + # self.act_f = nn.Tanh() + + def forward(self, x): + y = self.gcn(x) + # y = self.act_f(y) # 最后一层加激活不确定对不对 + return y + + +class SingleLeftLinear(nn.Module): + def __init__(self, input_feature, out_features, seq_len, p_dropout, leaky_c=0.2): + super(SingleLeftLinear, self).__init__() + self.input_feature = input_feature + self.out_features = out_features + self.seq_len = seq_len + + self.linear = nn.Linear(input_feature, out_features) # B, 35, 66 -> B, 35, 36 + self.bn = nn.BatchNorm1d(out_features * seq_len) + # self.act = nn.Tanh() + self.act = nn.LeakyReLU(leaky_c) + self.do = nn.Dropout(p_dropout) + + def forward(self, input): + ''' + + :param input: B, 66, 64 + :return: y: B, 66, 35 + ''' + input = input.permute(0, 2, 1).contiguous() # b, 64, 66 + y = self.linear(input) + b, n, f = y.shape + y = self.bn(y.view(b, -1)).view(b, n, f) + y = self.act(y) + y = self.do(y) + y = y.permute(0, 2, 1) + return y + + +class 
SingleRightLinear(nn.Module): + def __init__(self, input_feature, out_features, node_n, p_dropout, leaky_c=0.2): + super(SingleRightLinear, self).__init__() + + self.input_feature = input_feature + self.out_features = out_features + self.node_n = node_n + + self.linear = nn.Linear(input_feature, out_features) # B, 66, 35 -> B, 66, 128 + self.bn = nn.BatchNorm1d(node_n * out_features) + # self.act = nn.Tanh() + self.act = nn.LeakyReLU(leaky_c) + self.do = nn.Dropout(p_dropout) + + def forward(self, input): + ''' + + :param input: B, 66, 35 + :return: y: B, 66, 35 + ''' + y = self.linear(input) + b, n, f = y.shape + y = self.bn(y.view(b, -1)).view(b, n, f) + y = self.act(y) + y = self.do(y) + return y diff --git a/models/msr_gcn/msrgcn.py b/models/msr_gcn/msrgcn.py new file mode 100644 index 0000000..f6259da --- /dev/null +++ b/models/msr_gcn/msrgcn.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python +# encoding: utf-8 +''' +@project : MSRGCN +@file : msrgcn.py +@author : Droliven +@contact : droliven@163.com +@ide : PyCharm +@time : 2021-07-27 16:46 +''' + +import torch +import torch.nn as nn +from .layers import SingleLeftLinear, SingleRightLinear, PreGCN, GC_Block, PostGCN +from .preprocessor import Proc +from .preprocessor import reverse_dct_torch + +class MSRGCN(nn.Module): + def __init__(self, args): + super(MSRGCN, self).__init__() + + + self.proc = Proc(args) + + self.args = args + + p_dropout = args.p_dropout + #leaky_c=0.2 + leaky_c=args.leaky_c + #final_out_noden=22 + final_out_noden=args.final_out_noden + #input_feature=35 + input_feature=args.input_feature + + + self.first_enhance = PreGCN(input_feature=input_feature, hidden_feature=64, node_n=final_out_noden * 3, + p_dropout=p_dropout, leaky_c=leaky_c) # 35, 64, 66, 0.5 + self.first_left = nn.Sequential( + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), # 64, 0.5, 66 + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + ) + + self.first_down = nn.Sequential( + SingleLeftLinear(input_feature=final_out_noden * 3, out_features=36, seq_len=64, p_dropout=p_dropout, + leaky_c=leaky_c), # 66, 128, 64 + ) + + self.second_enhance = PreGCN(input_feature=64, hidden_feature=128, node_n=36, p_dropout=p_dropout, + leaky_c=leaky_c) + self.second_left = nn.Sequential( + GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + ) + + self.second_down = nn.Sequential( + SingleLeftLinear(input_feature=36, out_features=21, seq_len=128, p_dropout=p_dropout, leaky_c=leaky_c), + # 66, 36, 64 + ) + + self.third_enhance = PreGCN(input_feature=128, hidden_feature=256, node_n=21, p_dropout=p_dropout, + leaky_c=leaky_c) + self.third_left = nn.Sequential( + GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + ) + + self.third_down = nn.Sequential( + SingleLeftLinear(input_feature=21, out_features=12, seq_len=256, p_dropout=p_dropout, leaky_c=leaky_c), + # 66, 36, 64 + ) + + self.fourth_enhance = PreGCN(input_feature=256, hidden_feature=512, node_n=12, p_dropout=p_dropout, + leaky_c=leaky_c) + self.fourth_left = nn.Sequential( + 
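+             # Coarsest stage of the descending ("left") branch: three residual GC blocks over
+             # node_n=12 channels, i.e. the 4-joint grouping times 3 coordinates.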
GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), # 64, 0.5, 66 + GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + ) + + # 右半部分 + self.fourth_right = nn.Sequential( + GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + ) + self.fourth_up = nn.Sequential( + SingleLeftLinear(input_feature=12, out_features=21, seq_len=512, p_dropout=p_dropout, leaky_c=leaky_c), + SingleRightLinear(input_feature=512, out_features=256, node_n=21, p_dropout=p_dropout, leaky_c=leaky_c), + ) + + self.third_right_crop = nn.Sequential( + SingleLeftLinear(input_feature=42, out_features=21, seq_len=256, p_dropout=p_dropout, leaky_c=leaky_c), + ) + self.third_right = nn.Sequential( + GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + ) + self.third_up = nn.Sequential( + SingleLeftLinear(input_feature=21, out_features=36, seq_len=256, p_dropout=p_dropout, leaky_c=leaky_c), + SingleRightLinear(input_feature=256, out_features=128, node_n=36, p_dropout=p_dropout, leaky_c=leaky_c) + ) + + self.second_right_crop = nn.Sequential( + SingleLeftLinear(input_feature=72, out_features=36, seq_len=128, p_dropout=p_dropout, leaky_c=leaky_c), + ) + self.second_right = nn.Sequential( + GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + ) + self.second_up = nn.Sequential( + SingleLeftLinear(input_feature=36, out_features=final_out_noden * 3, seq_len=128, p_dropout=p_dropout, + leaky_c=leaky_c), + SingleRightLinear(input_feature=128, out_features=64, node_n=final_out_noden * 3, p_dropout=p_dropout, + leaky_c=leaky_c) + ) + + self.first_right_crop = nn.Sequential( + SingleLeftLinear(input_feature=final_out_noden * 3 * 2, out_features=final_out_noden * 3, seq_len=64, + p_dropout=p_dropout, leaky_c=leaky_c), + ) + self.first_right = nn.Sequential( + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), # 64, 0.5, 66 + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + ) + + # 右边出口部分 + self.first_extra = nn.Sequential( + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + GC_Block(in_features=64, p_dropout=p_dropout, node_n=final_out_noden * 3, leaky_c=leaky_c), + ) + self.first_out = PostGCN(input_feature=64, hidden_feature=input_feature, node_n=final_out_noden * 3) + + self.second_extra = nn.Sequential( + GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + # 
GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + # GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + # GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + # GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + # GC_Block(in_features=128, p_dropout=p_dropout, node_n=36, leaky_c=leaky_c), + ) + self.second_out = PostGCN(input_feature=128, hidden_feature=input_feature, node_n=36) + + self.third_extra = nn.Sequential( + GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + # GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + # GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + # GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + # GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + # GC_Block(in_features=256, p_dropout=p_dropout, node_n=21, leaky_c=leaky_c), + ) + self.third_out = PostGCN(input_feature=256, hidden_feature=input_feature, node_n=21) + + self.fourth_extra = nn.Sequential( + GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + # GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + # GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + # GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + # GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + # GC_Block(in_features=512, p_dropout=p_dropout, node_n=12, leaky_c=leaky_c), + ) + self.fourth_out = PostGCN(input_feature=512, hidden_feature=input_feature, node_n=12) + + def forward(self, inputs): + ''' + :param x: B, 66, 35 + :return: + ''' + + observed = inputs['observed_pose'].clone() + + device = inputs['observed_pose'].device + observed = self.proc(observed, True) + x_p32 = observed['p32'] + x_p22 = observed['p22'] + x_p12 = observed['p12'] + x_p7 = observed['p7'] + x_p4 = observed['p4'] + + enhance_first_left = self.first_enhance(x_p22) # B, 66, 64 + out_first_left = self.first_left(enhance_first_left) + enhance_first_left # 残差连接 + second_left = self.first_down(out_first_left) # 8, 36, 64 + + enhance_second_left = self.second_enhance(second_left) # 8, 36, 128 + out_second_left = self.second_left(enhance_second_left) + enhance_second_left # 残差连接 + third_left = self.second_down(out_second_left) + + enhance_third_left = self.third_enhance(third_left) # 8, 21, 256 + out_third_left = self.third_left(enhance_third_left) + enhance_third_left # 残差连接 + fourth_left = self.third_down(out_third_left) + + enhance_bottom = self.fourth_enhance(fourth_left) # 8, 12, 512 + bottom = self.fourth_left(enhance_bottom) + enhance_bottom # 残差连接 + + bottom_right = self.fourth_right(bottom) + bottom # 残差连接 + + in_third_right = self.fourth_up(bottom_right) + cat_third = torch.cat((out_third_left, in_third_right), dim=-2) + crop_third_right = self.third_right_crop(cat_third) + third_right = self.third_right(crop_third_right) + crop_third_right # 残差连接 + + in_second_right = self.third_up(third_right) + cat_second = torch.cat((out_second_left, in_second_right), dim=-2) + crop_second_right = self.second_right_crop(cat_second) + second_right = self.second_right(crop_second_right) + crop_second_right # 残差连接 + + in_first_right = self.second_up(second_right) + cat_first = torch.cat((out_first_left, in_first_right), dim=-2) + crop_first_right = self.first_right_crop(cat_first) + first_right = 
self.first_right(crop_first_right) + crop_first_right # 残差连接 + + fusion_first = self.first_extra(first_right) + first_right # 残差连接 + pred_first = self.first_out(fusion_first) + x_p22 # 大残差连接 + + fusion_second = self.second_extra(second_right) + second_right # 残差连接 + pred_second = self.second_out(fusion_second) + x_p12 # 大残差连接 + + fusion_third = self.third_extra(third_right) + third_right # 两重残差连接 + pred_third = self.third_out(fusion_third) + x_p7 # 大残差连接 + + fusion_fourth = self.fourth_extra(bottom_right) + bottom_right # 残差连接 + pred_fourth = self.fourth_out(fusion_fourth) + x_p4 # 大残差连接 + + + + pred_first = (pred_first+1)/2 + pred_first = pred_first *(self.proc.global_max-self.proc.global_min)+self.proc.global_min + pred_first = reverse_dct_torch(pred_first, self.proc.idct_m.to(device), self.proc.input_n+self.proc.output_n) + + pred_second = (pred_second+1)/2 + pred_second = pred_second *(self.proc.global_max-self.proc.global_min)+self.proc.global_min + pred_second = reverse_dct_torch(pred_second, self.proc.idct_m.to(device), self.proc.input_n+self.proc.output_n) + + pred_third = (pred_third+1)/2 + pred_third = pred_third *(self.proc.global_max-self.proc.global_min)+self.proc.global_min + pred_third = reverse_dct_torch(pred_third, self.proc.idct_m.to(device), self.proc.input_n+self.proc.output_n) + + pred_fourth = (pred_fourth+1)/2 + pred_fourth = pred_fourth *(self.proc.global_max-self.proc.global_min)+self.proc.global_min + pred_fourth = reverse_dct_torch(pred_fourth, self.proc.idct_m.to(device), self.proc.input_n+self.proc.output_n) + + + temp = pred_first.permute(0,2,1) + + pred_pose = torch.zeros((pred_first.shape[0], self.args.pred_frames_num, 32 * 3)).to(device) + + pred_pose[:,:,self.proc.dim_used] = temp[:,self.proc.input_n:,:] + pred_pose[:,:,self.proc.dim_repeat_32] = temp[:,self.proc.input_n:,self.proc.dim_repeat_22] + pred_pose[:,:,self.proc.dim_replace] = inputs['observed_pose'][:,-1:,self.proc.dim_replace] + return { + "pred_metric_pose":pred_pose,"pred_pose": pred_first, "p22": pred_first, "p12": pred_second, "p7": pred_third, "p4": pred_fourth + } + + diff --git a/models/msr_gcn/preprocessor.py b/models/msr_gcn/preprocessor.py new file mode 100644 index 0000000..3341fdc --- /dev/null +++ b/models/msr_gcn/preprocessor.py @@ -0,0 +1,118 @@ +import numpy as np +from torch import nn +import torch +from .utils import data_utils + + +def get_dct_matrix(N, device): + dct_m = np.eye(N) + for k in np.arange(N): + for i in np.arange(N): + w = np.sqrt(2 / N) + if k == 0: + w = np.sqrt(1 / N) + dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) + idct_m = np.linalg.inv(dct_m) + return torch.FloatTensor(dct_m).to(device), torch.FloatTensor(idct_m).to(device) + + +def dct_transform_torch(data, dct_m, dct_n): + ''' + B, 60, 35 + ''' + batch_size, features, seq_len = data.shape + + data = data.contiguous().view(-1, seq_len) # [180077*60, 35] + data = data.permute(1, 0) # [35, b*60] + + out_data = torch.matmul(dct_m[:dct_n, :], data) # [dct_n, 180077*60] + out_data = out_data.permute(1, 0).contiguous().view(-1, features, dct_n) # [b, 60, dct_n] + return out_data + +def reverse_dct_torch(dct_data, idct_m, seq_len): + ''' + B, 60, 35 + ''' + batch_size, features, dct_n = dct_data.shape + + dct_data = dct_data.permute(2, 0, 1).contiguous().view(dct_n, -1) # dct_n, B*60 + out_data = torch.matmul(idct_m[:, :dct_n], dct_data).contiguous().view(seq_len, batch_size, -1).permute(1, 2, 0) + return out_data + + +class Proc(nn.Module): + def __init__(self, args): + super(Proc, self).__init__() + + 
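+         # Proc prepares the multi-scale MSR-GCN inputs: it pads the observed pose by repeating
+         # its last frame output_n times, selects the 22 used joints (dim_used) and group-averages
+         # them down to the 12/7/4-joint scales, DCT-encodes each scale with dct_used coefficients,
+         # and min-max normalizes to [-1, 1] using global_min/global_max.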
self.dct_used = args.dct_used + self.input_n = args.input_n + self.output_n = args.output_n + self.dct_m, self.idct_m = get_dct_matrix(self.input_n + self.output_n, args.device) + self.global_min = args.global_min + self.global_max = args.global_max + + self.args = args + + self.dim_repeat_22 = [27, 28, 29, 27, 28, 29, 42, 43, 44, 48, 49, 50, 57, 58, 59, 63, 64, 65] + self.dim_repeat_32 = [48,49,50, 72,73,74, 60,61,62, 69,70,71, 84,85,86, 93,94,95] + + self.dim_replace = [0,1,2, 3,4,5, 18,19,20, 33,34,35] + + joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + dimensions_to_use = np.setdiff1d(np.arange(96), dimensions_to_ignore) + self.dim_used = dimensions_to_use + + self.Index2212 = [[0], [1, 2, 3], [4], [5, 6, 7], [8, 9], [10, 11], [12], [13], [14, 15, 16], [17], [18], [19, 20, 21]] + self.Index127 = [[0, 1], [2, 3], [4, 5], [6, 7], [7, 8], [9, 10], [10, 11]] + self.Index74 = [[0, 2], [1, 2], [3, 4], [5, 6]] + + def down(self, x, index): + N, features, seq_len = x.shape + my_data = x.reshape(N, -1, 3, seq_len) # x, 22, 3, 10 + da = torch.zeros((N, len(index), 3, seq_len)).to(x.device) # x, 12, 3, 10 + for i in range(len(index)): + da[:, i, :, :] = torch.mean(my_data[:, index[i], :, :], dim=1) + da = da.reshape(N, -1, seq_len) + return da + + def forward(self, x, preproc): + if preproc: + x32 = x.permute((0,2,1)) + x32 = torch.cat([x32, x32[:,:,-1].unsqueeze(-1).repeat(1,1,self.output_n)], dim=2) + + x22 = x32[:, self.dim_used, :] + x12 = self.down(x22, self.Index2212) + x7 = self.down(x12, self.Index127) + x4 = self.down(x7, self.Index74) + + x32 = dct_transform_torch(x32, self.dct_m, self.dct_used) + x22 = dct_transform_torch(x22, self.dct_m, self.dct_used) + x12 = dct_transform_torch(x12, self.dct_m, self.dct_used) + x7 = dct_transform_torch(x7, self.dct_m, self.dct_used) + x4 = dct_transform_torch(x4, self.dct_m, self.dct_used) + + x32 = (x32-self.global_min)/(self.global_max-self.global_min) + x22 = (x22-self.global_min)/(self.global_max-self.global_min) + x12 = (x12-self.global_min)/(self.global_max-self.global_min) + x7 = (x7-self.global_min)/(self.global_max-self.global_min) + x4 = (x4-self.global_min)/(self.global_max-self.global_min) + + x32=x32*2-1 + x22=x22*2-1 + x12=x12*2-1 + x7=x7*2-1 + x4=x4*2-1 + + # print(x32.shape, x22.shape, x12.shape, x7.shape, x4.shape) + + # extend inputs + dct + global min and max + return { + "p32":x32, + "p22":x22, + "p12":x12, + "p7":x7, + "p4":x4 + } + else: + return x diff --git a/models/msr_gcn/utils/__init__.py b/models/msr_gcn/utils/__init__.py new file mode 100644 index 0000000..8019229 --- /dev/null +++ b/models/msr_gcn/utils/__init__.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python +# encoding: utf-8 +''' +@project : MSRGCN +@file : __init__.py +@author : Droliven +@contact : droliven@163.com +@ide : PyCharm +@time : 2021-07-27 16:34 +''' + +from .dct import get_dct_matrix, reverse_dct_torch, reverse_dct_numpy +from .data_utils import define_actions, define_actions_cmu +from .draw_pictures import draw_pic_gt_pred, draw_pic_single_2d, draw_pic_single \ No newline at end of file diff --git a/models/msr_gcn/utils/data_utils.py b/models/msr_gcn/utils/data_utils.py new file mode 100644 index 0000000..753f6ba --- /dev/null +++ b/models/msr_gcn/utils/data_utils.py @@ -0,0 +1,839 @@ +#!/usr/bin/env python +# encoding: utf-8 +''' +@project : MSRGCN +@file : data_utils.py +@author : Droliven +@contact : droliven@163.com +@ide 
: PyCharm +@time : 2021-07-27 16:59 +''' + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import torch +import os +from . import forward_kinematics + + +def rotmat2euler(R): + """ + Converts a rotation matrix to Euler angles + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1 + + Args + R: a 3x3 rotation matrix + Returns + eul: a 3x1 Euler angle representation of R + """ + if R[0, 2] == 1 or R[0, 2] == -1: + # special case + E3 = 0 # set arbitrarily + dlta = np.arctan2(R[0, 1], R[0, 2]); + + if R[0, 2] == -1: + E2 = np.pi / 2; + E1 = E3 + dlta; + else: + E2 = -np.pi / 2; + E1 = -E3 + dlta; + + else: + E2 = -np.arcsin(R[0, 2]) + E1 = np.arctan2(R[1, 2] / np.cos(E2), R[2, 2] / np.cos(E2)) + E3 = np.arctan2(R[0, 1] / np.cos(E2), R[0, 0] / np.cos(E2)) + + eul = np.array([E1, E2, E3]); + return eul + + +def rotmat2quat(R): + """ + Converts a rotation matrix to a quaternion + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4 + + Args + R: 3x3 rotation matrix + Returns + q: 1x4 quaternion + """ + rotdiff = R - R.T; + + r = np.zeros(3) + r[0] = -rotdiff[1, 2] + r[1] = rotdiff[0, 2] + r[2] = -rotdiff[0, 1] + sintheta = np.linalg.norm(r) / 2; + r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps); + + costheta = (np.trace(R) - 1) / 2; + + theta = np.arctan2(sintheta, costheta); + + q = np.zeros(4) + q[0] = np.cos(theta / 2) + q[1:] = r0 * np.sin(theta / 2) + return q + + +def rotmat2expmap(R): + return quat2expmap(rotmat2quat(R)); + + +def expmap2rotmat(r): + """ + Converts an exponential map angle to a rotation matrix + Matlab port to python for evaluation purposes + I believe this is also called Rodrigues' formula + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m + + Args + r: 1x3 exponential map + Returns + R: 3x3 rotation matrix + """ + theta = np.linalg.norm(r) + r0 = np.divide(r, theta + np.finfo(np.float32).eps) + r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3, 3) + r0x = r0x - r0x.T + R = np.eye(3, 3) + np.sin(theta) * r0x + (1 - np.cos(theta)) * (r0x).dot(r0x); + return R + + +def quat2expmap(q): + """ + Converts a quaternion to an exponential map + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1 + + Args + q: 1x4 quaternion + Returns + r: 1x3 exponential map + Raises + ValueError if the l2 norm of the quaternion is not close to 1 + """ + if (np.abs(np.linalg.norm(q) - 1) > 1e-3): + raise (ValueError, "quat2expmap: input quaternion is not norm 1") + + sinhalftheta = np.linalg.norm(q[1:]) + coshalftheta = q[0] + + r0 = np.divide(q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps)); + theta = 2 * np.arctan2(sinhalftheta, coshalftheta) + theta = np.mod(theta + 2 * np.pi, 2 * np.pi) + + if theta > np.pi: + theta = 2 * np.pi - theta + r0 = -r0 + + r = r0 * theta + return r + + +def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot): + """Borrowed from SRNN code. Reads a csv file and returns a float32 matrix. 
+ https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12 + + Args + normalizedData: nxd matrix with normalized data + data_mean: vector of mean used to normalize the data + data_std: vector of standard deviation used to normalize the data + dimensions_to_ignore: vector with dimensions not used by the model + actions: list of strings with the encoded actions + one_hot: whether the data comes with one-hot encoding + Returns + origData: data originally used to + """ + T = normalizedData.shape[0] + D = data_mean.shape[0] + + origData = np.zeros((T, D), dtype=np.float32) + dimensions_to_use = [] + for i in range(D): + if i in dimensions_to_ignore: + continue + dimensions_to_use.append(i) + dimensions_to_use = np.array(dimensions_to_use) + + if one_hot: + origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)] + else: + origData[:, dimensions_to_use] = normalizedData + + # potentially ineficient, but only done once per experiment + stdMat = data_std.reshape((1, D)) + stdMat = np.repeat(stdMat, T, axis=0) + meanMat = data_mean.reshape((1, D)) + meanMat = np.repeat(meanMat, T, axis=0) + origData = np.multiply(origData, stdMat) + meanMat + return origData + + +def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot): + """ + Converts the output of the neural network to a format that is more easy to + manipulate for, e.g. conversion to other format or visualization + + Args + poses: The output from the TF model. A list with (seq_length) entries, + each with a (batch_size, dim) output + Returns + poses_out: A tensor of size (batch_size, seq_length, dim) output. Each + batch is an n-by-d sequence of poses. + """ + seq_len = len(poses) + if seq_len == 0: + return [] + + batch_size, dim = poses[0].shape + + poses_out = np.concatenate(poses) + poses_out = np.reshape(poses_out, (seq_len, batch_size, dim)) + poses_out = np.transpose(poses_out, [1, 0, 2]) + + poses_out_list = [] + for i in xrange(poses_out.shape[0]): + poses_out_list.append( + unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot)) + + return poses_out_list + + +def readCSVasFloat(filename): + """ + Borrowed from SRNN code. Reads a csv and returns a float matrix. + https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34 + + Args + filename: string. Path to the csv file + Returns + returnArray: the read data in a float32 matrix + """ + returnArray = [] + lines = open(filename).readlines() + for line in lines: + line = line.strip().split(',') + if len(line) > 0: + returnArray.append(np.array([np.float32(x) for x in line])) + + returnArray = np.array(returnArray) + return returnArray + + +def normalize_data(data, data_mean, data_std, dim_to_use, actions, one_hot): + """ + Normalize input data by removing unused dimensions, subtracting the mean and + dividing by the standard deviation + + Args + data: nx99 matrix with data to normalize + data_mean: vector of mean used to normalize the data + data_std: vector of standard deviation used to normalize the data + dim_to_use: vector with dimensions used by the model + actions: list of strings with the encoded actions + one_hot: whether the data comes with one-hot encoding + Returns + data_out: the passed data matrix, but normalized + """ + data_out = {} + nactions = len(actions) + + if not one_hot: + # No one-hot encoding... 
no need to do anything special
+        for key in data.keys():
+            data_out[key] = np.divide((data[key] - data_mean), data_std)
+            data_out[key] = data_out[key][:, dim_to_use]
+
+    else:
+        # TODO hard-coding 99 dimensions for un-normalized human poses
+        for key in data.keys():
+            data_out[key] = np.divide((data[key][:, 0:99] - data_mean), data_std)
+            data_out[key] = data_out[key][:, dim_to_use]
+            data_out[key] = np.hstack((data_out[key], data[key][:, -nactions:]))
+
+    return data_out
+
+
+def normalization_stats(completeData):
+    """
+    Also borrowed from SRNN code. Computes mean, stdev and dimensions to ignore.
+    https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33
+
+    Args
+        completeData: nx99 matrix with data to normalize
+    Returns
+        data_mean: vector of mean used to normalize the data
+        data_std: vector of standard deviation used to normalize the data
+        dimensions_to_ignore: vector with dimensions not used by the model
+        dimensions_to_use: vector with dimensions used by the model
+    """
+    data_mean = np.mean(completeData, axis=0)
+    data_std = np.std(completeData, axis=0)
+
+    dimensions_to_ignore = []
+    dimensions_to_use = []
+
+    dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0]))
+    dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0]))
+
+    data_std[dimensions_to_ignore] = 1.0
+
+    return data_mean, data_std, dimensions_to_ignore, dimensions_to_use
+
+
+def define_actions(action):
+    """
+    Define the list of actions we are using.
+
+    Args
+        action: String with the passed action. Could be "all"
+    Returns
+        actions: List of strings of actions
+    Raises
+        ValueError if the action is not included in H3.6M
+    """
+
+    actions = ["walking", "eating", "smoking", "discussion", "directions",
+               "greeting", "phoning", "posing", "purchases", "sitting",
+               "sittingdown", "takingphoto", "waiting", "walkingdog",
+               "walkingtogether"]
+    if action in actions:
+        return [action]
+
+    if action == "all":
+        return actions[:2]
+
+    if action == "all_srnn":
+        return ["walking", "eating", "smoking", "discussion"]
+
+    raise ValueError("Unrecognized action: %s" % action)
+
+
+"""all methods above are borrowed from https://github.com/una-dinosauria/human-motion-prediction"""
+
+
+def define_actions_cmu(action):
+    """
+    Define the list of actions we are using.
+
+    Args
+        action: String with the passed action. 
Could be "all" + Returns + actions: List of strings of actions + Raises + ValueError if the action is not included in H3.6M + """ + + actions = ["basketball", "basketball_signal", "directing_traffic", "jumping", "running", "soccer", "walking", + "washwindow"] + if action in actions: + return [action] + + if action == "all": + return actions + + raise (ValueError, "Unrecognized action: %d" % action) + + +def load_data_cmu(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False): + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + count = count + 1 + for examp_index in np.arange(count): + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, 2) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + if not is_test: + fs = np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + source_seq_len = 50 + target_seq_len = 25 + total_frames = source_seq_len + target_seq_len + batch_size = 8 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + for _ in range(batch_size): + idx = rng.randint(0, num_frames - total_frames) + seq_sel = the_sequence[ + idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] + seq_sel = np.expand_dims(seq_sel, axis=0) + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + if not is_test: + data_std = np.std(complete_seq, axis=0) + data_mean = np.mean(complete_seq, axis=0) + + dimensions_to_ignore = [] + dimensions_to_use = [] + dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) + dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) + data_std[dimensions_to_ignore] = 1.0 + data_mean[dimensions_to_ignore] = 0.0 + + return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std + + +def load_data_cmu_3d(path_to_dataset, actions, input_n, output_n, sample_rate=2, data_std=0, data_mean=0, is_test=False, device="cuda:0", test_manner="all"): + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + count = count + 1 + for examp_index in np.arange(count): + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + exptmps = torch.from_numpy(action_sequence).float() + + xyz = expmap2xyz_torch_cmu(exptmps) + xyz = xyz.view(-1, 38 * 3) + xyz = xyz.cpu().data.numpy() + action_sequence = xyz + + even_list = range(0, n, sample_rate) + the_sequence = np.array(action_sequence[even_list, :]) # x, 114 + 
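+             # The comments below (originally in Chinese) mark two sampling modes: training and the
+             # "all" test manner take every overlapping window of seq_len frames, while test manner
+             # "8" draws 8 windows per sequence with a fixed seed (following the SRNN protocol).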
num_frames = len(the_sequence) + # 训练集,整体测试集 + if (not is_test) or (is_test and test_manner == "all"): + fs = np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + # 测试集 随机挑选 8 + elif test_manner == "8": + # 水滴测试 + source_seq_len = 50 + target_seq_len = 25 + total_frames = source_seq_len + target_seq_len + batch_size = 8 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + for _ in range(batch_size): + idx = rng.randint(0, num_frames - total_frames) + seq_sel = the_sequence[idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] # 35, 114 + seq_sel = np.expand_dims(seq_sel, axis=0) + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + if not is_test: + data_std = np.std(complete_seq, axis=0) + data_mean = np.mean(complete_seq, axis=0) + + joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore) + + # data_std[dimensions_to_ignore] = 1.0 + # data_mean[dimensions_to_ignore] = 0.0 + + return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std + + +def rotmat2euler_torch(R, device="cuda:0"): + """ + Converts a rotation matrix to euler angles + batch pytorch version ported from the corresponding numpy method above + + :param R:N*3*3 + :return: N*3 + """ + n = R.data.shape[0] + eul = torch.zeros(n, 3).float() + idx_spec1 = (R[:, 0, 2] == 1).nonzero().cpu().data.numpy().reshape(-1).tolist() + idx_spec2 = (R[:, 0, 2] == -1).nonzero().cpu().data.numpy().reshape(-1).tolist() + if len(idx_spec1) > 0: + R_spec1 = R[idx_spec1, :, :] + eul_spec1 = torch.zeros(len(idx_spec1), 3).float() + eul_spec1[:, 2] = 0 + eul_spec1[:, 1] = -np.pi / 2 + delta = torch.atan2(R_spec1[:, 0, 1], R_spec1[:, 0, 2]) + eul_spec1[:, 0] = delta + eul[idx_spec1, :] = eul_spec1 + + if len(idx_spec2) > 0: + R_spec2 = R[idx_spec2, :, :] + eul_spec2 = torch.zeros(len(idx_spec2), 3).float() + + eul_spec2[:, 2] = 0 + eul_spec2[:, 1] = np.pi / 2 + delta = torch.atan2(R_spec2[:, 0, 1], R_spec2[:, 0, 2]) + eul_spec2[:, 0] = delta + eul[idx_spec2] = eul_spec2 + + idx_remain = np.arange(0, n) + idx_remain = np.setdiff1d(np.setdiff1d(idx_remain, idx_spec1), idx_spec2).tolist() + if len(idx_remain) > 0: + R_remain = R[idx_remain, :, :] + eul_remain = torch.zeros(len(idx_remain), 3).float() + eul_remain[:, 1] = -torch.asin(R_remain[:, 0, 2]) + eul_remain[:, 0] = torch.atan2(R_remain[:, 1, 2] / torch.cos(eul_remain[:, 1]), + R_remain[:, 2, 2] / torch.cos(eul_remain[:, 1])) + eul_remain[:, 2] = torch.atan2(R_remain[:, 0, 1] / torch.cos(eul_remain[:, 1]), + R_remain[:, 0, 0] / torch.cos(eul_remain[:, 1])) + eul[idx_remain, :] = eul_remain + + return eul + + +def rotmat2quat_torch(R, device="cuda:0"): + """ + Converts a rotation matrix to quaternion + batch pytorch version ported from the corresponding numpy method above + :param R: N * 3 * 3 + :return: N * 4 + """ + 
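+     # The rotation axis is recovered from the skew-symmetric part of R (r = vee(R - R^T)),
+     # the angle from atan2(|r| / 2, (trace(R) - 1) / 2), and the quaternion is then
+     # q = [cos(theta/2), sin(theta/2) * r_hat], mirroring the numpy rotmat2quat above.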
rotdiff = R - R.transpose(1, 2) + r = torch.zeros_like(rotdiff[:, 0]) + r[:, 0] = -rotdiff[:, 1, 2] + r[:, 1] = rotdiff[:, 0, 2] + r[:, 2] = -rotdiff[:, 0, 1] + r_norm = torch.norm(r, dim=1) + sintheta = r_norm / 2 + r0 = torch.div(r, r_norm.unsqueeze(1).repeat(1, 3) + 0.00000001) + t1 = R[:, 0, 0] + t2 = R[:, 1, 1] + t3 = R[:, 2, 2] + costheta = (t1 + t2 + t3 - 1) / 2 + theta = torch.atan2(sintheta, costheta) + q = torch.zeros(R.shape[0], 4).float() + q[:, 0] = torch.cos(theta / 2) + q[:, 1:] = torch.mul(r0, torch.sin(theta / 2).unsqueeze(1).repeat(1, 3)) + + return q + + +def expmap2quat_torch(exp): + """ + Converts expmap to quaternion + batch pytorch version ported from the corresponding numpy method above + :param R: N*3 + :return: N*4 + """ + theta = torch.norm(exp, p=2, dim=1).unsqueeze(1) + v = torch.div(exp, theta.repeat(1, 3) + 0.0000001) + sinhalf = torch.sin(theta / 2) + coshalf = torch.cos(theta / 2) + q1 = torch.mul(v, sinhalf.repeat(1, 3)) + q = torch.cat((coshalf, q1), dim=1) + return q + + +def expmap2rotmat_torch(r, device="cuda:0"): + """ + Converts expmap matrix to rotation + batch pytorch version ported from the corresponding method above + :param r: N*3 + :return: N*3*3 + """ + theta = torch.norm(r, 2, 1) + r0 = torch.div(r, theta.unsqueeze(1).repeat(1, 3) + 0.0000001) + r1 = torch.zeros_like(r0).repeat(1, 3) + r1[:, 1] = -r0[:, 2] + r1[:, 2] = r0[:, 1] + r1[:, 5] = -r0[:, 0] + r1 = r1.view(-1, 3, 3) + r1 = r1 - r1.transpose(1, 2) + n = r1.data.shape[0] + R = torch.eye(3, 3).repeat(n, 1, 1).float().to(device) + torch.mul( + torch.sin(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3), r1) + torch.mul( + (1 - torch.cos(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3)), torch.matmul(r1, r1)) + return R + + +def expmap2xyz_torch(expmap, device="cuda:0"): + """ + convert expmaps to joint locations + :param expmap: N*99 + :return: N*32*3 + """ + parent, offset, rotInd, expmapInd = forward_kinematics._some_variables() + xyz = forward_kinematics.fkl_torch(expmap, parent, offset, rotInd, expmapInd, device) + return xyz + + +def expmap2xyz_torch_cmu(expmap): + parent, offset, rotInd, expmapInd = forward_kinematics._some_variables_cmu() + xyz = forward_kinematics.fkl_torch(expmap, parent, offset, rotInd, expmapInd) + return xyz + + +def load_data(path_to_dataset, subjects, actions, sample_rate, seq_len, input_n=10, data_mean=None, data_std=None): + """ + adapted from + https://github.com/una-dinosauria/human-motion-prediction/src/data_utils.py#L216 + + :param path_to_dataset: path of dataset + :param subjects: + :param actions: + :param sample_rate: + :param seq_len: past frame length + future frame length + :param is_norm: normalize the expmap or not + :param data_std: standard deviation of the expmap + :param data_mean: mean of the expmap + :param input_n: past frame length + :return: + """ + + sampled_seq = [] + complete_seq = [] + # actions_all = define_actions("all") + # one_hot_all = np.eye(len(actions_all)) + for subj in subjects: + for action_idx in np.arange(len(actions)): + action = actions[action_idx] + if not (subj == 5): + for subact in [1, 2]: # subactions + + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact)) + + filename = '{0}/S{1}/{2}_{3}.txt'.format(path_to_dataset, subj, action, subact) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, sample_rate) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + fs = np.arange(0, num_frames - seq_len 
+ 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(path_to_dataset, subj, action, 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, sample_rate) + the_sequence1 = np.array(action_sequence[even_list, :]) + num_frames1 = len(the_sequence1) + + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(path_to_dataset, subj, action, 2) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, sample_rate) + the_sequence2 = np.array(action_sequence[even_list, :]) + num_frames2 = len(the_sequence2) + + fs_sel1, fs_sel2 = find_indices_srnn(num_frames1, num_frames2, seq_len, input_n=input_n) + seq_sel1 = the_sequence1[fs_sel1, :] + seq_sel2 = the_sequence2[fs_sel2, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel1 + sampled_seq = np.concatenate((sampled_seq, seq_sel2), axis=0) + complete_seq = the_sequence1 + complete_seq = np.append(complete_seq, the_sequence2, axis=0) + + # if is not testing or validation then get the data statistics + if not (subj == 5 and subj == 11): + data_std = np.std(complete_seq, axis=0) + data_mean = np.mean(complete_seq, axis=0) + + dimensions_to_ignore = [] + dimensions_to_use = [] + dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) + dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) + data_std[dimensions_to_ignore] = 1.0 + data_mean[dimensions_to_ignore] = 0.0 + + return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std + + +def load_data_3d(path_to_dataset, subjects, actions, sample_rate, seq_len, device="cuda:0", test_manner="all"): + """ + + adapted from + https://github.com/una-dinosauria/human-motion-prediction/src/data_utils.py#L216 + :param path_to_dataset: + :param subjects: + :param actions: + :param sample_rate: + :param seq_len: + :return: + """ + + sampled_seq = [] + complete_seq = [] + for subj in subjects: + for action_idx in np.arange(len(actions)): + action = actions[action_idx] + if not (subj == 5): + for subact in [1, 2]: # subactions + + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact)) + + filename = '{0}/S{1}/{2}_{3}.txt'.format(path_to_dataset, subj, action, subact) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, sample_rate) + num_frames = len(even_list) + the_sequence = np.array(action_sequence[even_list, :]) + the_seq = torch.from_numpy(the_sequence).float() + # remove global rotation and translation + the_seq[:, 0:6] = 0 + p3d = expmap2xyz_torch(the_seq) + the_sequence = p3d.view(num_frames, -1).cpu().data.numpy() + + fs = np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + + # print([num_frames, len(seq_sel)]) + + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = 
np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(path_to_dataset, subj, action, 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, sample_rate) + + num_frames1 = len(even_list) + the_sequence1 = np.array(action_sequence[even_list, :]) + the_seq1 = torch.from_numpy(the_sequence1).float() + the_seq1[:, 0:6] = 0 + p3d1 = expmap2xyz_torch(the_seq1) + the_sequence1 = p3d1.view(num_frames1, -1).cpu().data.numpy() + + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(path_to_dataset, subj, action, 2) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, sample_rate) + + num_frames2 = len(even_list) + the_sequence2 = np.array(action_sequence[even_list, :]) + the_seq2 = torch.from_numpy(the_sequence2).float() + the_seq2[:, 0:6] = 0 + p3d2 = expmap2xyz_torch(the_seq2) + the_sequence2 = p3d2.view(num_frames2, -1).cpu().data.numpy() + + if test_manner == "all": + # # 全部数据用来测试 + fs_sel1 = [np.arange(i, i + seq_len) for i in range(num_frames1 - 100)] + fs_sel2 = [np.arange(i, i + seq_len) for i in range(num_frames2 - 100)] + elif test_manner == "8": + # 随机取 8 个 + fs_sel1, fs_sel2 = find_indices_srnn(num_frames1, num_frames2, seq_len) + + seq_sel1 = the_sequence1[fs_sel1, :] + seq_sel2 = the_sequence2[fs_sel2, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel1 + sampled_seq = np.concatenate((sampled_seq, seq_sel2), axis=0) + complete_seq = the_sequence1 + complete_seq = np.append(complete_seq, the_sequence2, axis=0) + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel1), axis=0) + sampled_seq = np.concatenate((sampled_seq, seq_sel2), axis=0) + complete_seq = np.append(complete_seq, the_sequence1, axis=0) + complete_seq = np.append(complete_seq, the_sequence2, axis=0) + + # ignore constant joints and joints at same position with other joints + joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31]) + # 2, 3, 4, 5, 7, 8, 9, 10, 12, 13, 14, 15, 17, 18, 19, 21, 22, 25, 26, 27, 29, 30 + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore) + + return sampled_seq, dimensions_to_ignore, dimensions_to_use + + +def find_indices_srnn(frame_num1, frame_num2, seq_len, input_n=10): + """ + Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 + + which originaly from + In order to find the same action indices as in SRNN. 
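As a quick illustration of how `load_data_3d` above turns joint indices into flat coordinate indices (editor's sketch, not part of the patch; the sizes assume the 32-joint, 96-dimensional H3.6M pose layout used here):
```
import numpy as np

# Joints dropped by load_data_3d (constant joints or joints duplicated elsewhere).
joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31])
# Each joint owns three consecutive coordinates, so expand to x/y/z indices.
dimensions_to_ignore = np.concatenate((joint_to_ignore * 3,
                                       joint_to_ignore * 3 + 1,
                                       joint_to_ignore * 3 + 2))
dimensions_to_use = np.setdiff1d(np.arange(96), dimensions_to_ignore)
print(dimensions_to_use.size)  # 66, i.e. 22 usable joints * 3 coordinates
```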
+ https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 + """ + + # Used a fixed dummy seed, following + # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + + T1 = frame_num1 - 150 + T2 = frame_num2 - 150 # seq_len + idxo1 = None + idxo2 = None + for _ in np.arange(0, 4): + idx_ran1 = rng.randint(16, T1) + idx_ran2 = rng.randint(16, T2) + idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) + idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) + if idxo1 is None: + idxo1 = idxs1 + idxo2 = idxs2 + else: + idxo1 = np.vstack((idxo1, idxs1)) + idxo2 = np.vstack((idxo2, idxs2)) + return idxo1, idxo2 + + + +if __name__ == "__main__": + actions = define_actions("all") + load_data("F:\\model_report_data\\data\\human36mData3D\\others\\h3.6m\\dataset", [1, 6, 7, 8, 9], actions, 2, 35, input_n=10, data_mean=None, data_std=None) + + pass \ No newline at end of file diff --git a/models/msr_gcn/utils/dct.py b/models/msr_gcn/utils/dct.py new file mode 100644 index 0000000..b37981b --- /dev/null +++ b/models/msr_gcn/utils/dct.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# encoding: utf-8 +''' +@project : MSRGCN +@file : dct.py +@author : Droliven +@contact : droliven@163.com +@ide : PyCharm +@time : 2021-07-27 21:18 +''' +import torch +import numpy as np + +# ********************** 离散余弦变换基础帧 **************************** + +def get_dct_matrix(N): + dct_m = np.eye(N) + for k in np.arange(N): + for i in np.arange(N): + w = np.sqrt(2 / N) + if k == 0: + w = np.sqrt(1 / N) + dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) + idct_m = np.linalg.inv(dct_m) + return dct_m, idct_m + +def dct_transform_numpy(data, dct_m, dct_n): + ''' + B, 60, 35 + ''' + batch_size, features, seq_len = data.shape + data = data.reshape(-1, seq_len) # [180077*60, 35] + data = data.transpose(1, 0) # [35, b*60] + + out_data = np.matmul(dct_m[:dct_n, :], data) # [dct_n, 180077*60] + out_data = out_data.transpose().reshape((-1, features, dct_n)) # [b, 60, dct_n] + return out_data + +def reverse_dct_numpy(dct_data, idct_m, seq_len): + ''' + B, 60, 35 + ''' + batch_size, features, dct_n = dct_data.shape + + dct_data = dct_data.transpose(2, 0, 1).reshape((dct_n, -1)) # dct_n, B*60 + out_data = np.matmul(idct_m[:, :dct_n], dct_data).reshape((seq_len, batch_size, -1)).transpose(1, 2, 0) + return out_data + +def dct_transform_torch(data, dct_m, dct_n): + ''' + B, 60, 35 + ''' + batch_size, features, seq_len = data.shape + + data = data.contiguous().view(-1, seq_len) # [180077*60, 35] + data = data.permute(1, 0) # [35, b*60] + + out_data = torch.matmul(dct_m[:dct_n, :], data) # [dct_n, 180077*60] + out_data = out_data.permute(1, 0).contiguous().view(-1, features, dct_n) # [b, 60, dct_n] + return out_data + +def reverse_dct_torch(dct_data, idct_m, seq_len): + ''' + B, 60, 35 + ''' + batch_size, features, dct_n = dct_data.shape + + dct_data = dct_data.permute(2, 0, 1).contiguous().view(dct_n, -1) # dct_n, B*60 + out_data = torch.matmul(idct_m[:, :dct_n], dct_data).contiguous().view(seq_len, batch_size, -1).permute(1, 2, 0) + return out_data \ No newline at end of file diff --git a/models/msr_gcn/utils/draw_pictures.py b/models/msr_gcn/utils/draw_pictures.py new file mode 100644 index 0000000..8c01c36 --- /dev/null +++ b/models/msr_gcn/utils/draw_pictures.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python +# encoding: utf-8 +''' +@project : MSRGCN 
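The DCT helpers in `dct.py` above pair `get_dct_matrix` with its inverse. A minimal round-trip check (editor's sketch in plain NumPy, mirroring `dct_transform_numpy` / `reverse_dct_numpy`) shows that keeping all `dct_n = seq_len` coefficients reconstructs the input exactly, while a smaller `dct_n` keeps only the low-frequency components:
```
import numpy as np

seq_len = 35
dct_m = np.eye(seq_len)
for k in range(seq_len):
    for i in range(seq_len):
        w = np.sqrt(1 / seq_len) if k == 0 else np.sqrt(2 / seq_len)
        dct_m[k, i] = w * np.cos(np.pi * (i + 0.5) * k / seq_len)
idct_m = np.linalg.inv(dct_m)

x = np.random.randn(4, 60, seq_len)            # (batch, features, seq_len)
coeffs = np.einsum('kt,bft->bfk', dct_m, x)    # forward DCT over the time axis
recon = np.einsum('tk,bfk->bft', idct_m, coeffs)
print(np.allclose(recon, x))                   # True
```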
+@file : draw_pictures.py +@author : Droliven +@contact : droliven@163.com +@ide : PyCharm +@time : 2021-07-27 21:22 +''' +import numpy as np +import matplotlib + +matplotlib.use('agg') +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import seaborn as sns + + +def draw_pic_single(mydata, I, J, LR, full_path): + # 22, 3 + # I + # J + # LR + + # # **************************** + # # 调整坐标,规范数据格式,:这里由于转换过来后本身应满足需求,不需要专门 revert_coordinate 或者交换坐标轴 + mydata = mydata[:, [0, 2, 1]] + # # **************************** + + x = mydata[:, 0] + y = mydata[:, 1] + z = mydata[:, 2] + + plt.figure() + ax = plt.subplot(111, projection='3d') + ax.grid(False) + ax.set_xlabel('x') + ax.set_ylabel('y') + ax.set_zlabel('z') + ax.set_xlim3d([-1000, 1000]) + ax.set_ylim3d([-1000, 1000]) + ax.set_zlim3d([-1000, 1000]) + + ax.scatter(x, y, z, c='b') + + # (250, 40, 40) #FA2828 红 + # (245, 125, 125) #F57D7D 粉 + # (11, 11, 11) #0B0B0B 黑色 + # (180, 180, 180) #B4B4B4 灰色 + + # Make connection matrix + for i in np.arange(len(I)): + x, y, z = [np.array([mydata[I[i], j], mydata[J[i], j]]) for j in range(3)] + ax.plot(x, y, z, lw=2, c='#B4B4B4' if LR[i] else '#B4B4B4') + + plt.savefig(full_path) + plt.close() + +def draw_pic_single_2d(mydata, I, J, LR, full_path): + x = mydata[:, 0] + y = mydata[:, 1] + + plt.figure(figsize=(6, 6)) + + plt.scatter(x, y, c='r') + + # (250, 40, 40) #FA2828 红 + # (245, 125, 125) #F57D7D 粉 + # (11, 11, 11) #0B0B0B 黑色 + # (180, 180, 180) #B4B4B4 灰色 + + # Make connection matrix + for i in np.arange(len(I)): + x, y = [np.array([mydata[I[i], j], mydata[J[i], j]]) for j in range(2)] + # ax.plot(x, y, z, lw=2, color='#FA2828' if LR[i] else '#F57D7D') + # ax.plot(x, y, z, lw=2, color='#0B0B0B' if LR[i] else '#B4B4B4') + plt.plot(x, y, lw=2, color='g' if LR[i] else 'b') + + plt.xlim((-800, 800)) + plt.ylim((-1500, 800)) + # 设置坐标轴名称 + plt.xlabel('x') + plt.ylabel('y') + # 设置坐标轴刻度 + my_x_ticks = np.arange(-1000, 1000, 200) + my_y_ticks = np.arange(-1000, 1000, 200) + plt.xticks(my_x_ticks) + plt.yticks(my_y_ticks) + plt.grid(False) + + plt.savefig(full_path) + plt.close(1) + +def draw_pic_gt_pred(gt, pred, I, J, LR, full_path): + # # **************************** + # # 调整坐标,规范数据格式,:这里由于转换过来后本身应满足需求,不需要专门 revert_coordinate 或者交换坐标轴 + gt = gt[:, [0, 2, 1]] + pred = pred[:, [0, 2, 1]] + + # # **************************** + + plt.figure() + ax = plt.subplot(111, projection='3d') + ax.set_xlabel('x') + ax.set_ylabel('y') + ax.set_zlabel('z') + ax.set_xlim3d([-1000, 1000]) + ax.set_ylim3d([-1000, 1000]) + ax.set_zlim3d([-1000, 1000]) + + ax.scatter(gt[:, 0], gt[:, 1], gt[:, 2], c='k', linewidths=1) + ax.scatter(pred[:, 0], pred[:, 1], pred[:, 2], c='r', linewidths=1) + + # (250, 40, 40) #FA2828 红 + # (245, 125, 125) #F57D7D 粉 + # (11, 11, 11) #0B0B0B 黑色 + # (180, 180, 180) #B4B4B4 灰色 + + # Make connection matrix + for i in np.arange(len(I)): + x, y, z = [np.array([gt[I[i], j], gt[J[i], j]]) for j in range(3)] + ax.plot(x, y, z, lw=1, color='#0B0B0B' if LR[i] else '#B4B4B4') + for i in np.arange(len(I)): + x, y, z = [np.array([pred[I[i], j], pred[J[i], j]]) for j in range(3)] + ax.plot(x, y, z, lw=2, color='#FA2828' if LR[i] else '#F57D7D') + + plt.savefig(full_path) + plt.close() + +def draw_pic_gt_pred_2d(gt, pred, I, J, LR, full_path): + + plt.figure(figsize=(6, 6)) + + plt.scatter(gt[:, 0], gt[:, 1], c='k', linewidths=1) + plt.scatter(pred[:, 0], pred[:, 1], c='r', linewidths=1) + + # (250, 40, 40) #FA2828 红 + # (245, 125, 125) #F57D7D 粉 + # (11, 11, 11) #0B0B0B 黑色 + # (180, 180, 180) 
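A minimal usage sketch for the 2D drawing helper above (editor's example; the three-joint chain and the output file name are made up, and the import path assumes the package layout introduced by this patch):
```
import numpy as np
from models.msr_gcn.utils.draw_pictures import draw_pic_single_2d

# Hypothetical 3-joint chain in millimetres; bone k connects joints I[k] and J[k],
# and LR[k] selects its colour.
pose_2d = np.array([[0.0, 0.0], [0.0, -400.0], [150.0, -700.0]])
I = np.array([0, 1])
J = np.array([1, 2])
LR = np.array([True, False])
draw_pic_single_2d(pose_2d, I, J, LR, "toy_pose.png")
```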
#B4B4B4 灰色 + + # Make connection matrix + for i in np.arange(len(I)): + x, y = [np.array([gt[I[i], j], gt[J[i], j]]) for j in range(2)] + plt.plot(x, y, lw=1, color='#0B0B0B' if LR[i] else '#B4B4B4') + for i in np.arange(len(I)): + x, y = [np.array([pred[I[i], j], pred[J[i], j]]) for j in range(2)] + plt.plot(x, y, lw=2, color='#FA2828' if LR[i] else '#F57D7D') + + plt.xlim((-800, 800)) + plt.ylim((-1500, 800)) + # 设置坐标轴名称 + plt.xlabel('x') + plt.ylabel('y') + # 设置坐标轴刻度 + my_x_ticks = np.arange(-1000, 1000, 200) + my_y_ticks = np.arange(-1000, 1000, 200) + plt.xticks(my_x_ticks) + plt.yticks(my_y_ticks) + plt.grid(False) + + plt.savefig(full_path) + plt.close(1) + + +if __name__ == "__main__": + import numpy as np + + data = np.random.randn(220, 220) + diff --git a/models/msr_gcn/utils/forward_kinematics.py b/models/msr_gcn/utils/forward_kinematics.py new file mode 100644 index 0000000..08f6d2f --- /dev/null +++ b/models/msr_gcn/utils/forward_kinematics.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python +# encoding: utf-8 +''' +@project : MSRGCN +@file : forward_kinematics.py +@author : Droliven +@contact : droliven@163.com +@ide : PyCharm +@time : 2021-07-27 17:00 +''' +import numpy as np +import torch +from torch.autograd.variable import Variable +from . import data_utils + + +def fkl(angles, parent, offset, rotInd, expmapInd): + """ + Convert joint angles and bone lenghts into the 3d points of a person. + + adapted from + https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L14 + + which originaly based on expmap2xyz.m, available at + https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m + Args + angles: 99-long vector with 3d position and 3d joint angles in expmap format + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + Returns + xyz: 32x3 3d points that represent a person in 3d space + """ + + assert len(angles) == 99 + + # Structure that indicates parents for each joint + njoints = 32 + xyzStruct = [dict() for x in range(njoints)] + + for i in np.arange(njoints): + + # if not rotInd[i]: # If the list is empty + # xangle, yangle, zangle = 0, 0, 0 + # else: + # xangle = angles[rotInd[i][0] - 1] + # yangle = angles[rotInd[i][1] - 1] + # zangle = angles[rotInd[i][2] - 1] + if i == 0: + xangle = angles[0] + yangle = angles[1] + zangle = angles[2] + thisPosition = np.array([xangle, yangle, zangle]) + else: + thisPosition = np.array([0, 0, 0]) + + r = angles[expmapInd[i]] + + thisRotation = data_utils.expmap2rotmat(r) + + if parent[i] == -1: # Root node + xyzStruct[i]['rotation'] = thisRotation + xyzStruct[i]['xyz'] = np.reshape(offset[i, :], (1, 3)) + thisPosition + else: + xyzStruct[i]['xyz'] = (offset[i, :] + thisPosition).dot(xyzStruct[parent[i]]['rotation']) + \ + xyzStruct[parent[i]]['xyz'] + xyzStruct[i]['rotation'] = thisRotation.dot(xyzStruct[parent[i]]['rotation']) + + xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] + xyz = np.array(xyz).squeeze() + # xyz = xyz[:, [0, 2, 1]] + # xyz = xyz[:,[2,0,1]] + + return xyz + + +def _some_variables(): + """ + borrowed from + https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L100 + + We define some variables that are useful to run the kinematic tree + + Args + None + Returns + parent: 32-long 
vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + """ + + parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 1, 12, 13, 14, 15, 13, + 17, 18, 19, 20, 21, 20, 23, 13, 25, 26, 27, 28, 29, 28, 31]) - 1 + + offset = np.array( + [0.000000, 0.000000, 0.000000, -132.948591, 0.000000, 0.000000, 0.000000, -442.894612, 0.000000, 0.000000, + -454.206447, 0.000000, 0.000000, 0.000000, 162.767078, 0.000000, 0.000000, 74.999437, 132.948826, 0.000000, + 0.000000, 0.000000, -442.894413, 0.000000, 0.000000, -454.206590, 0.000000, 0.000000, 0.000000, 162.767426, + 0.000000, 0.000000, 74.999948, 0.000000, 0.100000, 0.000000, 0.000000, 233.383263, 0.000000, 0.000000, + 257.077681, 0.000000, 0.000000, 121.134938, 0.000000, 0.000000, 115.002227, 0.000000, 0.000000, 257.077681, + 0.000000, 0.000000, 151.034226, 0.000000, 0.000000, 278.882773, 0.000000, 0.000000, 251.733451, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999627, 0.000000, 100.000188, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 257.077681, 0.000000, 0.000000, 151.031437, 0.000000, 0.000000, 278.892924, + 0.000000, 0.000000, 251.728680, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999888, + 0.000000, 137.499922, 0.000000, 0.000000, 0.000000, 0.000000]) + offset = offset.reshape(-1, 3) + + rotInd = [[5, 6, 4], + [8, 9, 7], + [11, 12, 10], + [14, 15, 13], + [17, 18, 16], + [], + [20, 21, 19], + [23, 24, 22], + [26, 27, 25], + [29, 30, 28], + [], + [32, 33, 31], + [35, 36, 34], + [38, 39, 37], + [41, 42, 40], + [], + [44, 45, 43], + [47, 48, 46], + [50, 51, 49], + [53, 54, 52], + [56, 57, 55], + [], + [59, 60, 58], + [], + [62, 63, 61], + [65, 66, 64], + [68, 69, 67], + [71, 72, 70], + [74, 75, 73], + [], + [77, 78, 76], + []] + + expmapInd = np.split(np.arange(4, 100) - 1, 32) + + return parent, offset, rotInd, expmapInd + + +def _some_variables_cmu(): + """ + We define some variables that are useful to run the kinematic tree + + Args + None + Returns + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + """ + + parent = np.array([0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 1, 14, 15, 16, 17, 18, 19, 16, + 21, 22, 23, 24, 25, 26, 24, 28, 16, 30, 31, 32, 33, 34, 35, 33, 37]) - 1 + + offset = 70 * np.array( + [0, 0, 0, 0, 0, 0, 1.65674000000000, -1.80282000000000, 0.624770000000000, 2.59720000000000, -7.13576000000000, + 0, 2.49236000000000, -6.84770000000000, 0, 0.197040000000000, -0.541360000000000, 2.14581000000000, 0, 0, + 1.11249000000000, 0, 0, 0, -1.61070000000000, -1.80282000000000, 0.624760000000000, -2.59502000000000, + -7.12977000000000, 0, -2.46780000000000, -6.78024000000000, 0, -0.230240000000000, -0.632580000000000, + 2.13368000000000, 0, 0, 1.11569000000000, 0, 0, 0, 0.0196100000000000, 2.05450000000000, -0.141120000000000, + 0.0102100000000000, 2.06436000000000, -0.0592100000000000, 0, 0, 0, 0.00713000000000000, 1.56711000000000, + 0.149680000000000, 0.0342900000000000, 1.56041000000000, -0.100060000000000, 0.0130500000000000, + 1.62560000000000, -0.0526500000000000, 0, 0, 0, 3.54205000000000, 0.904360000000000, -0.173640000000000, + 4.86513000000000, 0, 0, 3.35554000000000, 0, 0, 0, 0, 0, 0.661170000000000, 0, 0, 0.533060000000000, 0, 0, 0, + 0, 0, 
0.541200000000000, 0, 0.541200000000000, 0, 0, 0, -3.49802000000000, 0.759940000000000, + -0.326160000000000, -5.02649000000000, 0, 0, -3.36431000000000, 0, 0, 0, 0, 0, -0.730410000000000, 0, 0, + -0.588870000000000, 0, 0, 0, 0, 0, -0.597860000000000, 0, 0.597860000000000]) + offset = offset.reshape(-1, 3) + + rotInd = [[6, 5, 4], + [9, 8, 7], + [12, 11, 10], + [15, 14, 13], + [18, 17, 16], + [21, 20, 19], + [], + [24, 23, 22], + [27, 26, 25], + [30, 29, 28], + [33, 32, 31], + [36, 35, 34], + [], + [39, 38, 37], + [42, 41, 40], + [45, 44, 43], + [48, 47, 46], + [51, 50, 49], + [54, 53, 52], + [], + [57, 56, 55], + [60, 59, 58], + [63, 62, 61], + [66, 65, 64], + [69, 68, 67], + [72, 71, 70], + [], + [75, 74, 73], + [], + [78, 77, 76], + [81, 80, 79], + [84, 83, 82], + [87, 86, 85], + [90, 89, 88], + [93, 92, 91], + [], + [96, 95, 94], + []] + posInd = [] + for ii in np.arange(38): + if ii == 0: + posInd.append([1, 2, 3]) + else: + posInd.append([]) + + expmapInd = np.split(np.arange(4, 118) - 1, 38) + + return parent, offset, posInd, expmapInd + + +def fkl_torch(angles, parent, offset, rotInd, expmapInd, device="cuda:0"): + """ + pytorch version of fkl. + + convert joint angles to joint locations + batch pytorch version of the fkl() method above + :param angles: N*99 + :param parent: + :param offset: + :param rotInd: + :param expmapInd: + :return: N*joint_n*3 + """ + n = angles.data.shape[0] + j_n = offset.shape[0] + p3d = torch.from_numpy(offset).float().unsqueeze(0).repeat(n, 1, 1).to(device) + angles = angles[:, 3:].contiguous().view(-1, 3) + R = data_utils.expmap2rotmat_torch(angles, device).view(n, j_n, 3, 3) + for i in np.arange(1, j_n): + if parent[i] > 0: + R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone() + p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :] + return p3d + + +def main(): + # Load all the data + parent, offset, rotInd, expmapInd = _some_variables() + + # numpy implementation + # with h5py.File('samples.h5', 'r') as h5f: + # expmap_gt = h5f['expmap/gt/walking_0'][:] + # expmap_pred = h5f['expmap/preds/walking_0'][:] + expmap_pred = np.array( + [0.0000000, 0.0000000, 0.0000000, -0.0000001, -0.0000000, -0.0000002, 0.3978439, -0.4166636, 0.1027215, + -0.7767256, -0.0000000, -0.0000000, 0.1704115, 0.3078358, -0.1861640, 0.3330379, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 0.0679339, 0.2255526, 0.2394881, -0.0989492, -0.0000000, -0.0000000, + 0.0677801, -0.3607298, 0.0503249, 0.1819232, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + 0.3236777, -0.0476493, -0.0651256, -0.3150051, -0.0665669, 0.3188994, -0.5980227, -0.1190833, -0.3017127, + 1.2270271, -0.1010960, 0.2072986, -0.0000000, -0.0000000, -0.0000000, -0.2578378, -0.0125206, 2.0266378, + -0.3701521, 0.0199115, 0.5594162, -0.4625384, -0.0000000, -0.0000000, 0.1653314, -0.3952765, -0.1731570, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 2.7825687, -1.4196042, -0.0936858, -1.0348599, -2.7419815, 0.4518218, + -0.3902033, -0.0000000, -0.0000000, 0.0597317, 0.0547002, 0.0445105, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 + ]) + expmap_gt = np.array( + [0.2240568, -0.0276901, -0.7433901, 0.0004407, -0.0020624, 0.0002131, 0.3974636, -0.4157083, 0.1030248, + -0.7762963, -0.0000000, -0.0000000, 0.1697988, 0.3087364, -0.1863863, 
0.3327336, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 0.0689423, 0.2282812, 0.2395958, -0.0998311, -0.0000000, -0.0000000, + 0.0672752, -0.3615943, 0.0505299, 0.1816492, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + 0.3223563, -0.0481131, -0.0659720, -0.3145134, -0.0656419, 0.3206626, -0.5979006, -0.1181534, -0.3033383, + 1.2269648, -0.1011873, 0.2057794, -0.0000000, -0.0000000, -0.0000000, -0.2590978, -0.0141497, 2.0271597, + -0.3699318, 0.0128547, 0.5556172, -0.4714990, -0.0000000, -0.0000000, 0.1603251, -0.4157299, -0.1667608, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 2.7811005, -1.4192915, -0.0932141, -1.0294687, -2.7323222, 0.4542309, + -0.4048152, -0.0000000, -0.0000000, 0.0568960, 0.0525994, 0.0493068, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 + ]) + xyz1 = fkl(expmap_pred, parent, offset, rotInd, expmapInd) + xyz2 = fkl(expmap_gt, parent, offset, rotInd, expmapInd) + + exp1 = Variable(torch.from_numpy(np.vstack((expmap_pred, expmap_gt))).float()).float() + xyz = fkl_torch(exp1, parent, offset, rotInd, expmapInd) + xyz = xyz.cpu().data.numpy() + print(xyz) + + +if __name__ == '__main__': + main() diff --git a/models/msr_gcn/utils/multi_scale.py b/models/msr_gcn/utils/multi_scale.py new file mode 100644 index 0000000..14cfa0d --- /dev/null +++ b/models/msr_gcn/utils/multi_scale.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# encoding: utf-8 +''' +@project : MSRGCN +@file : multi_scale.py +@author : Droliven +@contact : droliven@163.com +@ide : PyCharm +@time : 2021-07-27 20:30 +''' + +import numpy as np + +def p_down(mydata, Index): + ''' + leng, features, seq_len + ''' + leng, features, seq_len = mydata.shape + mydata = mydata.reshape(leng, -1, 3, seq_len) # x, 22, 3, 35 + + da = np.zeros((leng, len(Index), 3, seq_len)) # x, 12, 3, 35 + for i in range(len(Index)): + da[:, i, :, :] = np.mean(mydata[:, Index[i], :, :], axis=1) + da = da.reshape(leng, -1, seq_len) + return da + +def downs_from_22(downs, down_key): + + for key1, key2, key3 in down_key: + downs[key2] = p_down(downs[key1], key3) + return downs + diff --git a/models/pgbig/__init__.py b/models/pgbig/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/pgbig/base_model.py b/models/pgbig/base_model.py new file mode 100644 index 0000000..e7d4918 --- /dev/null +++ b/models/pgbig/base_model.py @@ -0,0 +1,241 @@ +import torch.nn as nn +import torch +from torch.nn.parameter import Parameter +import math + +class GraphConvolution(nn.Module): + """ + adapted from : https://github.com/tkipf/gcn/blob/92600c39797c2bfb61a508e52b88fb554df30177/gcn/layers.py#L132 + """ + + def __init__(self, in_c, out_c, node_n = 22, seq_len = 35, bias=True): + super(GraphConvolution, self).__init__() + self.in_features = in_c + self.out_features = out_c + self.att = Parameter(torch.FloatTensor(node_n, node_n)) + self.weight_seq = Parameter(torch.FloatTensor(seq_len, seq_len)) + + self.weight_c = Parameter(torch.FloatTensor(in_c, out_c)) + + if bias: + self.bias = Parameter(torch.FloatTensor(seq_len)) + else: + self.register_parameter('bias', None) + self.reset_parameters() + + self.support = None + + def reset_parameters(self): + stdv = 1. 
/ math.sqrt(self.att.size(1)) + self.weight_c.data.uniform_(-stdv, stdv) + self.weight_seq.data.uniform_(-stdv, stdv) + self.att.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + + def forward(self, input): + #input [b,c,22,35] + + #先进行图卷积再进行空域卷积 + # [b,c,22,35] -> [b,35,22,c] -> [b,35,22,c] + support = torch.matmul(self.att, input.permute(0, 3, 2, 1)) + + # [b,35,22,c] -> [b,35,22,64] + output_gcn = torch.matmul(support, self.weight_c) + + + #进行空域卷积 + # [b,35,22,64] -> [b,22,64,35] + output_fc = torch.matmul(output_gcn.permute(0, 2, 3, 1), self.weight_seq).permute(0, 2, 1, 3).contiguous() + + + if self.bias is not None: + return (output_fc + self.bias) + else: + return output_fc + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + + +class GC_Block(nn.Module): + def __init__(self, channal, p_dropout, bias=True, node_n=22, seq_len = 20): + """ + Define a residual block of GCN + """ + super(GC_Block, self).__init__() + self.in_features = channal + self.out_features = channal + + self.gc1 = GraphConvolution(channal, channal, node_n=node_n, seq_len=seq_len, bias=bias) + self.bn1 = nn.BatchNorm1d(channal*node_n*seq_len) + + self.gc2 = GraphConvolution(channal, channal, node_n=node_n, seq_len=seq_len, bias=bias) + self.bn2 = nn.BatchNorm1d(channal*node_n*seq_len) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + def forward(self, x): + + y = self.gc1(x) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn1(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + y = self.gc2(y) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn2(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + return y + x + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + +class GCN(nn.Module): + def __init__(self, in_channal, out_channal, node_n=22, seq_len=20, p_dropout=0.3, num_stage=1 ): + """ + :param input_feature: num of input feature + :param hidden_feature: num of hidden feature + :param p_dropout: drop out prob. 
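The comments in `GraphConvolution.forward` above describe a spatial mixing over joints followed by a temporal mixing over coefficients. A standalone shape walk-through (editor's sketch with random tensors and illustrative sizes) makes the permutations explicit:
```
import torch

b, c, out_c, node_n, seq_len = 8, 3, 16, 22, 20
x        = torch.randn(b, c, node_n, seq_len)       # [b, c, 22, 20]
att      = torch.randn(node_n, node_n)              # joint-to-joint mixing
weight_c = torch.randn(c, out_c)                    # per-node feature transform
weight_t = torch.randn(seq_len, seq_len)            # mixing over the 20 coefficients

support = torch.matmul(att, x.permute(0, 3, 2, 1))  # [b, 20, 22, c]
feat    = torch.matmul(support, weight_c)           # [b, 20, 22, out_c]
out     = torch.matmul(feat.permute(0, 2, 3, 1), weight_t).permute(0, 2, 1, 3)
print(out.shape)                                    # torch.Size([8, 16, 22, 20])
```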
+ :param num_stage: number of residual blocks + :param node_n: number of nodes in graph + """ + super(GCN, self).__init__() + self.num_stage = num_stage + + self.gc1 = GraphConvolution(in_c=in_channal, out_c=out_channal, node_n=node_n, seq_len=seq_len) + self.bn1 = nn.BatchNorm1d(out_channal*node_n*seq_len) + + self.gcbs = [] + for i in range(num_stage): + self.gcbs.append(GC_Block(channal=out_channal, p_dropout=p_dropout, node_n=node_n, seq_len=seq_len)) + + self.gcbs = nn.ModuleList(self.gcbs) + self.gc7 = GraphConvolution(in_c=out_channal, out_c=in_channal, node_n=node_n, seq_len=seq_len) + self.bn2 = nn.BatchNorm1d(in_channal*node_n*seq_len) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + + def forward(self, x): + + y = self.gc1(x) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn1(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + for i in range(self.num_stage): + y = self.gcbs[i](y) + + y = self.gc7(y) + # b, n, f = y.shape + # y = self.bn2(y.view(b, -1)).view(b, n, f) + # y = self.act_f(y) + # y = self.do(y) + + return y + x + +class GCN_encoder(nn.Module): + def __init__(self, in_channal, out_channal, node_n=22, seq_len=20, p_dropout=0.3, num_stage=1 ): + """ + :param input_feature: num of input feature + :param hidden_feature: num of hidden feature + :param p_dropout: drop out prob. + :param num_stage: number of residual blocks + :param node_n: number of nodes in graph + """ + super(GCN_encoder, self).__init__() + self.num_stage = num_stage + + self.gc1 = GraphConvolution(in_c=in_channal, out_c=out_channal, node_n=node_n, seq_len=seq_len) + self.bn1 = nn.BatchNorm1d(out_channal*node_n*seq_len) + + self.gcbs = [] + for i in range(num_stage): + self.gcbs.append(GC_Block(channal=out_channal, p_dropout=p_dropout, node_n=node_n, seq_len=seq_len)) + + self.gcbs = nn.ModuleList(self.gcbs) + self.gc7 = GraphConvolution(in_c=out_channal, out_c=out_channal, node_n=node_n, seq_len=seq_len) + self.bn2 = nn.BatchNorm1d(out_channal*node_n*seq_len) + self.reshape_conv = torch.nn.Conv2d(in_channels=in_channal, out_channels=out_channal, kernel_size=(1, 1)) + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + + def forward(self, x): + + y = self.gc1(x) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn1(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + for i in range(self.num_stage): + y = self.gcbs[i](y) + + y = self.gc7(y) + # b, c, n, l = y.shape + # y = self.bn2(y.view(b, -1)).view(b, c, n, l).contiguous() + # y = self.act_f(y) + # y = self.do(y) + + return y + self.reshape_conv(x) + +class GCN_decoder(nn.Module): + def __init__(self, in_channal, out_channal, node_n=22, seq_len=20, p_dropout=0.3, num_stage=1): + """ + :param input_feature: num of input feature + :param hidden_feature: num of hidden feature + :param p_dropout: drop out prob. 
+ :param num_stage: number of residual blocks + :param node_n: number of nodes in graph + """ + super(GCN_decoder, self).__init__() + self.num_stage = num_stage + + self.gc1 = GraphConvolution(in_c=in_channal, out_c=in_channal, node_n=node_n, seq_len=seq_len) + self.bn1 = nn.BatchNorm1d(in_channal*node_n*seq_len) + + self.gcbs = [] + for i in range(num_stage): + self.gcbs.append(GC_Block(channal=in_channal, p_dropout=p_dropout, node_n=node_n, seq_len=seq_len)) + + self.gcbs = nn.ModuleList(self.gcbs) + self.gc7 = GraphConvolution(in_c=in_channal, out_c=out_channal, node_n=node_n, seq_len=seq_len) + self.bn2 = nn.BatchNorm1d(in_channal*node_n*seq_len) + + self.reshape_conv = torch.nn.Conv2d(in_channels=in_channal, out_channels=out_channal, kernel_size=(1, 1)) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + def forward(self, x): + y = self.gc1(x) + b, c, n, l = y.shape + y = y.view(b, -1).contiguous() + y = self.bn1(y).view(b, c, n, l).contiguous() + y = self.act_f(y) + y = self.do(y) + + for i in range(self.num_stage): + y = self.gcbs[i](y) + + y = self.gc7(y) + self.reshape_conv(x) + + return y + + diff --git a/models/pgbig/data_proc.py b/models/pgbig/data_proc.py new file mode 100644 index 0000000..50eeae1 --- /dev/null +++ b/models/pgbig/data_proc.py @@ -0,0 +1,145 @@ +import numpy as np +import torch +from torch import nn + + +def joint_to_index(x): + return np.concatenate((x * 3, x * 3 + 1, x * 3 + 2)) + + +class AMASS_3DPW_values(): + def __init__(self) -> None: + self.mean = np.array([[[ 144.42061, -471.00433, 42.905945, -144.2189, -471.00433, + 55.37049, 5.0257893, 235.50217, 6.188506, 131.57523, + -916.6374, -73.56259, -129.137, -914.8206, -67.33688, + 5.8155527, 278.29196, 43.45168, 189.25381, -942.00867, + 31.58287, -185.46811, -942.00867, 38.146023, 1.6395822, + 504.07196, 65.04476, 90.96787, 467.06006, 46.06653, + -79.57573, 464.56763, 35.583405, 5.7321978, 544.3716, + 132.00195, 189.30196, 464.18073, 46.495617, -181.78586, + 461.8248, 38.285446, 242.19247, 208.86894, 10.837954, + -243.21066, 220.56078, 20.73184, 256.45264, 66.64482, + 116.55112, -262.37643, 88.037315, 129.05185 ]]]) + self.std = np.array([[[ 48.352272, 68.6015, 119.17078, 47.49278, 69.03037, 120.7153, + 9.933628, 16.11266, 32.15347, 81.19508, 148.52235, 160.55476, + 78.806435, 148.95927, 161.60782, 15.046006, 26.999517, 41.232426, + 90.12439, 126.81438, 174.0965, 87.97808, 128.31987, 173.8965, + 38.010742, 43.834454, 91.36834, 28.467258, 66.382034, 72.003075, + 26.970959, 66.33471, 69.758385, 48.895977, 62.34938, 100.590385, + 33.747925, 76.94056, 85.15882, 32.314583, 77.06175, 83.645386, + 80.88272, 109.25045, 123.5628, 79.029915, 115.18032, 127.966995, + 158.545, 217.86617, 164.67949, 156.79645, 235.94897, 175.8384 ]]]) + +class Human36m_values(): + def __init__(self) -> None: + self.dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) + + self.mean = np.array([[[-107.9520, -334.9428, 159.4178, -59.2900, -708.5010, 61.4318, + -61.6391, -757.3103, 189.2157, -72.0266, -761.0327, 251.2684, + 151.7538, -326.8112, 161.4840, 134.0947, -709.5225, 71.7927, + 153.4157, -744.0466, 200.7195, 163.8421, -737.7441, 261.8018, + -17.9565, 210.8857, -12.5731, -30.5735, 429.6271, 36.1767, + -43.2606, 489.0777, 114.7583, -54.7775, 578.9327, 88.4990, + 108.9527, 394.7472, 26.0654, 
237.0195, 213.8401, 44.9180, + 188.2216, 135.0727, 139.9878, 152.3083, 163.3067, 155.1163, + 196.3242, 118.3158, 182.5405, -163.6815, 375.3079, 23.2578, + -266.1268, 186.6490, 53.2938, -217.2098, 156.2352, 160.8916, + -200.4095, 191.2718, 165.2301, -223.5151, 149.2325, 211.9896]]]) + self.std = np.array([[[65.4117, 166.9468, 160.5147, 109.2458, 295.7622, 210.9699, 122.5746, + 308.4443, 228.9709, 131.0754, 310.0372, 235.9644, 74.9162, 174.3366, + 163.9575, 129.1666, 296.0691, 209.0041, 154.0681, 305.1154, 224.3635, + 165.3411, 304.1239, 230.2749, 19.6905, 71.2422, 64.0733, 52.6362, + 150.2302, 141.1058, 68.3720, 177.7844, 164.2342, 78.0215, 203.7356, + 192.8816, 47.0527, 137.0687, 138.8337, 72.1145, 127.8964, 170.1875, + 151.9798, 210.0934, 199.3142, 155.3852, 219.3135, 193.1652, 191.3546, + 254.2903, 225.2465, 45.0912, 135.5994, 133.7429, 74.3784, 133.9870, + 160.7077, 143.9800, 235.9862, 196.2391, 147.1276, 232.4836, 188.2000, + 189.1858, 308.0274, 235.1181]]]) + + index_to_ignore = np.array([16, 20, 23, 24, 28, 31]) + self.index_to_ignore = joint_to_index(index_to_ignore) + + index_to_equal = np.array([13, 19, 22, 13, 27, 30]) + self.index_to_equal = joint_to_index(index_to_equal) + + index_to_copy = np.array([0, 1, 6, 11]) + self.index_to_copy = joint_to_index(index_to_copy) + +human36m = Human36m_values() +amass_3dpw = AMASS_3DPW_values() + + +class Human36m_Preprocess(nn.Module): + def __init__(self, args): + super(Human36m_Preprocess, self).__init__() + self.args = args + self.mean = torch.tensor(human36m.mean).to(args.device).float() + self.std = torch.tensor(human36m.std).to(args.device).float() + + def forward(self, observed_pose, normal=True): + observed_pose = observed_pose[:, :, human36m.dim_used] + if normal: + observed_pose = (observed_pose - self.mean) / self.std + return observed_pose + + +class Human36m_Postprocess(nn.Module): + def __init__(self, args): + super(Human36m_Postprocess, self).__init__() + self.args = args + self.mean = torch.tensor(human36m.mean).to(args.device).float() + self.std = torch.tensor(human36m.std).to(args.device).float() + + def forward(self, observed_pose, pred_pose, normal=True): + if normal: + pred_pose = (pred_pose * self.std) + self.mean + + x = torch.zeros([pred_pose.shape[0], pred_pose.shape[1], 96]).to(self.args.device) + x[:, :, human36m.dim_used] = pred_pose + x[:, :, human36m.index_to_copy] = observed_pose[:, -1:, human36m.index_to_copy] + x[:, :, human36m.index_to_ignore] = x[:, :, human36m.index_to_equal] + return x + +class AMASS_3DPW_Preprocess(nn.Module): + def __init__(self, args): + super(AMASS_3DPW_Preprocess, self).__init__() + self.args = args + self.mean = torch.tensor(amass_3dpw.mean).to(args.device).float() + self.std = torch.tensor(amass_3dpw.std).to(args.device).float() + + def forward(self, observed_pose, normal=True): + if normal: + observed_pose = (observed_pose - self.mean) / self.std + return observed_pose + + +class AMASS_3DPW_Postprocess(nn.Module): + def __init__(self, args): + super(AMASS_3DPW_Postprocess, self).__init__() + self.args = args + self.mean = torch.tensor(amass_3dpw.mean).to(args.device).float() + self.std = torch.tensor(amass_3dpw.std).to(args.device).float() + + def forward(self, observed_pose, pred_pose, normal=True): + if normal: + pred_pose = (pred_pose * self.std) + self.mean + return pred_pose + +class Preprocess(nn.Module): + def __init__(self, args): + super(Preprocess, self).__init__() + self.args = args + + def forward(self, observed_pose, normal=True): + return observed_pose + +class 
Postprocess(nn.Module): + def __init__(self, args): + super(Postprocess, self).__init__() + self.args = args + + def forward(self, observed_pose, pred_pose, normal=True): + return pred_pose \ No newline at end of file diff --git a/models/pgbig/pgbig.py b/models/pgbig/pgbig.py new file mode 100644 index 0000000..2de820a --- /dev/null +++ b/models/pgbig/pgbig.py @@ -0,0 +1,177 @@ +from torch.nn import Module +import torch +from models.pgbig import base_model as BaseBlock +from models.pgbig.data_proc import Preprocess, Postprocess, Human36m_Postprocess, Human36m_Preprocess, AMASS_3DPW_Postprocess, AMASS_3DPW_Preprocess + +from models.pgbig import util + +""" +在model1的基础上添加st_gcn,修改 bn +""" + +class MultiStageModel(Module): + def __init__(self, opt): + super(MultiStageModel, self).__init__() + + self.opt = opt + self.kernel_size = opt.kernel_size + self.d_model = opt.d_model + # self.seq_in = seq_in + self.dct_n = opt.dct_n + # ks = int((kernel_size + 1) / 2) + assert opt.kernel_size == 10 + + self.in_features = opt.in_features + self.num_stage = opt.num_stage + self.node_n = self.in_features//3 + + self.encoder_layer_num = 1 + self.decoder_layer_num = 2 + + self.input_n = opt.obs_frames_num + self.output_n = opt.pred_frames_num + + self.gcn_encoder1 = BaseBlock.GCN_encoder(in_channal=3, out_channal=self.d_model, + node_n=self.node_n, + seq_len=self.dct_n, + p_dropout=opt.drop_out, + num_stage=self.encoder_layer_num) + + self.gcn_decoder1 = BaseBlock.GCN_decoder(in_channal=self.d_model, out_channal=3, + node_n=self.node_n, + seq_len=self.dct_n*2, + p_dropout=opt.drop_out, + num_stage=self.decoder_layer_num) + + self.gcn_encoder2 = BaseBlock.GCN_encoder(in_channal=3, out_channal=self.d_model, + node_n=self.node_n, + seq_len=self.dct_n, + p_dropout=opt.drop_out, + num_stage=self.encoder_layer_num) + + self.gcn_decoder2 = BaseBlock.GCN_decoder(in_channal=self.d_model, out_channal=3, + node_n=self.node_n, + seq_len=self.dct_n * 2, + p_dropout=opt.drop_out, + num_stage=self.decoder_layer_num) + + self.gcn_encoder3 = BaseBlock.GCN_encoder(in_channal=3, out_channal=self.d_model, + node_n=self.node_n, + seq_len=self.dct_n, + p_dropout=opt.drop_out, + num_stage=self.encoder_layer_num) + + self.gcn_decoder3 = BaseBlock.GCN_decoder(in_channal=self.d_model, out_channal=3, + node_n=self.node_n, + seq_len=self.dct_n * 2, + p_dropout=opt.drop_out, + num_stage=self.decoder_layer_num) + + self.gcn_encoder4 = BaseBlock.GCN_encoder(in_channal=3, out_channal=self.d_model, + node_n=self.node_n, + seq_len=self.dct_n, + p_dropout=opt.drop_out, + num_stage=self.encoder_layer_num) + + self.gcn_decoder4 = BaseBlock.GCN_decoder(in_channal=self.d_model, out_channal=3, + node_n=self.node_n, + seq_len=self.dct_n * 2, + p_dropout=opt.drop_out, + num_stage=self.decoder_layer_num) + + def forward(self, src, input_n=10, output_n=10, itera=1): + output_n = self.output_n + input_n = self.input_n + + bs = src.shape[0] + # [2000,512,22,20] + dct_n = self.dct_n + idx = list(range(self.kernel_size)) + [self.kernel_size -1] * output_n + # [b,20,66] + input_gcn = src[:, idx].clone() + + dct_m, idct_m = util.get_dct_matrix(input_n + output_n) + dct_m = torch.from_numpy(dct_m).float().to(self.opt.device) + idct_m = torch.from_numpy(idct_m).float().to(self.opt.device) + + # [b,20,66] -> [b,66,20] + input_gcn_dct = torch.matmul(dct_m[:dct_n], input_gcn).permute(0, 2, 1) + + # [b,66,20]->[b,22,3,20]->[b,3,22,20]->[b,512,22,20] + input_gcn_dct = input_gcn_dct.reshape(bs, self.node_n, -1, self.dct_n).permute(0, 2, 1, 3) + + #stage1 + 
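Before the four encoder/decoder stages run, `forward` above seeds the unknown future by repeating the last observed frame and then moves to the DCT domain. A small sketch of that index trick (editor's illustration, assuming the default 10 observed and 10 predicted frames):
```
import torch

kernel_size, output_n = 10, 10
idx = list(range(kernel_size)) + [kernel_size - 1] * output_n
print(idx)                              # indices 0..9 followed by ten copies of index 9

src = torch.randn(2, kernel_size, 66)   # [batch, observed frames, 22 joints * 3]
input_gcn = src[:, idx]                 # [batch, 20, 66]; frames 10..19 copy frame 9
print(torch.equal(input_gcn[:, kernel_size:],
                  src[:, -1:].expand(-1, output_n, -1)))   # True
```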
latent_gcn_dct = self.gcn_encoder1(input_gcn_dct) + #[b,512,22,20] -> [b, 512, 22, 40] + latent_gcn_dct = torch.cat((latent_gcn_dct, latent_gcn_dct), dim=3) + output_dct_1 = self.gcn_decoder1(latent_gcn_dct)[:, :, :, :dct_n] + + #stage2 + latent_gcn_dct = self.gcn_encoder2(output_dct_1) + # [b,512,22,20] -> [b, 512, 22, 40] + latent_gcn_dct = torch.cat((latent_gcn_dct, latent_gcn_dct), dim=3) + output_dct_2 = self.gcn_decoder2(latent_gcn_dct)[:, :, :, :dct_n] + + #stage3 + latent_gcn_dct = self.gcn_encoder3(output_dct_2) + # [b,512,22,20] -> [b, 512, 22, 40] + latent_gcn_dct = torch.cat((latent_gcn_dct, latent_gcn_dct), dim=3) + output_dct_3 = self.gcn_decoder3(latent_gcn_dct)[:, :, :, :dct_n] + + #stage4 + latent_gcn_dct = self.gcn_encoder4(output_dct_3) + # [b,512,22,20] -> [b, 512, 22, 40] + latent_gcn_dct = torch.cat((latent_gcn_dct, latent_gcn_dct), dim=3) + output_dct_4 = self.gcn_decoder4(latent_gcn_dct)[:, :, :, :dct_n] + + output_dct_1 = output_dct_1.permute(0, 2, 1, 3).reshape(bs, -1, dct_n) + output_dct_2 = output_dct_2.permute(0, 2, 1, 3).reshape(bs, -1, dct_n) + output_dct_3 = output_dct_3.permute(0, 2, 1, 3).reshape(bs, -1, dct_n) + output_dct_4 = output_dct_4.permute(0, 2, 1, 3).reshape(bs, -1, dct_n) + + # [b,20 66]->[b,20 66] + output_1 = torch.matmul(idct_m[:, :dct_n], output_dct_1.permute(0, 2, 1)) + output_2 = torch.matmul(idct_m[:, :dct_n], output_dct_2.permute(0, 2, 1)) + output_3 = torch.matmul(idct_m[:, :dct_n], output_dct_3.permute(0, 2, 1)) + output_4 = torch.matmul(idct_m[:, :dct_n], output_dct_4.permute(0, 2, 1)) + + return output_4, output_3, output_2, output_1 + + +class PGBIG(Module): + def __init__(self, args): + super(PGBIG, self).__init__() + self.args = args + self.in_n = args.obs_frames_num + self.out_n = args.pred_frames_num + + if args.pre_post_process == 'human3.6m': + self.preprocess = Human36m_Preprocess(args).to(args.device) + self.postprocess = Human36m_Postprocess(args).to(args.device) + elif args.pre_post_process == 'AMASS' or args.pre_post_process == '3DPW': + self.preprocess = AMASS_3DPW_Preprocess(args).to(args.device) + self.postprocess = AMASS_3DPW_Postprocess(args).to(args.device) + else: + self.preprocess = Preprocess(args).to(args.device) + self.postprocess = Postprocess(args).to(args.device) + + for p in self.preprocess.parameters(): + p.requires_grad = False + + for p in self.postprocess.parameters(): + p.requires_grad = False + + self.net = MultiStageModel(args).to(args.device) + + + def forward(self, batch): + observed_data = batch["observed_pose"].to(self.args.device) + observed_data = self.preprocess(observed_data, normal=True) + p4, p3, p2, p1 = self.net(observed_data, input_n=self.in_n, output_n=self.out_n, itera=1) + + return { + 'pred_pose': self.postprocess(batch['observed_pose'], p4[:, self.in_n:, :], normal=True), + 'p1': p1, 'p2': p2, 'p3': p3, 'p4': p4 + } + + diff --git a/models/pgbig/util.py b/models/pgbig/util.py new file mode 100644 index 0000000..616d19d --- /dev/null +++ b/models/pgbig/util.py @@ -0,0 +1,67 @@ +import torch +import numpy as np + + +def lr_decay_mine(optimizer, lr_now, gamma): + lr = lr_now * gamma + for param_group in optimizer.param_groups: + param_group['lr'] = lr + return lr + + +def orth_project(cam, pts): + """ + + :param cam: b*[s,tx,ty] + :param pts: b*k*3 + :return: + """ + s = cam[:, 0:1].unsqueeze(1).repeat(1, pts.shape[1], 2) + T = cam[:, 1:].unsqueeze(1).repeat(1, pts.shape[1], 1) + + return torch.mul(s, pts[:, :, :2] + T) + + +def opt_cam(x, x_target): + """ + :param x: N K 3 or N K 2 + 
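A tiny numeric check for `orth_project` above (editor's sketch; the camera and points are arbitrary): the weak-perspective camera `[s, tx, ty]` drops the z coordinate, translates by `(tx, ty)` and scales by `s`.
```
import torch
from models.pgbig.util import orth_project   # path as added in this patch

cam = torch.tensor([[2.0, 0.1, -0.2]])       # one camera: s=2, tx=0.1, ty=-0.2
pts = torch.tensor([[[1.0, 1.0, 5.0],
                     [0.0, 0.5, 3.0]]])      # [batch=1, k=2, 3]
print(orth_project(cam, pts))                # tensor([[[2.2000, 1.6000],
                                             #          [0.2000, 0.6000]]])
```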
:param x_target: N K 3 or N K 2 + :return: + """ + if x_target.shape[2] == 2: + vis = torch.ones_like(x_target[:, :, :1]) + else: + vis = (x_target[:, :, :1] > 0).float() + vis[:, :2] = 0 + xxt = x_target[:, :, :2] + xx = x[:, :, :2] + x_vis = vis * xx + xt_vis = vis * xxt + num_vis = torch.sum(vis, dim=1, keepdim=True) + mu1 = torch.sum(x_vis, dim=1, keepdim=True) / num_vis + mu2 = torch.sum(xt_vis, dim=1, keepdim=True) / num_vis + xmu = vis * (xx - mu1) + xtmu = vis * (xxt - mu2) + + eps = 1e-6 * torch.eye(2).float().cuda() + Ainv = torch.inverse(torch.matmul(xmu.transpose(1, 2), xmu) + eps.unsqueeze(0)) + B = torch.matmul(xmu.transpose(1, 2), xtmu) + tmp_s = torch.matmul(Ainv, B) + scale = ((tmp_s[:, 0, 0] + tmp_s[:, 1, 1]) / 2.0).unsqueeze(1) + + scale = torch.clamp(scale, 0.7, 10) + trans = mu2.squeeze(1) / scale - mu1.squeeze(1) + opt_cam = torch.cat([scale, trans], dim=1) + return opt_cam + + +def get_dct_matrix(N): + dct_m = np.eye(N) + for k in np.arange(N): + for i in np.arange(N): + w = np.sqrt(2 / N) + if k == 0: + w = np.sqrt(1 / N) + dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) + idct_m = np.linalg.inv(dct_m) + return dct_m, idct_m diff --git a/models/potr/__init__.py b/models/potr/__init__.py new file mode 100644 index 0000000..3636e0e --- /dev/null +++ b/models/potr/__init__.py @@ -0,0 +1,10 @@ +import os, sys +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") +from potr.utils import xavier_init, normal_init + + +INIT_FUNC = { + 'xavier': xavier_init, + 'normal': normal_init +} \ No newline at end of file diff --git a/models/potr/conv_1d_encoder.py b/models/potr/conv_1d_encoder.py new file mode 100644 index 0000000..39b9c74 --- /dev/null +++ b/models/potr/conv_1d_encoder.py @@ -0,0 +1,130 @@ +############################################################################### +# Pose Transformers (POTR): Human Motion Prediction with Non-Autoregressive +# Transformers +# +# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ +# Written by +# Angel Martinez , +# +# This file is part of +# POTR: Human Motion Prediction with Non-Autoregressive Transformers +# +# POTR is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 3 as +# published by the Free Software Foundation. +# +# POTR is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with POTR. If not, see . 
+############################################################################### +"""Model of 1D convolutions for encoding pose sequences.""" + + +import numpy as np +import os +import sys + +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") + +import torch +import torch.nn as nn + + +class Pose1DEncoder(nn.Module): + def __init__(self, input_channels=3, output_channels=128, n_joints=21): + super(Pose1DEncoder, self).__init__() + self._input_channels = input_channels + self._output_channels = output_channels + self._n_joints = n_joints + self.init_model() + + + def init_model(self): + self._model = nn.Sequential( + nn.Conv1d(in_channels=self._input_channels, out_channels=32, kernel_size=7), + nn.BatchNorm1d(32), + nn.ReLU(True), + nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3), + nn.BatchNorm1d(32), + nn.ReLU(True), + nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3), + nn.BatchNorm1d(64), + nn.ReLU(True), + nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3), + nn.BatchNorm1d(64), + nn.ReLU(True), + nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3), + nn.BatchNorm1d(128), + nn.ReLU(True), + nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3), + nn.BatchNorm1d(128), + nn.ReLU(True), + nn.Conv1d(in_channels=128, out_channels=self._output_channels, kernel_size=3), + nn.BatchNorm1d(self._output_channels), + nn.ReLU(True), + nn.Conv1d(in_channels=self._output_channels, out_channels=self._output_channels, kernel_size=3) + ) + + def forward(self, x): + """ + Args: + x: [batch_size, seq_len, skeleton_dim]. + """ + # inputs to model is [batch_size, channels, n_joints] + # transform the batch to [batch_size*seq_len, dof, n_joints] + bs, seq_len, dim = x.size() + dof = dim//self._n_joints + x = x.view(bs*seq_len, dof, self._n_joints) + + # [batch_size*seq_len, dof, n_joints] + x = self._model(x) + # [batch_size, seq_len, output_channels] + x = x.view(bs, seq_len, self._output_channels) + + return x + + +class Pose1DTemporalEncoder(nn.Module): + def __init__(self, input_channels, output_channels): + super(Pose1DTemporalEncoder, self).__init__() + self._input_channels = input_channels + self._output_channels = output_channels + self.init_model() + + def init_model(self): + self._model = nn.Sequential( + nn.Conv1d( + in_channels=self._input_channels, out_channels=32, kernel_size=3, padding=1), + nn.BatchNorm1d(32), + nn.ReLU(True), + nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1), + nn.BatchNorm1d(32), + nn.ReLU(True), + nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1), + nn.BatchNorm1d(64), + nn.ReLU(True), + nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3, padding=1), + nn.BatchNorm1d(64), + nn.ReLU(True), + nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1), + nn.BatchNorm1d(128), + nn.ReLU(True), + nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3, padding=1), + nn.BatchNorm1d(128), + nn.ReLU(True), + nn.Conv1d(in_channels=128, out_channels=self._output_channels, kernel_size=3, padding=1), + nn.BatchNorm1d(self._output_channels), + nn.ReLU(True), + nn.Conv1d(in_channels=self._output_channels, out_channels=self._output_channels, kernel_size=3, padding=1) + ) + + def forward(self, x): + x = torch.transpose(x, 1,2) + x = self._model(x) + x = torch.transpose(x, 1, 2) + return x diff --git a/models/potr/data_process.py b/models/potr/data_process.py new file mode 100644 index 0000000..d35e19c --- /dev/null +++ b/models/potr/data_process.py @@ 
-0,0 +1,127 @@ +import torch +from torch import nn +import numpy as np +from utils.others import rotmat_to_euler, expmap_to_rotmat +_MAJOR_JOINTS = [ + 0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19, 24, 25, 26, 27 +] + +ACTIONS = { + "walking": 0, + "eating": 1, + "smoking": 2, + "discussion": 3, + "directions": 4, + "greeting": 5, + "phoning": 6, + "posing": 7, + "purchases": 8, + "sitting": 9, + "sittingdown": 10, + "takingphoto": 11, + "waiting": 12, + "walkingdog": 13, + "walkingtogether": 14 +} + +def compute_difference_matrix(self, src_seq, tgt_seq): + """Computes a matrix of euclidean difference between sequences. + + Args: + src_seq: Numpy array of shape [src_len, dim]. + tgt_seq: Numpy array of shape [tgt_len, dim]. + + Returns: + A matrix of shape [src_len, tgt_len] with euclidean distances. + """ + B = src_seq.shape[0] + src_len = src_seq.shape[1] # M + tgt_len = tgt_seq.shape[1] # N + + distance = np.zeros((B, src_len, tgt_len), dtype=np.float32) + for b in range(B): + for i in range(src_len): + for j in range(tgt_len): + distance[b, i, j] = np.linalg.norm(src_seq[b, i]-tgt_seq[b, j]) + + row_sums = distance.sum(axis=2) + distance_norm = distance / row_sums[:, :, np.newaxis] + distance_norm = 1.0 - distance_norm + + return distance, distance_norm + +def train_preprocess(inputs, args): + + obs_pose = inputs['observed_pose'] + future_pose = inputs['future_pose'] + + obs_pose = obs_pose.reshape(obs_pose.shape[0], obs_pose.shape[1], -1, args.pose_dim) + future_pose = future_pose.reshape(future_pose.shape[0], future_pose.shape[1], -1, args.pose_dim) + + n_major_joints = len(_MAJOR_JOINTS) + obs_pose = obs_pose[:, :, _MAJOR_JOINTS] + future_pose = future_pose[:, :, _MAJOR_JOINTS] + + obs_pose = obs_pose.reshape(*obs_pose.shape[:2], -1) + future_pose = future_pose.reshape(*future_pose.shape[:2], -1) + + src_seq_len = args.obs_frames_num - 1 + + if args.include_last_obs: + src_seq_len += 1 + + encoder_inputs = np.zeros((obs_pose.shape[0], src_seq_len, args.pose_dim * n_major_joints), dtype=np.float32) + decoder_inputs = np.zeros((obs_pose.shape[0], args.future_frames_num, args.pose_dim * n_major_joints), dtype=np.float32) + decoder_outputs = np.zeros((obs_pose.shape[0], args.future_frames_num, args.pose_dim * n_major_joints), dtype=np.float32) + + data_sel = torch.cat((obs_pose, future_pose), dim=1) + + encoder_inputs[:, :, 0:args.pose_dim * n_major_joints] = data_sel[:, 0:src_seq_len,:].cpu() + decoder_inputs[:, :, 0:args.pose_dim * n_major_joints] = \ + data_sel[:, src_seq_len:src_seq_len + args.future_frames_num, :].cpu() + + decoder_outputs[:, :, 0:args.pose_dim * n_major_joints] = data_sel[:, args.obs_frames_num:, 0:args.pose_dim * n_major_joints].cpu() + + if args.pad_decoder_inputs: + query = decoder_inputs[:, 0:1, :] + decoder_inputs = np.repeat(query, args.future_frames_num, axis=1) + + + model_outputs = { + 'encoder_inputs': torch.tensor(encoder_inputs).reshape((*encoder_inputs.shape[:-1], args.n_major_joints, args.pose_dim)).to(args.device), + 'decoder_inputs': torch.tensor(decoder_inputs).reshape((*decoder_inputs.shape[:-1], args.n_major_joints, args.pose_dim)).to(args.device), + 'decoder_outputs': torch.tensor(decoder_outputs).reshape((*decoder_outputs.shape[:-1], args.n_major_joints, args.pose_dim)).to(args.device) + } + if args.predict_activity: + model_outputs['action_ids'] = torch.tensor([ACTIONS[a] for a in inputs['action']]).to(args.device) + + return model_outputs + + +def convert_to_euler(action_sequence_, n_major_joints, pose_format, 
is_normalized=True): + """Convert the input exponential maps to euler angles. + + Args: + action_sequence: Pose exponential maps [batch_size, sequence_length, pose_size]. + The input should not contain the one hot encoding in the vector. + """ + B, S, D = action_sequence_.shape + rotmats = action_sequence_.reshape((B*S, n_major_joints, -1)) + if pose_format == 'expmap': + rotmats = expmap_to_rotmat(rotmats) + + euler_maps = rotmat_to_euler(rotmats) + + euler_maps = euler_maps.reshape((B, S, -1)) + return euler_maps + +def post_process_to_euler(norm_seq, n_major_joints, n_h36m_joints, pose_format): + + batch_size, seq_length, n_major_joints, pose_dim = norm_seq.shape + norm_seq = norm_seq.reshape(batch_size, seq_length, n_major_joints*pose_dim) + euler_seq = convert_to_euler(norm_seq, n_major_joints, pose_format) + euler_seq = euler_seq.reshape((batch_size, seq_length, n_major_joints, 3)) + p_euler_padded = np.zeros([batch_size, seq_length, n_h36m_joints, 3]) + p_euler_padded[:, :, _MAJOR_JOINTS] = euler_seq + p_euler_padded = np.reshape(p_euler_padded, [batch_size, seq_length, -1]) + return p_euler_padded \ No newline at end of file diff --git a/models/potr/pose_encoder_decoder.py b/models/potr/pose_encoder_decoder.py new file mode 100644 index 0000000..e008944 --- /dev/null +++ b/models/potr/pose_encoder_decoder.py @@ -0,0 +1,34 @@ +import torch.nn as nn +import potr.utils as utils + +import os, sys +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") + +from potr import INIT_FUNC +from potr.pose_gcn import SimpleEncoder + +def pose_encoder_gcn(args): + encoder = SimpleEncoder( + n_nodes=args.n_major_joints, + input_features=9 if args.pose_format == 'rotmat' else 3, + model_dim=args.model_dim, + p_dropout=args.dropout + ) + + return encoder + + +def pose_decoder_mlp(args): + init_fn = INIT_FUNC[args.init_fn_name] + pose_decoder = nn.Linear(args.model_dim, args.pose_dim*args.n_major_joints) + utils.weight_init(pose_decoder, init_fn=init_fn) + return pose_decoder + +def select_pose_encoder_decoder_fn(args): + + if args.pose_embedding_type.lower() == 'gcn_enc': + return pose_encoder_gcn, pose_decoder_mlp + + else: + raise ValueError('Unknown pose embedding {}'.format(args.pose_embedding_type)) \ No newline at end of file diff --git a/models/potr/pose_gcn.py b/models/potr/pose_gcn.py new file mode 100644 index 0000000..eda3a91 --- /dev/null +++ b/models/potr/pose_gcn.py @@ -0,0 +1,384 @@ +import os +import sys +import torch.nn as nn +import torch +from torch.nn.parameter import Parameter +import math +import numpy as np + +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") + +import potr.utils as utils + + +class GraphConvolution(nn.Module): + """Implements graph convolutions.""" + + def __init__(self, in_features, out_features, output_nodes=48, bias=False): + """Constructor. + + The graph convolutions can be defined as \sigma(AxHxW), where A is the + adjacency matrix, H is the feature representation from previous layer + and W is the wegith of the current layer. The dimensions of such martices + A\in R^{NxN}, H\in R^{NxM} and W\in R^{MxO} where + - N is the number of nodes + - M is the number of input features per node + - O is the number of output features per node + + Args: + in_features: Number of input features per node. + out_features: Number of output features per node. + output_nodes: Number of nodes in the graph. 
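The docstring above defines the layer as sigma(A·H·W) with A in R^{N×N}, H in R^{N×M} and W in R^{M×O}. A bare-bones shape check (editor's sketch with random tensors) mirrors the two `torch.matmul` calls in `forward`:
```
import torch

N, M, O, batch = 21, 9, 128, 4
A = torch.randn(N, N)                        # learned "adjacency" (self.att)
H = torch.randn(batch, N, M)                 # node features from the previous layer
W = torch.randn(M, O)                        # layer weight (self.weight)
out = torch.matmul(A, torch.matmul(H, W))    # support = H·W, then A·support
print(out.shape)                             # torch.Size([4, 21, 128])
```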
+ """ + super(GraphConvolution, self).__init__() + self.in_features = in_features + self.out_features = out_features + self._output_nodes = output_nodes + # W\in R^{MxO} + self.weight = Parameter(torch.FloatTensor(in_features, out_features)) + # A\in R^{NxN} + self.att = Parameter(torch.FloatTensor(output_nodes, output_nodes)) + if bias: + self.bias = Parameter(torch.FloatTensor(out_features)) + else: + self.register_parameter('bias', None) + self.reset_parameters() + + def reset_parameters(self): + stdv = 1. / math.sqrt(self.weight.size(1)) + self.weight.data.uniform_(-stdv, stdv) + self.att.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + + def forward(self, x): + """Forward pass. + + Args: + x: [batch_size, n_nodes, input_features] + Returns: + Feature representation computed from inputs. + Shape is [batch_size, n_nodes, output_features]. + """ + # [batch_size, input_dim, output_features] + # HxW = {NxM}x{MxO} = {NxO} + support = torch.matmul(x, self.weight) + # [batch_size, n_nodes, output_features] + # = {NxN}x{NxO} = {NxO} + output = torch.matmul(self.att, support) + + if self.bias is not None: + return output + self.bias + else: + return output + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + + +class GC_Block(nn.Module): + """Residual block with graph convolutions. + + The implementation uses the same number of input features for outputs. + """ + + def __init__(self, in_features, p_dropout, output_nodes=48, bias=False): + """Constructor. + + Args: + in_features: Number of input and output features. + p_dropout: Dropout used in the layers. + output_nodes: Number of output nodes in the graph. + """ + super(GC_Block, self).__init__() + self.in_features = in_features + self.out_features = in_features + + self.gc1 = GraphConvolution( + in_features, in_features, + output_nodes=output_nodes, + bias=bias + ) + self.bn1 = nn.BatchNorm1d(output_nodes * in_features) + self.gc2 = GraphConvolution( + in_features, in_features, + output_nodes=output_nodes, + bias=bias + ) + self.bn2 = nn.BatchNorm1d(output_nodes * in_features) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + def forward(self, x): + """Forward pass of the residual module""" + y = self.gc1(x) + b, n, f = y.shape + y = self.bn1(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + + y = self.gc2(y) + b, n, f = y.shape + y = self.bn2(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + + return y + x + + def __repr__(self): + return self.__class__.__name__ + ' (' \ + + str(self.in_features) + ' -> ' \ + + str(self.out_features) + ')' + + +class PoseGCN(nn.Module): + def __init__(self, + input_features=128, + output_features=3, + model_dim=128, + output_nodes=21, + p_dropout=0.1, + num_stage=1): + """Constructor. + + Args: + input_feature: num of input feature of the graph nodes. + model_dim: num of hidden features of the generated embeddings. + p_dropout: dropout probability + num_stage: number of residual blocks in the network. + output_nodes: number of nodes in graph. 
+ """ + super(PoseGCN, self).__init__() + self.num_stage = num_stage + self._n_nodes = output_nodes + self._model_dim = model_dim + self._output_features = output_features + self._hidden_dim = 512 + + self._front = nn.Sequential( + nn.Linear(model_dim, output_nodes*self._hidden_dim), + nn.Dropout(p_dropout) + ) + utils.weight_init(self._front, init_fn=utils.xavier_init) + + self.gc1 = GraphConvolution( + self._hidden_dim, + self._hidden_dim, + output_nodes=output_nodes + ) + self.bn1 = nn.BatchNorm1d(output_nodes * self._hidden_dim) + + self.gcbs = [] + for i in range(num_stage): + self.gcbs.append(GC_Block( + self._hidden_dim, + p_dropout=p_dropout, + output_nodes=output_nodes) + ) + + self.gcbs = nn.ModuleList(self.gcbs) + + self.gc7 = GraphConvolution( + self._hidden_dim, + output_features, + output_nodes=output_nodes + ) + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + gcn_params = filter(lambda p: p.requires_grad, self.parameters()) + nparams = sum([np.prod(p.size()) for p in gcn_params]) + print('[INFO] ({}) GCN has {} params!'.format(self.__class__.__name__, nparams)) + + + def preprocess(self, x): + if len(x.size()) < 3: + _, D = x.size() + # seq_len, batch_size, input_dim + x = x.view(self._seq_len, -1, D) + # [batch_size, seq_len, input_dim] + x = torch.transpose(x, 0, 1) + # [batch_size, input_dim, seq_len] + x = torch.transpose(x, 1, 2) + return x + + return x + + def postprocess(self, y): + """Flattents the input tensor. + Args: + y: Input tensor of shape [batch_size, n_nodes, output_features]. + """ + y = y.view(-1, self._n_nodes*self._output_features) + return y + + def forward(self, x): + """Forward pass of network. + + Args: + x: [batch_size, model_dim]. + """ + # [batch_size, model_dim*n_nodes] + x = self._front(x) + x = x.view(-1, self._n_nodes, self._hidden_dim) + + # [batch_size, n_joints, model_dim] + y = self.gc1(x) + b, n, f = y.shape + y = self.bn1(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + + for i in range(self.num_stage): + y = self.gcbs[i](y) + + # [batch_size, n_joints, output_features] + y = self.gc7(y) + # y = y + x + + # [seq_len*batch_size, input_dim] + y = self.postprocess(y) + + return y + +class SimpleEncoder(nn.Module): + def __init__(self, + n_nodes, + input_features, + model_dim, + p_dropout): + """Constructor. + + Args: + input_dim: Dimension of the input vector. This will be equivalent to + the number of nodes in the graph, each node with 1 feature each. + model_dim: Dimension of the output vector to produce. + p_dropout: Dropout to be applied for regularization. 
+ """ + super(SimpleEncoder, self).__init__() + #The graph convolutions can be defined as \sigma(AxHxW), where A is the + #A\in R^{NxN} x H\in R^{NxM} x W\in R ^{MxO} + self._input_features = input_features + self._output_nodes = n_nodes + self._hidden_dim = 512 + self._model_dim = model_dim + self._num_stage = 1 + + print('[INFO] ({}) Hidden dimension: {}!'.format( + self.__class__.__name__, self._hidden_dim)) + self.gc1 = GraphConvolution( + in_features=self._input_features, + out_features=self._hidden_dim, + output_nodes=self._output_nodes + ) + self.bn1 = nn.BatchNorm1d(self._output_nodes*self._hidden_dim) + self.gc2 = GraphConvolution( + in_features=self._hidden_dim, + out_features=model_dim, + output_nodes=self._output_nodes + ) + + self.gcbs = [] + for i in range(self._num_stage): + self.gcbs.append(GC_Block( + self._hidden_dim, + p_dropout=p_dropout, + output_nodes=self._output_nodes) + ) + self.gcbs = nn.ModuleList(self.gcbs) + + self.do = nn.Dropout(p_dropout) + self.act_f = nn.Tanh() + + self._back = nn.Sequential( + nn.Linear(model_dim*self._output_nodes, model_dim), + nn.Dropout(p_dropout) + ) + utils.weight_init(self._back, init_fn=utils.xavier_init) + + gcn_params = filter(lambda p: p.requires_grad, self.parameters()) + nparams = sum([np.prod(p.size()) for p in gcn_params]) + print('[INFO] ({}) GCN has {} params!'.format(self.__class__.__name__, nparams)) + + def forward(self, x): + """Forward pass of network. + + Args: + x: [batch_size, n_poses, pose_dim/input_dim]. + """ + + B, S, D = x.size() + + y = self.gc1(x.view(-1, self._output_nodes, self._input_features)) + b, n, f = y.shape + y = self.bn1(y.view(b, -1)).view(b, n, f) + y = self.act_f(y) + y = self.do(y) + + for i in range(self._num_stage): + y = self.gcbs[i](y) + + y = self.gc2(y) + + y = self._back(y.view(-1, self._model_dim*self._output_nodes)) + + y = y.view(B, S, self._model_dim) + + return y + + +def test_decoder(): + seq_len = 25 + input_size = 63 + model_dim = 128 + dropout = 0.3 + n_stages = 2 + output_nodes = 21 + + joint_dof = 1 + n_joints = model_dim + layer = GraphConvolution( + in_features=joint_dof, + out_features=model_dim, + output_nodes=n_joints + ) + + X = torch.FloatTensor(10, n_joints, joint_dof) + + gcn = PoseGCN( + input_features=model_dim, + output_features=3, + model_dim=model_dim, + output_nodes=output_nodes, + p_dropout=0.1, + num_stage=2 + ) + + X = torch.FloatTensor(10*seq_len, model_dim) + + +def test_encoder(): + input_size = 63 + model_dim = 128 + dropout = 0.3 + n_stages = 2 + output_nodes = 21 + dof = 9 + + encoder = SimpleEncoder( + n_nodes=output_nodes, + model_dim=model_dim, + input_features=dof, + p_dropout=0.1 + ) + X = torch.FloatTensor(10, 25, output_nodes*dof) + + + +if __name__ == '__main__': + test_encoder() + test_decoder() \ No newline at end of file diff --git a/models/potr/pose_transformer.py b/models/potr/pose_transformer.py new file mode 100644 index 0000000..ee13e34 --- /dev/null +++ b/models/potr/pose_transformer.py @@ -0,0 +1,241 @@ +import numpy as np +import os +import sys +import copy +import torch +import torch.nn as nn +import torch.nn.functional as F + +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") + +import models.potr.utils.utils as utils +import models.potr.utils.position_encodings as PositionEncodings +import models.potr.transformer_encoder as Encoder +import models.potr.transformer_decoder as Decoder +from models.potr.transformer import Transformer +import models.potr.pose_encoder_decoder as 
PoseEncoderDecoder + +class PoseTransformer(nn.Module): + def __init__(self, args): + super(PoseTransformer, self).__init__() + """ + args: + training: (bool) if the it's training mode, true, else, false + non_autoregressive: (boo + + transformer arguments: + num_encoder_layers: (int) + num_decoder_layers: (int) + model_dim: (int) + num_heads: (int) + dim_ffn: (int) + dropout: (float) + init_fn: (string) + use_query_embedding: (bool) + pre_normalization: (bool) + query_selection: (bool) + pred_frames_num: (int) + + position_encoder and position_decoder arguments: + pose_enc_beta: (float) + pose_enc_alpha: (float) + # pos_encoding_params = (args.pose_enc_beta, args.pos_enc_alpha) + + handle_copy_query: + pose_dim: (int) + + init_position_encodings: + obs_frames_num: (int) + + PoseEncoderDecoder.select_pose_encoder_decoder: + pose_embedding_type: (str) + pose_format: (str) + n_joints: (int) + + forward_training: + input_dim: (int) + + useless: + use_class_token: False + + + + """ + + self.args = args + + """ + self.init_fn = utils.normal_init_ \ + if args.init_fn == 'normal_init' else utils.xavier_init_ #utils.xavier_init_ + """ + pose_embedding_fn, pose_decoder_fn = \ + PoseEncoderDecoder.select_pose_encoder_decoder_fn(args.pose_embedding_type) + self.pose_embedding = pose_embedding_fn(args)#args.pose_embedding + self.pose_decoder = pose_decoder_fn(args) #args.pose_decoder + + + self.transformer = Transformer( + num_encoder_layers=self.args.num_encoder_layers, + num_decoder_layers=self.args.num_decoder_layers, + model_dim=self.args.model_dim, + num_heads=self.args.num_heads, + dim_ffn=self.args.dim_ffn, + dropout=self.args.dropout, + init_fn=self.args.init_fn, + use_query_embedding=self.args.use_query_embedding, + pre_normalization=self.args.pre_normalization, + query_selection=self.args.query_selection, + pred_frames_num=self.args.pred_frames_num + ) + + self.position_encoder = PositionEncodings.PositionEncodings1D( + num_position_feats=self.args.model_dim, + temperature=self.args.pose_enc_beta, + alpha=self.args.pose_enc_alpha + ) + + self.position_decoder = PositionEncodings.PositionEncodings1D( + num_position_feats=self.args.model_dim, + temperature=self.args.pose_enc_beta, + alpha=self.args.pose_enc_alpha + ) + + self.init_position_encodings() # self.query_embedding + self.init_query_embedding() + # self.encoder_pose_encodings, + # self.decoder_pose_encodings, + # self.mask_look_ahead + + def init_query_embedding(self): + """Initialization of query sequence embedding.""" + self.query_embedding = nn.Embedding(self.args.pred_frames_num, self.args.model_dim) + print('[INFO] ({}) Init query embedding!'.format(self.__class__.__name__)) + nn.init.xavier_uniform_(self.query_embedding.weight.data) + # self._query_embed.weight.data.normal_(0.0, 0.004) + + def init_position_encodings(self): + src_len = self.args.obs_frames_num - 1 + # when using a token we need an extra element in the sequence + + encoder_position_encodings = self.position_encoder(src_len).view( + src_len, 1, self.args.model_dim) + decoder_position_encodings = self.position_decoder(self.args.pred_frames_num).view( + self.args.pred_frames_num, 1, self.args.model_dim) + mask_look_ahead = torch.from_numpy( + utils.create_look_ahead_mask( + self.args.pred_frames_num, self.args.non_autoregressive)) + + self.encoder_position_encodings = nn.Parameter( + encoder_position_encodings, requires_grad=False) + self.decoder_position_encodings = nn.Parameter( + decoder_position_encodings, requires_grad=False) + self.mask_look_ahead = 
nn.Parameter(
+        mask_look_ahead, requires_grad=False)
+
+    def forward(self, inputs):
+        if self.args.training:
+            return self.forward_training(inputs)
+        elif self.args.non_autoregressive:
+            return self.forward_training(inputs)
+        else:
+            return self.forward_autoregressive(inputs)
+
+    def handle_copy_query(self, indices, observed_expmap_pose):
+        """Handles the way queries are generated by copying items from the inputs.
+
+        Args:
+            indices: A list of `batch_size` tuples. Each tuple has the form
+                (input_list, target_list), where input_list contains indices of
+                elements in the input to be copied to the elements in the target
+                specified by target_list.
+            observed_expmap_pose: Source skeleton sequence
+                [batch_size, src_len, pose_dim].
+
+        Returns:
+            A tuple whose first element is the decoder input skeletons with shape
+            [tgt_len, batch_size, skeleton_dim], and whose second element is the
+            skeleton embeddings of the input sequence with shape
+            [tgt_len, batch_size, pose_dim].
+        """
+        batch_size = observed_expmap_pose.size()[0]
+        decoder_inputs = torch.FloatTensor(
+            batch_size,
+            self.args.pred_frames_num,
+            self.args.pose_dim
+        ).to(self.decoder_position_encodings.device)
+        for i in range(batch_size):
+            for j in range(self.args.pred_frames_num):
+                src_idx, tgt_idx = indices[i][0][j], indices[i][1][j]
+                decoder_inputs[i, tgt_idx] = observed_expmap_pose[i, src_idx]
+        dec_inputs_encode = self.pose_embedding(decoder_inputs)
+
+        return torch.transpose(decoder_inputs, 0, 1), \
+            torch.transpose(dec_inputs_encode, 0, 1)
+
+    def forward_training(self, inputs):
+
+        observed_expmap_pose = inputs['observed_expmap_pose']
+        future_expmap_pose = inputs['future_expmap_pose']
+
+        # 1. Encode the sequence with the given pose encoder
+        # [batch_size, sequence_length, model_dim]
+        if self.pose_embedding is not None:
+            observed_expmap_pose = self.pose_embedding(observed_expmap_pose)
+            future_expmap_pose = self.pose_embedding(future_expmap_pose)
+
+        # 2. Compute the look-ahead mask and the positional encodings
+        # [sequence_length, batch_size, model_dim]
+        observed_expmap_pose = torch.transpose(observed_expmap_pose, 0, 1)
+        future_expmap_pose = torch.transpose(future_expmap_pose, 0, 1)
+
+        def query_copy_fn(indices):
+            return self.handle_copy_query(indices, observed_expmap_pose)
+
+        # 3. Compute the attention weights using the transformer
+        # [future_expmap_pose_length, batch_size, model_dim]
+        attn_output, memory, attn_weights, enc_weights, mat = self.transformer(
+            observed_expmap_pose,
+            future_expmap_pose,
+            query_embedding=self.query_embedding.weight,
+            encoder_position_encoding=self.encoder_position_encodings,
+            decoder_position_encoding=self.decoder_position_encodings,
+            mask_look_ahead=self.mask_look_ahead,
+            mask_target_padding=inputs['mask_target_padding'],
+            get_attn_wights=inputs['get_attn_weights'],
+            query_selection=query_copy_fn
+        )
+
+        end = self.args.input_dim if self.args.input_dim == self.args.pose_dim else self.args.pose_dim
+        out_sequence = []
+        future_expmap_pose = mat[0] if self.args.query_selection else \
+            torch.transpose(inputs['future_expmap_pose'], 0, 1)
+
+        # 4. Decode the sequence with the pose decoder.
+        # The decoding process is time independent,
+        # i.e. non-autoregressive or parallel decoding.
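+        # Each decoder layer output below is projected back to pose space and
+        # added residually to the (transposed) target sequence, so the loop
+        # produces num_decoder_layers intermediate predictions for supervision.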
+ # [batch_size, pred_frames_num, pose_dim] + for l in range(self.args.num_decoder_layers): + # [pred_frames_num*batch_size, pose_dim] + out_sequence_ = self.pose_decoder( + attn_output[l].view(-1, self.args.model_dim)) + # [pred_frames_num, batch_size, pose_dim] + out_sequence_ = out_sequence_.view( + self.args.pred_frames_num, -1, self.args.pose_dim) + # apply residual connection between target query and predicted pose + # [pred_frames_num, batch_size, pose_dim] + out_sequence_ = out_sequence_ + future_expmap_pose[:, :, 0:end] + # [batch_size, pred_frames_num, pose_dim] + out_sequence_ = torch.transpose(out_sequence_, 0, 1) + out_sequence.append(out_sequence_) + + + outputs = { + 'pred_expmap_pose': out_sequence, + 'attn_weights': attn_weights, + 'enc_weights': enc_weights, + 'mat': mat + } + + return outputs + + def forward_autoregressive(self, inputs): + pass diff --git a/models/potr/position_encodings.py b/models/potr/position_encodings.py new file mode 100644 index 0000000..db7f129 --- /dev/null +++ b/models/potr/position_encodings.py @@ -0,0 +1,42 @@ +import numpy as np +import torch +from torch import nn + + +class PositionEncodings1D(object): + """Positional encodings for `1D` sequences. + + Implements the following equations: + + PE_{(pos, 2i)} = sin(pos/10000^{2i/d_model}) + PE_{(pos, 2i+1)} = cos(pos/10000^{2i/d_model}) + + Where d_model is the number of positional features. Also known as the + depth of the positional encodings. These are the positional encodings + proposed in [2]. + """ + + def __init__(self, num_pos_feats, temperature, alpha): + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.alpha = alpha + + def __call__(self, seq_length): + angle_rads = self.get_angles( + np.arange(seq_length)[:, np.newaxis], + np.arange(self.num_pos_feats)[np.newaxis, :] + ) + + # apply sin to even indices in the array; 2i + angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2]) + # apply cos to odd indices in the array; 2i+1 + angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2]) + pos_encoding = angle_rads[np.newaxis, ...] 
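+        # angle_rads is [seq_length, num_pos_feats]; the added leading axis
+        # yields a [1, seq_length, num_pos_feats] table. `temperature` replaces
+        # the constant 10000 of the standard encoding and `alpha` scales the
+        # angle arguments.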
+ pos_encoding = pos_encoding.astype(np.float32) + + return torch.from_numpy(pos_encoding) + + def get_angles(self, pos, i): + angle_rates = 1 / np.power( + self.temperature, (2 * (i//2)) / np.float32(self.num_pos_feats)) + return self.alpha * pos * angle_rates \ No newline at end of file diff --git a/models/potr/potr.py b/models/potr/potr.py new file mode 100644 index 0000000..f2b5957 --- /dev/null +++ b/models/potr/potr.py @@ -0,0 +1,290 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import os, sys + +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") + +from potr.transformer import Transformer +from potr.position_encodings import PositionEncodings1D +from potr import utils +from potr.pose_encoder_decoder import select_pose_encoder_decoder_fn +from models.potr.data_process import train_preprocess, post_process_to_euler + +class POTR(nn.Module): + def __init__(self, args): + super(POTR, self).__init__() + self.args = args + + pose_embedding_fn, pose_decoder_fn = \ + select_pose_encoder_decoder_fn(args) + + self.pose_embedding = pose_embedding_fn(args)#args.pose_embedding + self.pose_decoder = pose_decoder_fn(args) #args.pose_decoder + + self.transformer = Transformer(args) + + transformer_params = filter(lambda p: p.requires_grad, self.transformer.parameters()) + n_transformer_params = sum([np.prod(p.size()) for p in transformer_params]) + thisname = self.__class__.__name__ + print('[INFO] ({}) Transformer has {} parameters!'.format(thisname, n_transformer_params)) + + + self.pos_encoder = PositionEncodings1D( + num_pos_feats=args.model_dim, + temperature=args.pos_enc_beta, + alpha=args.pos_enc_alpha + ) + + self.pos_decoder = PositionEncodings1D( + num_pos_feats=args.model_dim, + temperature=args.pos_enc_beta, + alpha=args.pos_enc_alpha + ) + + args.use_class_token = False + self.init_position_encodings() + self.init_query_embedding() + + if args.use_class_token: + self.init_class_token() + + if args.predict_activity: + self.action_head_size = self.model_dim if self.args.use_class_token \ + else args.model_dim*(args.obs_frames_num-1) + self.action_head = nn.Sequential( + nn.Linear(self.action_head_size, args.num_activities), + ) + + if args.consider_uncertainty: + self.uncertainty_matrix = nn.parameter.Parameter(data=torch.zeros(args.num_activities, args.n_major_joints), requires_grad=True) + nn.init.xavier_uniform_(self.uncertainty_matrix.data) + #self.uncertainty_matrix.data + + + def init_query_embedding(self): + """Initialization of query sequence embedding.""" + self.query_embed = nn.Embedding(self.args.future_frames_num, self.args.model_dim) + print('[INFO] ({}) Init query embedding!'.format(self.__class__.__name__)) + nn.init.xavier_uniform_(self.query_embed.weight.data) + # self._query_embed.weight.data.normal_(0.0, 0.004) + + def init_class_token(self): + token = torch.FloatTensor(1, self.args.model_dim) + print('[INFO] ({}) Init class token!'.format(self.__class__.__name__)) + self.class_token = nn.Parameter(token, requires_grad=True) + nn.init.xavier_uniform_(self.class_token.data) + + def init_position_encodings(self): + src_len = self.args.obs_frames_num-1 + + # when using a token we need an extra element in the sequence + if self.args.use_class_token: + src_len = src_len + 1 + + encoder_pos_encodings = self.pos_encoder(src_len).view( + src_len, 1, self.args.model_dim) + + decoder_pos_encodings = self.pos_decoder(self.args.future_frames_num).view( + self.args.future_frames_num, 1, 
self.args.model_dim) + + mask_look_ahead = torch.from_numpy( + utils.create_look_ahead_mask( + self.args.future_frames_num, self.args.non_autoregressive)) + + self.encoder_pos_encodings = nn.Parameter( + encoder_pos_encodings, requires_grad=False) + + self.decoder_pos_encodings = nn.Parameter( + decoder_pos_encodings, requires_grad=False) + + self.mask_look_ahead = nn.Parameter( + mask_look_ahead, requires_grad=False) + + def handle_class_token(self, input_pose_seq): + _, B, _ = input_pose_seq.size() + token = self.class_token.squeeze().repeat(1, B, 1) + input_pose_seq = torch.cat([token, input_pose_seq], axis=0) + + return input_pose_seq + + def handle_copy_query(self, indices, input_pose_seq_): + + batch_size = input_pose_seq_.size()[0] + decoder_inputs = torch.FloatTensor( + batch_size, + self.args.future_frames_num, + self.args.pose_dim * self.args.n_joints + ).to(self.decoder_pos_encodings.device) + for i in range(batch_size): + for j in range(self.args.future_frames_num): + src_idx, tgt_idx = indices[i][0][j], indices[i][1][j] + decoder_inputs[i, tgt_idx] = input_pose_seq_[i, src_idx] + dec_inputs_encode = self.pose_embedding(decoder_inputs) + + return torch.transpose(decoder_inputs, 0, 1), \ + torch.transpose(dec_inputs_encode, 0, 1) + + def forward(self, + inputs, + mask_target_padding=None, + get_attn_weights=False): + + preprocessed_inputs = train_preprocess(inputs, self.args) + enc_shape = preprocessed_inputs['encoder_inputs'].shape + dec_shape = preprocessed_inputs['decoder_inputs'].shape + input_pose_seq = preprocessed_inputs['encoder_inputs'].reshape((*enc_shape[:-2], -1)) + target_pose_seq = preprocessed_inputs['decoder_inputs'].reshape((*dec_shape[:-2], -1)) + + return self.forward_training( + input_pose_seq, target_pose_seq, mask_target_padding, get_attn_weights) + + + def forward_training(self, + input_pose_seq_, + target_pose_seq_, + mask_target_padding, + get_attn_weights): + + input_pose_seq = input_pose_seq_ + target_pose_seq = target_pose_seq_ + if self.pose_embedding is not None: + input_pose_seq = self.pose_embedding(input_pose_seq) + target_pose_seq = self.pose_embedding(target_pose_seq) + + input_pose_seq = torch.transpose(input_pose_seq, 0, 1) + target_pose_seq = torch.transpose(target_pose_seq, 0, 1) + + def query_copy_fn(indices): + return self.handle_copy_query(indices, input_pose_seq_) + + if self.args.use_class_token: + input_pose_seq = self.handle_class_token(input_pose_seq) + + + attn_output, memory, attn_weights, enc_weights, mat = self.transformer( + input_pose_seq, + target_pose_seq, + query_embedding=self.query_embed.weight, + encoder_position_encodings=self.encoder_pos_encodings, + decoder_position_encodings=self.decoder_pos_encodings, + mask_look_ahead=self.mask_look_ahead, + mask_target_padding=mask_target_padding, + get_attn_weights=get_attn_weights, + query_selection_fn=query_copy_fn + ) + + end = self.args.pose_dim * self.args.n_major_joints + out_sequence = [] + target_pose_seq_ = mat[0] if self.args.query_selection else \ + torch.transpose(target_pose_seq_, 0, 1) + + for l in range(self.args.num_decoder_layers): + out_sequence_ = self.pose_decoder( + attn_output[l].view(-1, self.args.model_dim)) + out_sequence_ = out_sequence_.view( + self.args.future_frames_num, -1, self.args.pose_dim * self.args.n_major_joints) + out_sequence_ = out_sequence_ + target_pose_seq_[:, :, 0:end] + + out_sequence_ = torch.transpose(out_sequence_, 0, 1) + + shape = out_sequence_.shape + out_sequence_ = out_sequence_.reshape((*shape[:-1], self.args.n_major_joints, 
self.args.pose_dim)) + out_sequence.append(out_sequence_) + + + pred_euler_pose = torch.tensor(post_process_to_euler( # convert to post_process_to_format + out_sequence[-1].detach().cpu().numpy(), + self.args.n_major_joints, + self.args.n_h36m_joints, + self.args.pose_format)) + + outputs = { + 'pred_metric_pose': pred_euler_pose, + 'pred_pose': out_sequence, + 'attn_weights': attn_weights, + 'enc_weights': enc_weights, + 'mat': mat + } + + if self.args.predict_activity: + outputs['out_class'] = self.predict_activity(attn_output, memory) + + if self.args.consider_uncertainty: + outputs['uncertainty_matrix'] = torch.sigmoid(self.uncertainty_matrix) + + + return outputs + + + + def predict_activity(self, attn_output, memory): + in_act = torch.transpose(memory, 0, 1) + + if self.args.use_class_token: + token = in_act[:, 0] + actions = self.action_head(token) + return [actions] + + in_act = torch.reshape(in_act, (-1, self.action_head_size)) + actions = self.action_head(in_act) + return [actions] + + +if __name__ == '__main__': + thispath = os.path.dirname(os.path.abspath(__file__)) + sys.path.insert(0, thispath+"/../") + import potr.utils as utils + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--num_encoder_layers', default=4) + parser.add_argument('--num_decoder_layers', default=4) + parser.add_argument('--query_selection', default=False) + parser.add_argument('--future_frames_num', default=20) + parser.add_argument('--obs_frames_num', default=50) + parser.add_argument('--use_query_embedding', default=False) + parser.add_argument('--num_layers', default=6) + parser.add_argument('--model_dim', default=128) + parser.add_argument('--num_heads', default=2) + parser.add_argument('--dim_ffn', default=16) + parser.add_argument('--dropout', default=0.5) + parser.add_argument('--init_fn_name', default='xavier') + parser.add_argument('--pre_normalization', default=True) + parser.add_argument('--pose_dim', default=9) + parser.add_argument('--pose_embedding_type', default='gcn_enc') + parser.add_argument('--pos_enc_beta', default=500) + parser.add_argument('--pos_enc_alpha', default=10) + parser.add_argument('--use_class_token', default=False) + parser.add_argument('--predict_activity', default=True) + parser.add_argument('--num_activities', default=15) + parser.add_argument('--non_autoregressive', default=True) + parser.add_argument('--n_joints', default=21) + parser.add_argument('--pose_format', default='rotmat') + #parser.add_argument('--pose_embedding') + #parser.add_argument('--') + #parser.add_argument('--') + args = parser.parse_args() + + + src_seq_length = args.obs_frames_num + tgt_seq_length = args.future_frames_num + batch_size = 8 + + src_seq = torch.FloatTensor(batch_size, src_seq_length - 1, args.pose_dim*args.n_joints).uniform_(0, 1) + tgt_seq = torch.FloatTensor(batch_size, tgt_seq_length, args.pose_dim*args.n_joints).fill_(1) + + + #mask_look_ahead = utils.create_look_ahead_mask(tgt_seq_length) + #mask_look_ahead = torch.from_numpy(mask_look_ahead) + + #encodings = torch.FloatTensor(tgt_seq_length, 1, args.model_dim).uniform_(0,1) + + model = POTR(args) + + out_attn, memory, out_weights_, enc_weights_, (tgt_plain, prob_matrix_) = model(src_seq, + tgt_seq, + None, + get_attn_weights=False) \ No newline at end of file diff --git a/models/potr/transformer.py b/models/potr/transformer.py new file mode 100644 index 0000000..9330269 --- /dev/null +++ b/models/potr/transformer.py @@ -0,0 +1,115 @@ +import torch +import torch.nn as nn +import torch.nn.functional 
as F + +from scipy.optimize import linear_sum_assignment +import os +import sys +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") + +from potr.transformer_encoder import TransformerEncoder +from potr.transformer_decoder import TransformerDecoder + +class Transformer(nn.Module): + def __init__(self, transformer_args): + super(Transformer, self).__init__() + self.args = transformer_args + self.use_query_embedding = self.args.use_query_embedding + self.query_selection = self.args.query_selection + + self.encoder = TransformerEncoder( + num_layers=self.args.num_encoder_layers, + model_dim=self.args.model_dim, + num_heads=self.args.num_heads, + dim_ffn=self.args.dim_ffn, + dropout=self.args.dropout, + init_fn_name=self.args.init_fn_name, + pre_normalization=self.args.pre_normalization + ) + + self.decoder = TransformerDecoder( + num_layers=self.args.num_decoder_layers, + model_dim=self.args.model_dim, + num_heads=self.args.num_heads, + dim_ffn=self.args.dim_ffn, + dropout=self.args.dropout, + init_fn_name=self.args.init_fn_name, + use_query_embedding=self.args.use_query_embedding, + pre_normalization=self.args.pre_normalization + ) + + if self.args.query_selection: + self.position_predictor = nn.Linear(self.args.model_dim, self.args.future_frames_num) + + def process_index_selection(self, self_attn, one_to_one_selection=False): + """Selection of query elments using position predictor from encoder memory. + + After prediction a maximum assignement problem is solved to get indices for + each element in the query sequence. + + Args: + self_attn: Encoder memory with shape [src_len, batch_size, model_dim] + + Returns: + A tuple with two list of i and j matrix entries of m + """ + batch_size = self_attn.size()[1] + in_pos = torch.transpose(self_attn, 0, 1) + prob_matrix = self.position_predictor(in_pos) + + if one_to_one_selection: + soft_matrix = F.softmax(prob_matrix, dim=2) + indices = [linear_sum_assignment(soft_matrix[i].cpu().detach(), maximize=True) + for i in range(batch_size)] + else: + soft_matrix = F.softmax(prob_matrix, dim=1) + indices_rows = torch.argmax(soft_matrix, 1) + indices = [(indices_rows[i], list(range(prob_matrix.size()[2]))) + for i in range(batch_size)] + + return indices, soft_matrix + + def forward(self, + source_seq, + target_seq, + encoder_position_encodings=None, + decoder_position_encodings=None, + query_embedding=None, + mask_target_padding=None, + mask_look_ahead=None, + get_attn_weights=False, + query_selection_fn=None): + + if self.use_query_embedding: + bs = source_seq.size()[1] + query_embedding = query_embedding.unsqueeze(1).repeat(1, bs, 1) + decoder_position_encodings = encoder_position_encodings + + memory, enc_weights = self.encoder(source_seq, encoder_position_encodings) + + tgt_plain = None + if self.query_selection: + indices, prob_matrix = self.process_index_selection(memory) + tgt_plain, target_seq = query_selection_fn(indices) + + out_attn, out_weights = self.decoder( + target_seq, + memory, + decoder_position_encodings, + query_embedding=query_embedding, + mask_target_padding=mask_target_padding, + mask_look_ahead=mask_look_ahead, + get_attn_weights=get_attn_weights + ) + + out_weights_ = None + enc_weights_ = None + prob_matrix_ = None + if get_attn_weights: + out_weights_, enc_weights_ = out_weights, enc_weights + + if self.query_selection: + prob_matrix_ = prob_matrix + + return out_attn, memory, out_weights_, enc_weights_, (tgt_plain, prob_matrix_) \ No newline at end of file diff --git 
a/models/potr/transformer_decoder.py b/models/potr/transformer_decoder.py new file mode 100644 index 0000000..6934d76 --- /dev/null +++ b/models/potr/transformer_decoder.py @@ -0,0 +1,255 @@ +import numpy as np +import sys +import os + +import torch +import torch.nn as nn + +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") +import potr.utils as utils +from potr import INIT_FUNC + +class DecoderLayer(nn.Module): + + def __init__(self, + model_dim, + num_heads, + dim_ffn, + dropout, + init_fn_name, + pre_normalization, + use_query_embedding): + super(DecoderLayer, self).__init__() + init_fn = INIT_FUNC[init_fn_name] + self.use_query_embedding = use_query_embedding + + self.self_attn = nn.MultiheadAttention(model_dim, num_heads, dropout=dropout) + self.multihead_attn = nn.MultiheadAttention(model_dim, num_heads, dropout=dropout) + + self.linear1 = nn.Linear(model_dim, dim_ffn) + self.linear2 = nn.Linear(dim_ffn, model_dim) + self.relu = nn.ReLU() + + self.norm1 = nn.LayerNorm(model_dim) + self.norm2 = nn.LayerNorm(model_dim) + self.norm3 = nn.LayerNorm(model_dim) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.dropout3 = nn.Dropout(dropout) + self.dropout4 = nn.Dropout(dropout) + + utils.weight_init(self.linear1, init_fn=init_fn) + utils.weight_init(self.linear2, init_fn=init_fn) + + self.forward_fn = self.forward_pre if pre_normalization else self.forward_post + + def forward(self, + target_seq, + memory, + pos_encodings, + query_embedding=None, + mask_look_ahead=None, + mask_target_padding=None): + + return self.forward_fn( + target_seq, + memory, + pos_encodings, + query_embedding=query_embedding, + mask_look_ahead=mask_look_ahead, + mask_target_padding=mask_target_padding + ) + + def handle_query_embedding(self, sequence, embedding): + """Handle """ + if self.use_query_embedding: + return sequence + embedding + return sequence + + + def forward_post(self, + target_seq, + memory, + pos_encodings, + query_embedding=None, + mask_look_ahead=None, + mask_target_padding=None): + + if self.use_query_embedding: + q = k = v = target_seq + query_embedding + else: + q = k = v = target_seq + pos_encodings + + self_attn, self_attn_weights = self.self_attn( + query=q, key=k, value=v, + attn_mask=mask_look_ahead, + key_padding_mask=mask_target_padding + ) + self_attn = self.dropout1(self_attn) + out_self_attn = self.norm1(self_attn + target_seq) + + attn, attn_weights = self.multihead_attn( + query=self.handle_query_embedding(out_self_attn, query_embedding), + key=self.handle_query_embedding(memory, pos_encodings), + value=memory) + attn = self.dropout2(attn) + out_attn = self.norm2(attn + out_self_attn) + + ffn_output = self.linear1(out_attn) + ffn_output = self.relu(ffn_output) + ffn_output = self.dropout4(ffn_output) + ffn_output = self.linear2(ffn_output) + + ffn_output = self.dropout3(ffn_output) + outputs = self.norm3(ffn_output + out_attn) + + return outputs, self_attn_weights, attn_weights + + def forward_pre(self, + target_seq_, + memory, + pos_encodings, + query_embedding=None, + mask_look_ahead=None, + mask_target_padding=None): + """Forward pass of the layer with pre normalization. 
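+    Unlike forward_post, LayerNorm is applied to the inputs of the attention
+    and feed-forward sub-blocks here rather than to their outputs.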
+ + Args: + target_seq: [target_seq_length, batch_size, model_dim] + memory: [source_seq_length, batch_size, model_dim] + mask_look_ahead: [] + mask_target_padding: + """ + target_seq = self.norm1(target_seq_) + # 1) Compute self attention with current sequence of inferred tokens + # query is the same as key for self attention + if self.use_query_embedding: + # in case of using only the query embedding follow DETR [2] which drops + # values to zero and uses only the query embeddings + q = k = target_seq + query_embedding + v = target_seq + else: + q = k = v = target_seq + pos_encodings + + self_attn, self_attn_weights = self.self_attn( + query=q, key=k, value=v, + attn_mask=mask_look_ahead, + key_padding_mask=mask_target_padding + ) + self_attn = self.dropout1(self_attn) + out_self_attn = self.norm2(self_attn + target_seq_) + + # 2) Attend the encoder's memory given the comptued self attention + attn, attn_weights = self.multihead_attn( + query=self.handle_query_embedding(out_self_attn, query_embedding), + key=self.handle_query_embedding(memory, pos_encodings), + value=memory) + attn = self.dropout2(attn) + out_attn = self.norm3(attn + out_self_attn) + + # 3) Compute pointwise embeding by expanding and projecting + dropout + ffn_output = self.linear1(out_attn) + ffn_output = self.relu(ffn_output) + ffn_output = self.dropout4(ffn_output) + ffn_output = self.linear2(ffn_output) + + ffn_output = self.dropout3(ffn_output) + + return ffn_output, self_attn_weights, attn_weights + + +class TransformerDecoder(nn.Module): + """Transformer decoder module.""" + def __init__(self, + num_layers, + model_dim, + num_heads, + dim_ffn, + dropout, + init_fn_name, + pre_normalization, + use_query_embedding): + super(TransformerDecoder, self).__init__() + self.use_query_embedding = use_query_embedding + self.num_layers = num_layers + + self.decoder_stack = nn.ModuleList( + [DecoderLayer( + model_dim=model_dim, + num_heads=num_heads, + dim_ffn=dim_ffn, + dropout=dropout, + init_fn_name=init_fn_name, + pre_normalization=pre_normalization, + use_query_embedding=use_query_embedding) + for _ in range(num_layers) + ] + ) + + def forward(self, + target_seq, + memory, + pos_encodings, + query_embedding=None, + mask_target_padding=None, + mask_look_ahead=None, + get_attn_weights=False): + """Computes forward pass of decoder. + + Args: + target_seq: [target_sequence_length, batch_size, model_dim]. + memory: [source_sequence_length, batch_size, model_dim]. + pos_encodings: [target_seq_length, model_dim]. + mask_look_ahead: [target_seq_length, model_dim]. + + Returns: + A tensor with the decoded attention with shape [target_sequence_length, + batch_size, model_dim]. 
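+      Note: the outputs of all decoder layers are returned as a list of
+      length `num_layers`, together with the per-layer attention weights
+      when `get_attn_weights` is True.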
+ """ + seq_length = target_seq.size()[0] + output_list = [] + attn_weights_list = [] if get_attn_weights else None + outputs = torch.zeros_like(target_seq) if self.use_query_embedding else target_seq + + for l in range(self.num_layers): + outputs, self_attn_weights, attn_weights = self.decoder_stack[l]( + outputs, memory, + pos_encodings=pos_encodings, + query_embedding=query_embedding, + mask_target_padding=mask_target_padding, + mask_look_ahead=mask_look_ahead + ) + if get_attn_weights: + attn_weights_list.append(attn_weights) + output_list.append(outputs) + + return output_list, attn_weights_list + +if __name__ == '__main__': + thispath = os.path.dirname(os.path.abspath(__file__)) + sys.path.insert(0, thispath+"/../") + import potr.utils as utils + + seq_length = 55 + batch_size = 8 + model_dim = 128 + tgt_seq = torch.FloatTensor(seq_length, batch_size, model_dim).fill_(1) + memory = torch.FloatTensor(seq_length, batch_size, model_dim).uniform_(0, 1) + + mask_look_ahead = utils.create_look_ahead_mask(seq_length) + mask_look_ahead = torch.from_numpy(mask_look_ahead) + + encodings = torch.FloatTensor(seq_length, 1, model_dim).uniform_(0,1) + + decoder = TransformerDecoder( + num_layers=25, + model_dim=128, + num_heads=2, + dim_ffn=16, + dropout=0.5, + init_fn_name='xavier', + pre_normalization=True, + use_query_embedding=False) + outputs, attn_weights_list = decoder(tgt_seq, memory, encodings, mask_look_ahead=mask_look_ahead) + diff --git a/models/potr/transformer_encoder.py b/models/potr/transformer_encoder.py new file mode 100644 index 0000000..856351a --- /dev/null +++ b/models/potr/transformer_encoder.py @@ -0,0 +1,197 @@ +from argparse import ArgumentParser +import torch +import torch.nn as nn +import os +import sys + + +thispath = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, thispath+"/../") +from potr.pose_encoder_decoder import pose_decoder_mlp +from potr import utils +from potr import INIT_FUNC + +class EncoderLayer(nn.Module): + def __init__(self, model_dim, num_heads, dropout, dim_ffn, init_fn, pre_normalization): + super(EncoderLayer, self).__init__() + + init_fn = INIT_FUNC[init_fn] + self.pre_normalization = pre_normalization + + self.self_attn = nn.MultiheadAttention(model_dim, num_heads, dropout) + self.relu = nn.ReLU() + self.dropout_layer = nn.Dropout(dropout) + + self.linear1 = nn.Linear(model_dim, dim_ffn) + self.linear2 = nn.Linear(dim_ffn, model_dim) + self.norm1 = nn.LayerNorm(model_dim, eps=1e-5) + self.norm2 = nn.LayerNorm(model_dim, eps=1e-5) + + utils.weight_init(self.linear1, init_fn=init_fn) + utils.weight_init(self.linear2, init_fn=init_fn) + + def forward(self, source_seq, pos_encodings): + """Computes forward pass according. + + Args: + source_seq: [sequence_length, batch_size, model_dim]. + pos_encodings: [sequence_length, model_dim]. + + Returns: + Tensor of shape [sequence_length, batch_size, model_dim]. + """ + if self.pre_normalization: + return self.forward_pre(source_seq, pos_encodings) + + return self.forward_post(source_seq, pos_encodings) + + def forward_post(self, source_seq, pos_encodings): + """Computes decoder layer forward pass with pre normalization. + + Args: + source_seq: [sequence_length, batch_size, model_dim]. + pos_encodings: [sequence_length, model_dim]. + + Returns: + Tensor of shape [sequence_length, batch_size, model_dim]. 
+ """ + # add positional encodings to the input sequence + # for self attention query is the same as key + query = source_seq + pos_encodings + key = query + value = source_seq + + attn_output, attn_weights = self.self_attn( + query, + key, + value, + need_weights=True + ) + + norm_attn = self.dropout_layer(attn_output) + source_seq + norm_attn = self.norm1(norm_attn) + + output = self.linear1(norm_attn) + output = self.relu(output) + output = self.dropout_layer(output) + output = self.linear2(output) + output = self.dropout_layer(output) + norm_attn + output = self.norm2(output) + + return output, attn_weights + + def forward_pre(self, source_seq_, pos_encodings): + """Computes decoder layer forward pass with pre normalization. + + Args: + source_seq: [sequence_length, batch_size, model_dim]. + pos_encodings: [sequence_length, model_dim]. + + Returns: + Tensor of shape [sequence_length, batch_size, model_dim]. + """ + # add positional encodings to the input sequence + # for self attention query is the same as key + source_seq = self.norm1(source_seq_) + query = source_seq + pos_encodings + key = query + value = source_seq + + attn_output, attn_weights = self.self_attn( + query, + key, + value, + need_weights=True + ) + + norm_attn_ = self.dropout_layer(attn_output) + source_seq_ + norm_attn = self.norm2(norm_attn_) + + output = self.linear1(norm_attn) + output = self.relu(output) + output = self.dropout_layer(output) + output = self.linear2(output) + output = self.dropout_layer(output) + norm_attn_ + + return output, attn_weights + + + + +class TransformerEncoder(nn.Module): + def __init__(self, num_layers, model_dim, num_heads, dim_ffn, dropout, init_fn_name, pre_normalization): + super(TransformerEncoder, self).__init__() + + self.num_layers = num_layers + self.model_dim = model_dim + self.num_heads = num_heads + self.dim_ffn = dim_ffn + self.dropout = dropout + self.init_fn = INIT_FUNC[init_fn_name] + self.pre_normalization = pre_normalization + + + self.encoder_stack = nn.ModuleList( + [EncoderLayer( + model_dim=self.model_dim, + num_heads=self.num_heads, + dim_ffn=self.dim_ffn, + dropout=self.dropout, + init_fn=init_fn_name, # (str) + pre_normalization=self.pre_normalization) + for s in range(self.num_layers)] + ) + + def forward(self, input_sequence, pos_encodings): + """Computes decoder forward pass. + + Args: + source_seq: [sequence_length, batch_size, model_dim]. + pos_encodings: [sequence_length, model_dim]. + + Returns: + Tensor of shape [sequence_length, batch_size, model_dim]. 
+ """ + outputs = input_sequence + for l in range(self.num_layers): + outputs, attn_weights = self.encoder_stack[l](outputs, pos_encodings) + + # if self._norm: + # outputs = self._norm(outputs) + + return outputs, attn_weights + +if __name__ == '__main__': + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + seq_length = 50 + + pos_encodings = torch.FloatTensor(seq_length, 1, 128).uniform_(0,1) + seq = torch.FloatTensor(seq_length, 8, 128).fill_(1.0) + + pos_encodings = pos_encodings.to(device) + seq = seq.to(device) + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--num_layers', default=6) + parser.add_argument('--model_dim', default=128) + parser.add_argument('--num_heads', default=2) + parser.add_argument('--dim_ffn', default=16) + parser.add_argument('--dropout', default=0.5) + parser.add_argument('--init_fn_name', default='xavier') + parser.add_argument('--pre_normalization', default=True) + args = parser.parse_args() + + encoder = TransformerEncoder( + num_layers = args.num_layers, + model_dim = args.model_dim, + num_heads=args.num_heads, + dim_ffn=args.dim_ffn, + dropout=args.dropout, + init_fn_name=args.init_fn_name, + pre_normalization=args.pre_normalization + ) + encoder.to(device) + encoder.eval() + + output, attn_weights = encoder(seq, pos_encodings) \ No newline at end of file diff --git a/models/potr/utils.py b/models/potr/utils.py new file mode 100644 index 0000000..408dbc7 --- /dev/null +++ b/models/potr/utils.py @@ -0,0 +1,55 @@ +import torch.nn as nn +import numpy as np + +def xavier_init(layer, mean_, sd_, bias, norm_bias=True): + classname = layer.__class__.__name__ + if classname.find('Linear')!=-1: + print('[INFO] (xavier_init) Initializing layer {}'.format(classname)) + nn.init.xavier_uniform_(layer.weight.data) + # nn.init.xavier_normal(layer.bias.data) + if norm_bias: + layer.bias.data.normal_(0, 0.05) + else: + layer.bias.data.zero_() + +def normal_init(layer, mean_, sd_, bias, norm_bias=True): + """Intialization of layers with normal distribution with mean and bias""" + classname = layer.__class__.__name__ + # Only use the convolutional layers of the module + if classname.find('Linear') != -1: + print('[INFO] (normal_init) Initializing layer {}'.format(classname)) + layer.weight.data.normal_(mean_, sd_) + if norm_bias: + layer.bias.data.normal_(bias, 0.05) + else: + layer.bias.data.fill_(bias) + +def weight_init( + module, + mean_=0, + sd_=0.004, + bias=0.0, + norm_bias=False, + init_fn=normal_init): + """Initialization of layers with normal distribution""" + moduleclass = module.__class__.__name__ + try: + for layer in module: + if layer.__class__.__name__ == 'Sequential': + for l in layer: + init_fn(l, mean_, sd_, bias, norm_bias) + else: + init_fn(layer, mean_, sd_, bias, norm_bias) + except TypeError: + init_fn(module, mean_, sd_, bias, norm_bias) + + +def create_look_ahead_mask(seq_length, is_nonautoregressive=False): + """Generates a binary mask to prevent to use future context in a sequence.""" + if is_nonautoregressive: + return np.zeros((seq_length, seq_length), dtype=np.float32) + x = np.ones((seq_length, seq_length), dtype=np.float32) + mask = np.triu(x, 1).astype(np.float32) + return mask # (seq_len, seq_len) + + diff --git a/models/pv_lstm.py b/models/pv_lstm.py new file mode 100644 index 0000000..309003f --- /dev/null +++ b/models/pv_lstm.py @@ -0,0 +1,81 @@ +import torch +import torch.nn as nn + +from utils.others import pose_from_vel + + +class PVLSTM(nn.Module): + def __init__(self, args): + 
super(PVLSTM, self).__init__() + self.args = args + input_size = output_size = int(args.keypoints_num * args.keypoint_dim) + self.pose_encoder = Encoder(input_size, args.hidden_size, args.n_layers, args.dropout_enc) + self.vel_encoder = Encoder(input_size, args.hidden_size, args.n_layers, args.dropout_enc) + self.vel_decoder = Decoder(args.pred_frames_num, input_size, output_size, args.hidden_size, args.n_layers, + args.dropout_pose_dec, 'hardtanh', args.hardtanh_limit) + + def forward(self, inputs): + pose = inputs['observed_pose'] + vel = pose[..., 1:, :] - pose[..., :-1, :] + + (hidden_vel, cell_vel) = self.vel_encoder(vel.permute(1, 0, 2)) + hidden_vel = hidden_vel.squeeze(0) + cell_vel = cell_vel.squeeze(0) + + (hidden_pose, cell_pose) = self.pose_encoder(pose.permute(1, 0, 2)) + hidden_pose = hidden_pose.squeeze(0) + cell_pose = cell_pose.squeeze(0) + + vel_dec_input = vel[:, -1, :] + hidden_dec = hidden_pose + hidden_vel + cell_dec = cell_pose + cell_vel + pred_vel = self.vel_decoder(vel_dec_input, hidden_dec, cell_dec) + pred_pose = pose_from_vel(pred_vel, pose[..., -1, :]) + outputs = {'pred_pose': pred_pose, 'pred_vel': pred_vel} + + return outputs + + +class Encoder(nn.Module): + def __init__(self, input_size, hidden_size, n_layers, dropout): + super().__init__() + self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=n_layers, dropout=dropout) + + def forward(self, inpput_): + outputs, (hidden, cell) = self.lstm(inpput_) + return hidden, cell + + +class Decoder(nn.Module): + def __init__(self, outputs_num, input_size, output_size, hidden_size, n_layers, dropout, activation_type, + hardtanh_limit=None): + super().__init__() + self.outputs_num = outputs_num + self.dropout = nn.Dropout(dropout) + lstms = [ + nn.LSTMCell(input_size=input_size if i == 0 else hidden_size, hidden_size=hidden_size) for + i in range(n_layers)] + self.lstms = nn.Sequential(*lstms) + self.fc_out = nn.Linear(in_features=hidden_size, out_features=output_size) + if activation_type == 'hardtanh': + self.activation = nn.Hardtanh(min_val=-1 * hardtanh_limit, max_val=hardtanh_limit, inplace=False) + else: + self.activation = nn.Sigmoid() + + def forward(self, inputs, hiddens, cells): + dec_inputs = self.dropout(inputs) + if len(hiddens.shape) < 3 or len(cells.shape) < 3: + hiddens = torch.unsqueeze(hiddens, 0) + cells = torch.unsqueeze(cells, 0) + device = 'cuda' if inputs.is_cuda else 'cpu' + outputs = torch.tensor([], device=device) + for j in range(self.outputs_num): + for i, lstm in enumerate(self.lstms): + if i == 0: + hiddens[i], cells[i] = lstm(dec_inputs, (hiddens.clone()[i], cells.clone()[i])) + else: + hiddens[i], cells[i] = lstm(hiddens.clone()[i - 1], (hiddens.clone()[i], cells.clone()[i])) + output = self.activation(self.fc_out(hiddens.clone()[-1])) + dec_inputs = output.detach() + outputs = torch.cat((outputs, output.unsqueeze(1)), 1) + return outputs diff --git a/models/st_trans/ST_Trans.py b/models/st_trans/ST_Trans.py new file mode 100644 index 0000000..cef186e --- /dev/null +++ b/models/st_trans/ST_Trans.py @@ -0,0 +1,251 @@ +import math + +import torch +import torch.nn.functional as F +from torch import nn + +from models.st_trans.data_proc import Preprocess, Postprocess, Human36m_Postprocess, Human36m_Preprocess, AMASS_3DPW_Postprocess, AMASS_3DPW_Preprocess + + +def get_torch_trans(heads=8, layers=1, channels=64): + encoder_layer = nn.TransformerEncoderLayer( + d_model=channels, nhead=heads, dim_feedforward=64, activation="gelu" + ) + return 
nn.TransformerEncoder(encoder_layer, num_layers=layers) + + +def Conv1d_with_init(in_channels, out_channels, kernel_size): + layer = nn.Conv1d(in_channels, out_channels, kernel_size) + nn.init.kaiming_normal_(layer.weight) + return layer + + +class diff_CSDI(nn.Module): + def __init__(self, args, inputdim, side_dim): + super().__init__() + self.args = args + self.channels = args.diff_channels + + self.input_projection = Conv1d_with_init(inputdim, self.channels, 1) + self.output_projection1 = Conv1d_with_init(self.channels, self.channels, 1) + self.output_projection2 = Conv1d_with_init(self.channels, 1, 1) + nn.init.zeros_(self.output_projection2.weight) + + self.residual_layers = nn.ModuleList( + [ + ResidualBlock( + side_dim=side_dim, + channels=self.channels, + nheads=args.diff_nheads + ) + for _ in range(args.diff_layers) + ] + ) + + def forward(self, x, cond_info): + B, inputdim, K, L = x.shape + + x = x.reshape(B, inputdim, K * L) + x = self.input_projection(x) + x = F.relu(x) + x = x.reshape(B, self.channels, K, L) + + skip = [] + for layer in self.residual_layers: + x, skip_connection = layer(x, cond_info) + skip.append(skip_connection) + + x = torch.sum(torch.stack(skip), dim=0) / math.sqrt(len(self.residual_layers)) + x = x.reshape(B, self.channels, K * L) + x = self.output_projection1(x) + x = F.relu(x) + x = self.output_projection2(x) + x = x.reshape(B, K, L) + return x + + +class ResidualBlock(nn.Module): + def __init__(self, side_dim, channels, nheads): + super().__init__() + self.cond_projection = Conv1d_with_init(side_dim, 2 * channels, 1) + self.mid_projection = Conv1d_with_init(channels, 2 * channels, 1) + self.output_projection = Conv1d_with_init(channels, 2 * channels, 1) + + self.time_layer = get_torch_trans(heads=nheads, layers=1, channels=channels) + self.feature_layer = get_torch_trans(heads=nheads, layers=1, channels=channels) + + def forward_time(self, y, base_shape): + B, channel, K, L = base_shape + if L == 1: + return y + y = y.reshape(B, channel, K, L).permute(0, 2, 1, 3).reshape(B * K, channel, L) + y = self.time_layer(y.permute(2, 0, 1)).permute(1, 2, 0) + y = y.reshape(B, K, channel, L).permute(0, 2, 1, 3).reshape(B, channel, K * L) + return y + + def forward_feature(self, y, base_shape): + B, channel, K, L = base_shape + if K == 1: + return y + y = y.reshape(B, channel, K, L).permute(0, 3, 1, 2).reshape(B * L, channel, K) + y = self.feature_layer(y.permute(2, 0, 1)).permute(1, 2, 0) + y = y.reshape(B, L, channel, K).permute(0, 2, 3, 1).reshape(B, channel, K * L) + return y + + def forward(self, x, cond_info): + B, channel, K, L = x.shape + base_shape = x.shape + x = x.reshape(B, channel, K * L) + + y = x + y = self.forward_time(y, base_shape) + y = self.forward_feature(y, base_shape) + y = self.mid_projection(y) + + _, cond_dim, _, _ = cond_info.shape + cond_info = cond_info.reshape(B, cond_dim, K * L) + cond_info = self.cond_projection(cond_info) + y = y + cond_info + + gate, filter = torch.chunk(y, 2, dim=1) + y = torch.sigmoid(gate) * torch.tanh(filter) + y = self.output_projection(y) + + residual, skip = torch.chunk(y, 2, dim=1) + x = x.reshape(base_shape) + residual = residual.reshape(base_shape) + skip = skip.reshape(base_shape) + return (x + residual) / math.sqrt(2.0), skip + + +class CSDI_base(nn.Module): + def __init__(self, args): + super().__init__() + self.args = args + self.device = args.device + self.target_dim = args.keypoint_dim * args.n_major_joints + + self.emb_time_dim = args.model_timeemb + self.emb_feature_dim = args.model_featureemb + 
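+        # Side information = time embedding + per-feature embedding; one extra
+        # channel is appended for the conditioning mask when the model is not
+        # unconditional (see emb_total_dim below).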
self.is_unconditional = args.model_is_unconditional + + self.emb_total_dim = self.emb_time_dim + self.emb_feature_dim + if self.is_unconditional == False: + self.emb_total_dim += 1 + self.embed_layer = nn.Embedding( + num_embeddings=self.target_dim, embedding_dim=self.emb_feature_dim + ) + + input_dim = 1 if self.is_unconditional == True else 2 + self.diffmodel = diff_CSDI(args, input_dim, self.emb_total_dim) + + def time_embedding(self, pos, d_model=128): + pe = torch.zeros(pos.shape[0], pos.shape[1], d_model).to(self.device) + position = pos.unsqueeze(2) + div_term = 1 / torch.pow( + 10000.0, torch.arange(0, d_model, 2).to(self.device) / d_model + ) + pe[:, :, 0::2] = torch.sin(position * div_term) + pe[:, :, 1::2] = torch.cos(position * div_term) + return pe + + def get_side_info(self, observed_tp, cond_mask): + B, K, L = cond_mask.shape + + time_embed = self.time_embedding(observed_tp, self.emb_time_dim) # (B,L,emb) + time_embed = time_embed.unsqueeze(2).expand(-1, -1, K, -1) + feature_embed = self.embed_layer( + torch.arange(self.target_dim).to(self.device) + ) # (K,emb) + feature_embed = feature_embed.unsqueeze(0).unsqueeze(0).expand(B, L, -1, -1) + + side_info = torch.cat([time_embed, feature_embed], dim=-1) # (B,L,K,*) + side_info = side_info.permute(0, 3, 2, 1) # (B,*,K,L) + + if self.is_unconditional == False: + side_mask = cond_mask.unsqueeze(1) # (B,1,K,L) + side_info = torch.cat([side_info, side_mask], dim=1) + + return side_info + + def set_input_to_diffmodel(self, noisy_data, observed_data, cond_mask): + if self.is_unconditional == True: + total_input = noisy_data.unsqueeze(1) # (B,1,K,L) + else: + cond_obs = (cond_mask * observed_data).unsqueeze(1) + noisy_target = ((1 - cond_mask) * noisy_data).unsqueeze(1) + total_input = torch.cat([cond_obs, noisy_target], dim=1) # (B,2,K,L) + + return total_input + + def forward(self, batch): + ( + observed_data, + observed_tp, + cond_mask + ) = self.preprocess_data(batch) + + side_info = self.get_side_info(observed_tp, cond_mask) + + B, K, L = observed_data.shape + noisy_data = torch.zeros_like(observed_data).to(self.device) + + total_input = self.set_input_to_diffmodel(noisy_data, observed_data, cond_mask) + + predicted = self.diffmodel(total_input, side_info) # (B,K,L) + return self.postprocess_data(batch, predicted) + + +class ST_Trans(CSDI_base): + def __init__(self, args): + super(ST_Trans, self).__init__(args) + self.Lo = args.obs_frames_num + self.Lp = args.pred_frames_num + + if args.pre_post_process == 'human3.6m': + self.preprocess = Human36m_Preprocess(args).to(args.device) + self.postprocess = Human36m_Postprocess(args).to(args.device) + elif args.pre_post_process == 'AMASS' or args.pre_post_process == '3DPW': + self.preprocess = AMASS_3DPW_Preprocess(args).to(args.device) + self.postprocess = AMASS_3DPW_Postprocess(args).to(args.device) + else: + self.preprocess = Preprocess(args).to(args.device) + self.postprocess = Postprocess(args).to(args.device) + + for p in self.preprocess.parameters(): + p.requires_grad = False + + for p in self.postprocess.parameters(): + p.requires_grad = False + + def preprocess_data(self, batch): + observed_data = batch["observed_pose"].to(self.device) + observed_data = self.preprocess(observed_data) + + B, L, K = observed_data.shape + Lp = self.args.pred_frames_num + + observed_data = observed_data.permute(0, 2, 1) # B, K, L + + observed_data = torch.cat([ + observed_data, torch.zeros([B, K, Lp]).to(self.device) + ], dim=-1) + + observed_tp = torch.arange(self.Lo + 
self.Lp).unsqueeze(0).expand(B, -1).to(self.device) + cond_mask = torch.zeros_like(observed_data).to(self.device) + cond_mask[:, :, :L] = 1 + + return ( + observed_data, + observed_tp, + cond_mask + ) + + def postprocess_data(self, batch, predicted): + predicted = predicted[:, :, self.Lo:] + predicted = predicted.permute(0, 2, 1) + + return { + 'pred_pose': self.postprocess(batch['observed_pose'], predicted), # B, T, JC + } + \ No newline at end of file diff --git a/models/st_trans/__init__.py b/models/st_trans/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/st_trans/data_proc.py b/models/st_trans/data_proc.py new file mode 100644 index 0000000..50eeae1 --- /dev/null +++ b/models/st_trans/data_proc.py @@ -0,0 +1,145 @@ +import numpy as np +import torch +from torch import nn + + +def joint_to_index(x): + return np.concatenate((x * 3, x * 3 + 1, x * 3 + 2)) + + +class AMASS_3DPW_values(): + def __init__(self) -> None: + self.mean = np.array([[[ 144.42061, -471.00433, 42.905945, -144.2189, -471.00433, + 55.37049, 5.0257893, 235.50217, 6.188506, 131.57523, + -916.6374, -73.56259, -129.137, -914.8206, -67.33688, + 5.8155527, 278.29196, 43.45168, 189.25381, -942.00867, + 31.58287, -185.46811, -942.00867, 38.146023, 1.6395822, + 504.07196, 65.04476, 90.96787, 467.06006, 46.06653, + -79.57573, 464.56763, 35.583405, 5.7321978, 544.3716, + 132.00195, 189.30196, 464.18073, 46.495617, -181.78586, + 461.8248, 38.285446, 242.19247, 208.86894, 10.837954, + -243.21066, 220.56078, 20.73184, 256.45264, 66.64482, + 116.55112, -262.37643, 88.037315, 129.05185 ]]]) + self.std = np.array([[[ 48.352272, 68.6015, 119.17078, 47.49278, 69.03037, 120.7153, + 9.933628, 16.11266, 32.15347, 81.19508, 148.52235, 160.55476, + 78.806435, 148.95927, 161.60782, 15.046006, 26.999517, 41.232426, + 90.12439, 126.81438, 174.0965, 87.97808, 128.31987, 173.8965, + 38.010742, 43.834454, 91.36834, 28.467258, 66.382034, 72.003075, + 26.970959, 66.33471, 69.758385, 48.895977, 62.34938, 100.590385, + 33.747925, 76.94056, 85.15882, 32.314583, 77.06175, 83.645386, + 80.88272, 109.25045, 123.5628, 79.029915, 115.18032, 127.966995, + 158.545, 217.86617, 164.67949, 156.79645, 235.94897, 175.8384 ]]]) + +class Human36m_values(): + def __init__(self) -> None: + self.dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) + + self.mean = np.array([[[-107.9520, -334.9428, 159.4178, -59.2900, -708.5010, 61.4318, + -61.6391, -757.3103, 189.2157, -72.0266, -761.0327, 251.2684, + 151.7538, -326.8112, 161.4840, 134.0947, -709.5225, 71.7927, + 153.4157, -744.0466, 200.7195, 163.8421, -737.7441, 261.8018, + -17.9565, 210.8857, -12.5731, -30.5735, 429.6271, 36.1767, + -43.2606, 489.0777, 114.7583, -54.7775, 578.9327, 88.4990, + 108.9527, 394.7472, 26.0654, 237.0195, 213.8401, 44.9180, + 188.2216, 135.0727, 139.9878, 152.3083, 163.3067, 155.1163, + 196.3242, 118.3158, 182.5405, -163.6815, 375.3079, 23.2578, + -266.1268, 186.6490, 53.2938, -217.2098, 156.2352, 160.8916, + -200.4095, 191.2718, 165.2301, -223.5151, 149.2325, 211.9896]]]) + self.std = np.array([[[65.4117, 166.9468, 160.5147, 109.2458, 295.7622, 210.9699, 122.5746, + 308.4443, 228.9709, 131.0754, 310.0372, 235.9644, 74.9162, 174.3366, + 163.9575, 129.1666, 296.0691, 209.0041, 154.0681, 305.1154, 224.3635, + 165.3411, 304.1239, 
230.2749, 19.6905, 71.2422, 64.0733, 52.6362, + 150.2302, 141.1058, 68.3720, 177.7844, 164.2342, 78.0215, 203.7356, + 192.8816, 47.0527, 137.0687, 138.8337, 72.1145, 127.8964, 170.1875, + 151.9798, 210.0934, 199.3142, 155.3852, 219.3135, 193.1652, 191.3546, + 254.2903, 225.2465, 45.0912, 135.5994, 133.7429, 74.3784, 133.9870, + 160.7077, 143.9800, 235.9862, 196.2391, 147.1276, 232.4836, 188.2000, + 189.1858, 308.0274, 235.1181]]]) + + index_to_ignore = np.array([16, 20, 23, 24, 28, 31]) + self.index_to_ignore = joint_to_index(index_to_ignore) + + index_to_equal = np.array([13, 19, 22, 13, 27, 30]) + self.index_to_equal = joint_to_index(index_to_equal) + + index_to_copy = np.array([0, 1, 6, 11]) + self.index_to_copy = joint_to_index(index_to_copy) + +human36m = Human36m_values() +amass_3dpw = AMASS_3DPW_values() + + +class Human36m_Preprocess(nn.Module): + def __init__(self, args): + super(Human36m_Preprocess, self).__init__() + self.args = args + self.mean = torch.tensor(human36m.mean).to(args.device).float() + self.std = torch.tensor(human36m.std).to(args.device).float() + + def forward(self, observed_pose, normal=True): + observed_pose = observed_pose[:, :, human36m.dim_used] + if normal: + observed_pose = (observed_pose - self.mean) / self.std + return observed_pose + + +class Human36m_Postprocess(nn.Module): + def __init__(self, args): + super(Human36m_Postprocess, self).__init__() + self.args = args + self.mean = torch.tensor(human36m.mean).to(args.device).float() + self.std = torch.tensor(human36m.std).to(args.device).float() + + def forward(self, observed_pose, pred_pose, normal=True): + if normal: + pred_pose = (pred_pose * self.std) + self.mean + + x = torch.zeros([pred_pose.shape[0], pred_pose.shape[1], 96]).to(self.args.device) + x[:, :, human36m.dim_used] = pred_pose + x[:, :, human36m.index_to_copy] = observed_pose[:, -1:, human36m.index_to_copy] + x[:, :, human36m.index_to_ignore] = x[:, :, human36m.index_to_equal] + return x + +class AMASS_3DPW_Preprocess(nn.Module): + def __init__(self, args): + super(AMASS_3DPW_Preprocess, self).__init__() + self.args = args + self.mean = torch.tensor(amass_3dpw.mean).to(args.device).float() + self.std = torch.tensor(amass_3dpw.std).to(args.device).float() + + def forward(self, observed_pose, normal=True): + if normal: + observed_pose = (observed_pose - self.mean) / self.std + return observed_pose + + +class AMASS_3DPW_Postprocess(nn.Module): + def __init__(self, args): + super(AMASS_3DPW_Postprocess, self).__init__() + self.args = args + self.mean = torch.tensor(amass_3dpw.mean).to(args.device).float() + self.std = torch.tensor(amass_3dpw.std).to(args.device).float() + + def forward(self, observed_pose, pred_pose, normal=True): + if normal: + pred_pose = (pred_pose * self.std) + self.mean + return pred_pose + +class Preprocess(nn.Module): + def __init__(self, args): + super(Preprocess, self).__init__() + self.args = args + + def forward(self, observed_pose, normal=True): + return observed_pose + +class Postprocess(nn.Module): + def __init__(self, args): + super(Postprocess, self).__init__() + self.args = args + + def forward(self, observed_pose, pred_pose, normal=True): + return pred_pose \ No newline at end of file diff --git a/models/sts_gcn/__init__.py b/models/sts_gcn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/sts_gcn/data_proc.py b/models/sts_gcn/data_proc.py new file mode 100644 index 0000000..50eeae1 --- /dev/null +++ b/models/sts_gcn/data_proc.py @@ -0,0 +1,145 @@ +import numpy as np +import torch 
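+# NOTE: this file is identical to models/st_trans/data_proc.py (both entries in this diff
+# point at the same blob, 50eeae1): the two models share the same dataset statistics and
+# pre/post-processing modules.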
+from torch import nn + + +def joint_to_index(x): + return np.concatenate((x * 3, x * 3 + 1, x * 3 + 2)) + + +class AMASS_3DPW_values(): + def __init__(self) -> None: + self.mean = np.array([[[ 144.42061, -471.00433, 42.905945, -144.2189, -471.00433, + 55.37049, 5.0257893, 235.50217, 6.188506, 131.57523, + -916.6374, -73.56259, -129.137, -914.8206, -67.33688, + 5.8155527, 278.29196, 43.45168, 189.25381, -942.00867, + 31.58287, -185.46811, -942.00867, 38.146023, 1.6395822, + 504.07196, 65.04476, 90.96787, 467.06006, 46.06653, + -79.57573, 464.56763, 35.583405, 5.7321978, 544.3716, + 132.00195, 189.30196, 464.18073, 46.495617, -181.78586, + 461.8248, 38.285446, 242.19247, 208.86894, 10.837954, + -243.21066, 220.56078, 20.73184, 256.45264, 66.64482, + 116.55112, -262.37643, 88.037315, 129.05185 ]]]) + self.std = np.array([[[ 48.352272, 68.6015, 119.17078, 47.49278, 69.03037, 120.7153, + 9.933628, 16.11266, 32.15347, 81.19508, 148.52235, 160.55476, + 78.806435, 148.95927, 161.60782, 15.046006, 26.999517, 41.232426, + 90.12439, 126.81438, 174.0965, 87.97808, 128.31987, 173.8965, + 38.010742, 43.834454, 91.36834, 28.467258, 66.382034, 72.003075, + 26.970959, 66.33471, 69.758385, 48.895977, 62.34938, 100.590385, + 33.747925, 76.94056, 85.15882, 32.314583, 77.06175, 83.645386, + 80.88272, 109.25045, 123.5628, 79.029915, 115.18032, 127.966995, + 158.545, 217.86617, 164.67949, 156.79645, 235.94897, 175.8384 ]]]) + +class Human36m_values(): + def __init__(self) -> None: + self.dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) + + self.mean = np.array([[[-107.9520, -334.9428, 159.4178, -59.2900, -708.5010, 61.4318, + -61.6391, -757.3103, 189.2157, -72.0266, -761.0327, 251.2684, + 151.7538, -326.8112, 161.4840, 134.0947, -709.5225, 71.7927, + 153.4157, -744.0466, 200.7195, 163.8421, -737.7441, 261.8018, + -17.9565, 210.8857, -12.5731, -30.5735, 429.6271, 36.1767, + -43.2606, 489.0777, 114.7583, -54.7775, 578.9327, 88.4990, + 108.9527, 394.7472, 26.0654, 237.0195, 213.8401, 44.9180, + 188.2216, 135.0727, 139.9878, 152.3083, 163.3067, 155.1163, + 196.3242, 118.3158, 182.5405, -163.6815, 375.3079, 23.2578, + -266.1268, 186.6490, 53.2938, -217.2098, 156.2352, 160.8916, + -200.4095, 191.2718, 165.2301, -223.5151, 149.2325, 211.9896]]]) + self.std = np.array([[[65.4117, 166.9468, 160.5147, 109.2458, 295.7622, 210.9699, 122.5746, + 308.4443, 228.9709, 131.0754, 310.0372, 235.9644, 74.9162, 174.3366, + 163.9575, 129.1666, 296.0691, 209.0041, 154.0681, 305.1154, 224.3635, + 165.3411, 304.1239, 230.2749, 19.6905, 71.2422, 64.0733, 52.6362, + 150.2302, 141.1058, 68.3720, 177.7844, 164.2342, 78.0215, 203.7356, + 192.8816, 47.0527, 137.0687, 138.8337, 72.1145, 127.8964, 170.1875, + 151.9798, 210.0934, 199.3142, 155.3852, 219.3135, 193.1652, 191.3546, + 254.2903, 225.2465, 45.0912, 135.5994, 133.7429, 74.3784, 133.9870, + 160.7077, 143.9800, 235.9862, 196.2391, 147.1276, 232.4836, 188.2000, + 189.1858, 308.0274, 235.1181]]]) + + index_to_ignore = np.array([16, 20, 23, 24, 28, 31]) + self.index_to_ignore = joint_to_index(index_to_ignore) + + index_to_equal = np.array([13, 19, 22, 13, 27, 30]) + self.index_to_equal = joint_to_index(index_to_equal) + + index_to_copy = np.array([0, 1, 6, 11]) + self.index_to_copy = joint_to_index(index_to_copy) + +human36m = Human36m_values() 
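+# These module-level statistics are consumed by the *_Preprocess / *_Postprocess modules
+# defined below: poses are z-score normalized ((x - mean) / std) on the way into the
+# network and the normalization is inverted (x * std + mean) on the predicted output.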
+amass_3dpw = AMASS_3DPW_values() + + +class Human36m_Preprocess(nn.Module): + def __init__(self, args): + super(Human36m_Preprocess, self).__init__() + self.args = args + self.mean = torch.tensor(human36m.mean).to(args.device).float() + self.std = torch.tensor(human36m.std).to(args.device).float() + + def forward(self, observed_pose, normal=True): + observed_pose = observed_pose[:, :, human36m.dim_used] + if normal: + observed_pose = (observed_pose - self.mean) / self.std + return observed_pose + + +class Human36m_Postprocess(nn.Module): + def __init__(self, args): + super(Human36m_Postprocess, self).__init__() + self.args = args + self.mean = torch.tensor(human36m.mean).to(args.device).float() + self.std = torch.tensor(human36m.std).to(args.device).float() + + def forward(self, observed_pose, pred_pose, normal=True): + if normal: + pred_pose = (pred_pose * self.std) + self.mean + + x = torch.zeros([pred_pose.shape[0], pred_pose.shape[1], 96]).to(self.args.device) + x[:, :, human36m.dim_used] = pred_pose + x[:, :, human36m.index_to_copy] = observed_pose[:, -1:, human36m.index_to_copy] + x[:, :, human36m.index_to_ignore] = x[:, :, human36m.index_to_equal] + return x + +class AMASS_3DPW_Preprocess(nn.Module): + def __init__(self, args): + super(AMASS_3DPW_Preprocess, self).__init__() + self.args = args + self.mean = torch.tensor(amass_3dpw.mean).to(args.device).float() + self.std = torch.tensor(amass_3dpw.std).to(args.device).float() + + def forward(self, observed_pose, normal=True): + if normal: + observed_pose = (observed_pose - self.mean) / self.std + return observed_pose + + +class AMASS_3DPW_Postprocess(nn.Module): + def __init__(self, args): + super(AMASS_3DPW_Postprocess, self).__init__() + self.args = args + self.mean = torch.tensor(amass_3dpw.mean).to(args.device).float() + self.std = torch.tensor(amass_3dpw.std).to(args.device).float() + + def forward(self, observed_pose, pred_pose, normal=True): + if normal: + pred_pose = (pred_pose * self.std) + self.mean + return pred_pose + +class Preprocess(nn.Module): + def __init__(self, args): + super(Preprocess, self).__init__() + self.args = args + + def forward(self, observed_pose, normal=True): + return observed_pose + +class Postprocess(nn.Module): + def __init__(self, args): + super(Postprocess, self).__init__() + self.args = args + + def forward(self, observed_pose, pred_pose, normal=True): + return pred_pose \ No newline at end of file diff --git a/models/sts_gcn/sts_gcn.py b/models/sts_gcn/sts_gcn.py new file mode 100644 index 0000000..9146081 --- /dev/null +++ b/models/sts_gcn/sts_gcn.py @@ -0,0 +1,264 @@ +import math + +import torch +from torch import nn + +from .data_proc import Human36m_Postprocess, Human36m_Preprocess +from .data_proc import AMASS_3DPW_Postprocess, AMASS_3DPW_Preprocess +from .data_proc import Postprocess, Preprocess + + +class ConvTemporalGraphical(nn.Module): + # Source : https://github.com/yysijie/st-gcn/blob/master/net/st_gcn.py + r"""The basic module for applying a graph convolution. + Args: + in_channels (int): Number of channels in the input sequence data + out_channels (int): Number of channels produced by the convolution + kernel_size (int): Size of the graph convolving kernel + t_kernel_size (int): Size of the temporal convolving kernel + t_stride (int, optional): Stride of the temporal convolution. Default: 1 + t_padding (int, optional): Temporal zero-padding added to both sides of + the input. Default: 0 + t_dilation (int, optional): Spacing between temporal kernel elements. 
+ Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the output. + Default: ``True`` + Shape: + - Input: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format + - Output: Outpu graph sequence in :math:`(N, out_channels, T_{out}, V)` format + where + :math:`N` is a batch size, + :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, + :math:`T_{in}/T_{out}` is a length of input/output sequence, + :math:`V` is the number of graph nodes. + """ + + def __init__(self, + time_dim, + joints_dim + ): + super(ConvTemporalGraphical, self).__init__() + + self.A = nn.Parameter(torch.FloatTensor(time_dim, joints_dim, + joints_dim)) # learnable, graph-agnostic 3-d adjacency matrix(or edge importance matrix) + stdv = 1. / math.sqrt(self.A.size(1)) + self.A.data.uniform_(-stdv, stdv) + + self.T = nn.Parameter(torch.FloatTensor(joints_dim, time_dim, time_dim)) + stdv = 1. / math.sqrt(self.T.size(1)) + self.T.data.uniform_(-stdv, stdv) + ''' + self.prelu = nn.PReLU() + + self.Z=nn.Parameter(torch.FloatTensor(joints_dim, joints_dim, time_dim, time_dim)) + stdv = 1. / math.sqrt(self.Z.size(2)) + self.Z.data.uniform_(-stdv,stdv) + ''' + + def forward(self, x): + x = torch.einsum('nctv,vtq->ncqv', (x, self.T)) + ## x=self.prelu(x) + x = torch.einsum('nctv,tvw->nctw', (x, self.A)) + ## x = torch.einsum('nctv,wvtq->ncqw', (x, self.Z)) + return x.contiguous() + + +class ST_GCNN_layer(nn.Module): + """ + Shape: + - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format + - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format + - Output[0]: Outpu graph sequence in :math:`(N, out_channels, T_{out}, V)` format + where + :math:`N` is a batch size, + :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, + :math:`T_{in}/T_{out}` is a length of input/output sequence, + :math:`V` is the number of graph nodes. 
+ :in_channels= dimension of coordinates + : out_channels=dimension of coordinates + + + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + time_dim, + joints_dim, + dropout, + bias=True): + + super(ST_GCNN_layer, self).__init__() + self.kernel_size = kernel_size + assert self.kernel_size[0] % 2 == 1 + assert self.kernel_size[1] % 2 == 1 + padding = ((self.kernel_size[0] - 1) // 2, (self.kernel_size[1] - 1) // 2) + + self.gcn = ConvTemporalGraphical(time_dim, joints_dim) # the convolution layer + + self.tcn = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels, + (self.kernel_size[0], self.kernel_size[1]), + (stride, stride), + padding, + ), + nn.BatchNorm2d(out_channels), + nn.Dropout(dropout, inplace=True), + ) + + if stride != 1 or in_channels != out_channels: + + self.residual = nn.Sequential(nn.Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=(1, 1)), + nn.BatchNorm2d(out_channels), + ) + + + else: + self.residual = nn.Identity() + + self.prelu = nn.PReLU() + + def forward(self, x): + # assert A.shape[0] == self.kernel_size[1], print(A.shape[0],self.kernel_size) + res = self.residual(x) + x = self.gcn(x) + x = self.tcn(x) + x = x + res + x = self.prelu(x) + return x + + +class CNN_layer( + nn.Module): # This is the simple CNN layer,that performs a 2-D convolution while maintaining the dimensions of the input(except for the features dimension) + + def __init__(self, + in_channels, + out_channels, + kernel_size, + dropout, + bias=True): + super(CNN_layer, self).__init__() + self.kernel_size = kernel_size + padding = ( + (kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2) # padding so that both dimensions are maintained + assert kernel_size[0] % 2 == 1 and kernel_size[1] % 2 == 1 + + self.block = [nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding) + , nn.BatchNorm2d(out_channels), nn.Dropout(dropout, inplace=True)] + + self.block = nn.Sequential(*self.block) + + def forward(self, x): + output = self.block(x) + return output + + +class STsGCN(nn.Module): + """ + Shape: + - Input[0]: Input sequence in :math:`(N, in_channels,T_in, V)` format + - Output[0]: Output sequence in :math:`(N,T_out,in_channels, V)` format + where + :math:`N` is a batch size, + :math:`T_{in}/T_{out}` is a length of input/output sequence, + :math:`V` is the number of graph nodes. 
+ :in_channels=number of channels for the coordiantes(default=3) + + + """ + + # def __init__(self, + # input_channels, + # input_time_frame, + # output_time_frame, + # st_gcnn_dropout, + # joints_to_consider, + # n_txcnn_layers, + # txc_kernel_size, + # txc_dropout, + # bias=True): + def __init__(self, args): + super(STsGCN, self).__init__() + + self.args = args + + if args.pre_post_process == 'human3.6m': + self.preprocess = Human36m_Preprocess(args).to(args.device) + self.postprocess = Human36m_Postprocess(args).to(args.device) + elif args.pre_post_process == 'AMASS' or args.pre_post_process == '3DPW': + self.preprocess = AMASS_3DPW_Preprocess(args).to(args.device) + self.postprocess = AMASS_3DPW_Postprocess(args).to(args.device) + else: + self.preprocess = Preprocess(args).to(args.device) + self.postprocess = Postprocess(args).to(args.device) + + for p in self.preprocess.parameters(): + p.requires_grad = False + + for p in self.postprocess.parameters(): + p.requires_grad = False + + input_channels = args.keypoint_dim + input_time_frame = args.obs_frames_num + output_time_frame = args.pred_frames_num + joints_to_consider = args.n_major_joints + st_gcnn_dropout = args.st_gcnn_dropout + n_txcnn_layers = args.n_txcnn_layers + txc_kernel_size = args.txc_kernel_size + txc_dropout = args.txc_dropout + + self.st_gcnns = nn.ModuleList() + self.n_txcnn_layers = n_txcnn_layers + self.txcnns = nn.ModuleList() + + self.st_gcnns.append(ST_GCNN_layer(input_channels, 64, [1, 1], 1, input_time_frame, + joints_to_consider, st_gcnn_dropout)) + self.st_gcnns.append(ST_GCNN_layer(64, 32, [1, 1], 1, input_time_frame, + joints_to_consider, st_gcnn_dropout)) + + self.st_gcnns.append(ST_GCNN_layer(32, 64, [1, 1], 1, input_time_frame, + joints_to_consider, st_gcnn_dropout)) + + self.st_gcnns.append(ST_GCNN_layer(64, input_channels, [1, 1], 1, input_time_frame, + joints_to_consider, st_gcnn_dropout)) + + # at this point, we must permute the dimensions of the gcn network, from (N,C,T,V) into (N,T,C,V) + self.txcnns.append(CNN_layer(input_time_frame, output_time_frame, txc_kernel_size, + txc_dropout)) # with kernel_size[3,3] the dimensinons of C,V will be maintained + for i in range(1, n_txcnn_layers): + self.txcnns.append(CNN_layer(output_time_frame, output_time_frame, txc_kernel_size, txc_dropout)) + + self.prelus = nn.ModuleList() + for j in range(n_txcnn_layers): + self.prelus.append(nn.PReLU()) + + def forward(self, batch): + + x = self.preprocess(batch['observed_pose'], True) + + x = x.view(-1, + self.args.obs_frames_num, + self.args.n_major_joints, + self.args.keypoint_dim).permute(0, 3, 1, 2) + + for gcn in self.st_gcnns: + x = gcn(x) + + x = x.permute(0, 2, 1, 3) # prepare the input for the Time-Extrapolator-CNN (NCTV->NTCV) + + x = self.prelus[0](self.txcnns[0](x)) + + for i in range(1, self.args.n_txcnn_layers): + x = self.prelus[i](self.txcnns[i](x)) + x # residual connection + + x = x.permute(0, 1, 3, 2).reshape(-1, self.args.pred_frames_num, + self.args.n_major_joints * self.args.keypoint_dim) + + x = self.postprocess(batch['observed_pose'], x, True) + return {'pred_pose': x} diff --git a/models/sts_gcn/utils/__init__.py b/models/sts_gcn/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/sts_gcn/utils/amass_3d.py b/models/sts_gcn/utils/amass_3d.py new file mode 100644 index 0000000..afb74ae --- /dev/null +++ b/models/sts_gcn/utils/amass_3d.py @@ -0,0 +1,217 @@ +from torch.utils.data import Dataset,DataLoader +import numpy as np +#from h5py import File +#import scipy.io 
as sio
+from matplotlib import pyplot as plt
+import torch
+import os
+from utils.ang2joint import *
+import networkx as nx
+
+'''
+adapted from
+https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/amass3d.py
+'''
+
+
+class Datasets(Dataset):
+
+    def __init__(self, parser, actions=None, split=0):
+        """
+        :param parser: config object providing data_dir, input_n, output_n and skip_rate
+        :param actions:
+        :param input_n:
+        :param output_n:
+        :param dct_used:
+        :param split: 0 train, 1 testing, 2 validation
+        :param sample_rate:
+        """
+        self.path_to_data = os.path.join(parser.data_dir,'AMASS')  # "D:\data\AMASS\\"
+        self.split = split
+        self.in_n = parser.input_n
+        self.out_n = parser.output_n
+        # self.sample_rate = opt.sample_rate
+        self.p3d = []
+        self.keys = []
+        self.data_idx = []
+        self.joint_used = np.arange(4, 22)  # start from 4 for 18 joints, removing the non moving ones
+        seq_len = self.in_n + self.out_n
+
+        amass_splits = [
+            ['CMU', 'MPI_Limits', 'TotalCapture', 'Eyes_Japan_Dataset', 'KIT', 'EKUT', 'TCD_handMocap', 'ACCAD'],
+            ['HumanEva', 'MPI_HDM05', 'SFU', 'MPI_mosh'],
+            ['BioMotionLab_NTroje'],
+        ]
+        # amass_splits = [['BioMotionLab_NTroje'], ['HumanEva'], ['SSM_synced']]
+        # amass_splits = [['HumanEva'], ['HumanEva'], ['HumanEva']]
+        # amass_splits[0] = list(
+        #     set(amass_splits[0]).difference(set(amass_splits[1] + amass_splits[2])))
+
+        # from human_body_prior.body_model.body_model import BodyModel
+        # from smplx import lbs
+        # root_path = os.path.dirname(__file__)
+        # bm_path = root_path[:-6] + '/body_models/smplh/neutral/model.npz'
+        # bm = BodyModel(bm_path=bm_path, num_betas=16, batch_size=1, model_type='smplh')
+        # beta_mean = np.array([0.41771687, 0.25984767, 0.20500051, 0.13503872, 0.25965645, -2.10198147, -0.11915666,
+        #                       -0.5498772, 0.30885323, 1.4813145, -0.60987528, 1.42565269, 2.45862726, 0.23001716,
+        #                       -0.64180912, 0.30231911])
+        # beta_mean = torch.from_numpy(beta_mean).unsqueeze(0).float()
+        # # Add shape contribution
+        # v_shaped = bm.v_template + lbs.blend_shapes(beta_mean, bm.shapedirs)
+        # # Get the joints
+        # # NxJx3 array
+        # p3d0 = lbs.vertices2joints(bm.J_regressor, v_shaped)  # [1,52,3]
+        # p3d0 = (p3d0 - p3d0[:, 0:1, :]).float().cuda().cpu().data.numpy()
+        # parents = bm.kintree_table.data.numpy()[0, :]
+        # np.savez_compressed('smpl_skeleton.npz', p3d0=p3d0, parents=parents)
+
+        # load mean skeleton
+        skel = np.load('./body_models/smpl_skeleton.npz')
+        p3d0 = torch.from_numpy(skel['p3d0']).float().cuda()
+        parents = skel['parents']
+        parent = {}
+        for i in range(len(parents)):
+            parent[i] = parents[i]
+        n = 0
+        for ds in amass_splits[split]:
+            if not os.path.isdir(self.path_to_data + ds):
+                print(ds)
+                continue
+            print('>>> loading {}'.format(ds))
+            for sub in os.listdir(self.path_to_data + ds):
+                if not os.path.isdir(self.path_to_data + ds + '/' + sub):
+                    continue
+                for act in os.listdir(self.path_to_data + ds + '/' + sub):
+                    if not act.endswith('.npz'):
+                        continue
+                    # if not ('walk' in act or 'jog' in act or 'run' in act or 'treadmill' in act):
+                    #     continue
+                    pose_all = np.load(self.path_to_data + ds + '/' + sub + '/' + act)
+                    try:
+                        poses = pose_all['poses']
+                    except:
+                        print('no poses at {}_{}_{}'.format(ds, sub, act))
+                        continue
+                    frame_rate = pose_all['mocap_framerate']
+                    # gender = pose_all['gender']
+                    # dmpls = pose_all['dmpls']
+                    # betas = pose_all['betas']
+                    # trans = pose_all['trans']
+                    fn = poses.shape[0]
+                    sample_rate = int(frame_rate // 25)
+                    fidxs = range(0, fn, sample_rate)
+                    fn = len(fidxs)
+                    poses = poses[fidxs]
+                    poses = 
torch.from_numpy(poses).float().cuda() + poses = poses.reshape([fn, -1, 3]) + # remove global rotation + poses[:, 0] = 0 + p3d0_tmp = p3d0.repeat([fn, 1, 1]) + p3d = ang2joint(p3d0_tmp, poses, parent) + # self.p3d[(ds, sub, act)] = p3d.cpu().data.numpy() + self.p3d.append(p3d.cpu().data.numpy()) + if split == 2: + valid_frames = np.arange(0, fn - seq_len + 1, parser.skip_rate) + else: + valid_frames = np.arange(0, fn - seq_len + 1, parser.skip_rate) + + # tmp_data_idx_1 = [(ds, sub, act)] * len(valid_frames) + self.keys.append((ds, sub, act)) + tmp_data_idx_1 = [n] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + n += 1 + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + return self.p3d[key][fs] # , key + + +# In[12]: + + +def normalize_A(A): # given an adj.matrix, normalize it by multiplying left and right with the degree matrix, in the -1/2 power + + A=A+np.eye(A.shape[0]) + + D=np.sum(A,axis=0) + + + D=np.diag(D.A1) + + + D_inv = D**-0.5 + D_inv[D_inv==np.infty]=0 + + return D_inv*A*D_inv + + +# In[ ]: + + +def spatio_temporal_graph(joints_to_consider,temporal_kernel_size,spatial_adjacency_matrix): # given a normalized spatial adj.matrix,creates a spatio-temporal adj.matrix + + + number_of_joints=joints_to_consider + + spatio_temporal_adj=np.zeros((temporal_kernel_size,number_of_joints,number_of_joints)) + for t in range(temporal_kernel_size): + for i in range(number_of_joints): + spatio_temporal_adj[t,i,i]=1 # create edge between same body joint,for t consecutive frames + for j in range(number_of_joints): + if spatial_adjacency_matrix[i,j]!=0: # if the body joints are connected + spatio_temporal_adj[t,i,j]=spatial_adjacency_matrix[i,j] + return spatio_temporal_adj + + +# In[20]: + + +def get_adj_AMASS(joints_to_consider,temporal_kernel_size): # returns adj.matrix to be fed to the network + if joints_to_consider==22: + edgelist = [ + (0, 1), (0, 2), #(0, 3), + (1, 4), (5, 2), #(3, 6), + (7, 4), (8, 5), #(6, 9), + (7, 10), (8, 11), #(9, 12), + #(12, 13), (12, 14), + (12, 15), + #(13, 16), (12, 16), (14, 17), (12, 17), + (12, 16), (12, 17), + (16, 18), (19, 17), (20, 18), (21, 19), + #(22, 20), #(23, 21),#wrists + (1, 16), (2, 17)] + + # create a graph + G=nx.Graph() + G.add_edges_from(edgelist) + # create adjacency matrix + A = nx.adjacency_matrix(G,nodelist=list(range(0,joints_to_consider))).todense() + #normalize adjacency matrix + A=normalize_A(A) + return torch.Tensor(spatio_temporal_graph(joints_to_consider,temporal_kernel_size,A)) + + +# In[23]: + + +def mpjpe_error(batch_pred,batch_gt): + #assert batch_pred.requires_grad==True + #assert batch_gt.requires_grad==False + + + batch_pred=batch_pred.contiguous().view(-1,3) + batch_gt=batch_gt.contiguous().view(-1,3) + + return torch.mean(torch.norm(batch_gt-batch_pred,2,1)) + + +# In[ ]: + + + + diff --git a/models/sts_gcn/utils/amass_3d_viz.py b/models/sts_gcn/utils/amass_3d_viz.py new file mode 100644 index 0000000..2e8db72 --- /dev/null +++ b/models/sts_gcn/utils/amass_3d_viz.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python +# coding: utf-8 + + +import numpy as np +import torch +from torch.utils.data import DataLoader +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import matplotlib.animation as animation +#from utils.amass_3d import * +from utils.dpw3d import * # choose dataset to visualize on 
the dataset class that we import +from utils.loss_funcs import mpjpe_error + + +# In[10]: + + +def create_pose(ax,plots,vals,pred=True,update=False): + connect = [ + (0, 1), (0, 2), #(0, 3), + (1, 4), (5, 2), #(3, 6), + (7, 4), (8, 5), #(6, 9), + (7, 10), (8, 11), #(9, 12), + # (12, 13), (12, 14), + (12, 15), + #(13, 16), (12, 16), (14, 17), (12, 17), + (12, 16), (12, 17), + (16, 18), (19, 17), (20, 18), (21, 19), + # (22, 20), (23, 21),# wrists + (1, 16), (2, 17)] + + + + LR = np.array([ + False, + True, False, + False, + True, False, + False, + True, False, + False, + True, False, + False, + True, False, + True, True, + False, + True, False, + True, False, + True, False, + True, False]) + +# Start and endpoints of our representation + I = np.array([touple[0] for touple in connect]) + J = np.array([touple[1] for touple in connect]) +# Left / right indicator + LR = np.array([LR[a] or LR[b] for a,b in connect]) + if pred: + lcolor = "#9b59b6" + rcolor = "#2ecc71" + else: + lcolor = "#8e8e8e" + rcolor = "#383838" + + for i in np.arange( len(I) ): + x = np.array( [vals[I[i], 0], vals[J[i], 0]] ) + z = np.array( [vals[I[i], 1], vals[J[i], 1]] ) + y = np.array( [vals[I[i], 2], vals[J[i], 2]] ) + if not update: + + if i ==0: + plots.append(ax.plot(x, y, z, lw=2,linestyle='--' ,c=lcolor if LR[i] else rcolor,label=['GT' if not pred else 'Pred'])) + else: + plots.append(ax.plot(x, y, z, lw=2,linestyle='--', c=lcolor if LR[i] else rcolor)) + + elif update: + plots[i][0].set_xdata(x) + plots[i][0].set_ydata(y) + plots[i][0].set_3d_properties(z) + plots[i][0].set_color(lcolor if LR[i] else rcolor) + + return plots + # ax.legend(loc='lower left') + + +# In[11]: + + +def update(num,data_gt,data_pred,plots_gt,plots_pred,fig,ax): + + gt_vals=data_gt[num] + pred_vals=data_pred[num] + plots_gt=create_pose(ax,plots_gt,gt_vals,pred=False,update=True) + plots_pred=create_pose(ax,plots_pred,pred_vals,pred=True,update=True) + + + + + + r = 0.75 + xroot, zroot, yroot = gt_vals[0,0], gt_vals[0,1], gt_vals[0,2] + ax.set_xlim3d([-r+xroot, r+xroot]) + ax.set_zlim3d([-r+zroot, r+zroot]) + ax.set_ylim3d([-r+yroot, r+yroot]) + #ax.set_title('pose at time frame: '+str(num)) + #ax.set_aspect('equal') + + return plots_gt,plots_pred + + + +# In[12]: + + +def visualize(input_n,output_n,visualize_from,path,modello,device,n_viz,skip_rate): + + if visualize_from=='train': + loader=Datasets(path,input_n,output_n,skip_rate,split=0) + elif visualize_from=='validation': + loader=Datasets(path,input_n,output_n,skip_rate,split=1) + elif visualize_from=='test': + loader=Datasets(path,input_n,output_n,skip_rate,split=2) + + joint_used=np.arange(4,22) + + full_joint_used=np.arange(0,22) + + + loader = DataLoader( + loader, + batch_size=1, + shuffle = True, + num_workers=0) + + + + for cnt,batch in enumerate(loader): + batch = batch.float().to(device) # multiply by 1000 for milimeters + sequences_train=batch[:,0:input_n,joint_used,:].permute(0,3,1,2) + sequences_predict_gt=batch[:,input_n:input_n+output_n,full_joint_used,:] + + sequences_predict=modello(sequences_train).permute(0,1,3,2) + + all_joints_seq=sequences_predict_gt.clone() + + all_joints_seq[:,:,joint_used,:]=sequences_predict + + loss=mpjpe_error(all_joints_seq,sequences_predict_gt)*1000# # both must have format (batch,T,V,C) + + data_pred=torch.squeeze(all_joints_seq,0).cpu().data.numpy() + data_gt=torch.squeeze(sequences_predict_gt,0).cpu().data.numpy() + + + fig = plt.figure() + ax = Axes3D(fig) + vals = np.zeros((22, 3)) + gt_plots=[] + pred_plots=[] + + 
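+        # create_pose(update=False) draws the initial GT/prediction skeletons once;
+        # FuncAnimation then calls update(), which only moves the existing line objects
+        # to the joint positions of each successive frame.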
gt_plots=create_pose(ax,gt_plots,vals,pred=False,update=False) + pred_plots=create_pose(ax,pred_plots,vals,pred=True,update=False) + + ax.set_xlabel("x") + ax.set_ylabel("y") + ax.set_zlabel("z") + ax.legend(loc='lower left') + + + + ax.set_xlim3d([-1, 1.5]) + ax.set_xlabel('X') + + ax.set_ylim3d([-1, 1.5]) + ax.set_ylabel('Y') + + ax.set_zlim3d([0.0, 1.5]) + ax.set_zlabel('Z') + ax.set_title('loss in mm is: '+str(round(loss.item(),4))+' for frames= '+str(output_n)) + + line_anim = animation.FuncAnimation(fig, update, output_n, fargs=(data_gt,data_pred,gt_plots,pred_plots + ,fig,ax),interval=70, blit=False) + plt.show() + + # line_anim.save('amass_3d.gif') + + + if cnt==n_viz-1: + break + diff --git a/models/sts_gcn/utils/ang2joint.py b/models/sts_gcn/utils/ang2joint.py new file mode 100644 index 0000000..bfa0cc3 --- /dev/null +++ b/models/sts_gcn/utils/ang2joint.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# coding: utf-8 + +import torch + +''' +https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/ang2joint.py +''' + +def ang2joint(p3d0, pose, + parent={0: -1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9, + 15: 12, 16: 13, 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21}): + """ + :param p3d0:[batch_size, joint_num, 3] + :param pose:[batch_size, joint_num, 3] + :param parent: + :return: + """ + # model_path = './model.npz' + # params = np.load(model_path, allow_pickle=True) + # kintree_table = params['kintree_table'] + batch_num = p3d0.shape[0] + # id_to_col = {kintree_table[1, i]: i + # for i in range(kintree_table.shape[1])} + # parent = { + # i: id_to_col[kintree_table[0, i]] + # for i in range(1, kintree_table.shape[1]) + # } + # parent = {1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9, 15: 12, 16: 13, + # 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21} + jnum = len(parent.keys()) + # v_shaped = torch.tensordot(betas, self.shapedirs, dims=([1], [2])) + self.v_template + # J = torch.matmul(self.J_regressor, v_shaped) + # face_J = v_shaped[:, [333, 2801, 6261], :] + J = p3d0 + R_cube_big = rodrigues(pose.contiguous().view(-1, 1, 3)).reshape(batch_num, -1, 3, 3) + results = [] + results.append( + with_zeros(torch.cat((R_cube_big[:, 0], torch.reshape(J[:, 0, :], (-1, 3, 1))), dim=2)) + ) + # for i in range(1, kintree_table.shape[1]): + for i in range(1, jnum): + results.append( + torch.matmul( + results[parent[i]], + with_zeros( + torch.cat( + (R_cube_big[:, i], torch.reshape(J[:, i, :] - J[:, parent[i], :], (-1, 3, 1))), + dim=2 + ) + ) + ) + ) + + stacked = torch.stack(results, dim=1) + J_transformed = stacked[:, :, :3, 3] + return J_transformed + + +# In[ ]: + + +def rodrigues(r): + """ + Rodrigues' rotation formula that turns axis-angle tensor into rotation + matrix in a batch-ed manner. + Parameter: + ---------- + r: Axis-angle rotation tensor of shape [batch_size * angle_num, 1, 3]. + Return: + ------- + Rotation matrix of shape [batch_size * angle_num, 3, 3]. 
+ """ + eps = r.clone().normal_(std=1e-8) + theta = torch.norm(r + eps, dim=(1, 2), keepdim=True) + # theta = torch.norm(r, dim=(1, 2), keepdim=True) # dim cannot be tuple + theta_dim = theta.shape[0] + r_hat = r / theta + cos = torch.cos(theta) + z_stick = torch.zeros(theta_dim, dtype=torch.float).to(r.device) + m = torch.stack( + (z_stick, -r_hat[:, 0, 2], r_hat[:, 0, 1], r_hat[:, 0, 2], z_stick, + -r_hat[:, 0, 0], -r_hat[:, 0, 1], r_hat[:, 0, 0], z_stick), dim=1) + m = torch.reshape(m, (-1, 3, 3)) + i_cube = (torch.eye(3, dtype=torch.float).unsqueeze(dim=0) + torch.zeros((theta_dim, 3, 3), dtype=torch.float)).to(r.device) + A = r_hat.permute(0, 2, 1) + dot = torch.matmul(A, r_hat) + R = cos * i_cube + (1 - cos) * dot + torch.sin(theta) * m + return R + + +# In[ ]: + + +def with_zeros(x): + """ + Append a [0, 0, 0, 1] tensor to a [3, 4] tensor. + Parameter: + --------- + x: Tensor to be appended. + Return: + ------ + Tensor after appending of shape [4,4] + """ + ones = torch.tensor( + [[[0.0, 0.0, 0.0, 1.0]]], dtype=torch.float + ).expand(x.shape[0], -1, -1).to(x.device) + ret = torch.cat((x, ones), dim=1) + return ret + + +def pack(x): + """ + Append zero tensors of shape [4, 3] to a batch of [4, 1] shape tensor. + Parameter: + ---------- + x: A tensor of shape [batch_size, 4, 1] + Return: + ------ + A tensor of shape [batch_size, 4, 4] after appending. + """ + zeros43 = torch.zeros( + (x.shape[0], x.shape[1], 4, 3), dtype=torch.float).to(x.device) + ret = torch.cat((zeros43, x), dim=3) + return ret + diff --git a/models/sts_gcn/utils/data_utils.py b/models/sts_gcn/utils/data_utils.py new file mode 100644 index 0000000..1bec35d --- /dev/null +++ b/models/sts_gcn/utils/data_utils.py @@ -0,0 +1,663 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import torch +# from torch.autograd.variable import Variable +import os +from . 
import forward_kinematics + + +def rotmat2euler(R): + """ + Converts a rotation matrix to Euler angles + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1 + + Args + R: a 3x3 rotation matrix + Returns + eul: a 3x1 Euler angle representation of R + """ + if R[0, 2] == 1 or R[0, 2] == -1: + # special case + E3 = 0 # set arbitrarily + dlta = np.arctan2(R[0, 1], R[0, 2]); + + if R[0, 2] == -1: + E2 = np.pi / 2; + E1 = E3 + dlta; + else: + E2 = -np.pi / 2; + E1 = -E3 + dlta; + + else: + E2 = -np.arcsin(R[0, 2]) + E1 = np.arctan2(R[1, 2] / np.cos(E2), R[2, 2] / np.cos(E2)) + E3 = np.arctan2(R[0, 1] / np.cos(E2), R[0, 0] / np.cos(E2)) + + eul = np.array([E1, E2, E3]); + return eul + + +def rotmat2quat(R): + """ + Converts a rotation matrix to a quaternion + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4 + + Args + R: 3x3 rotation matrix + Returns + q: 1x4 quaternion + """ + rotdiff = R - R.T; + + r = np.zeros(3) + r[0] = -rotdiff[1, 2] + r[1] = rotdiff[0, 2] + r[2] = -rotdiff[0, 1] + sintheta = np.linalg.norm(r) / 2; + r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps); + + costheta = (np.trace(R) - 1) / 2; + + theta = np.arctan2(sintheta, costheta); + + q = np.zeros(4) + q[0] = np.cos(theta / 2) + q[1:] = r0 * np.sin(theta / 2) + return q + + +def rotmat2expmap(R): + return quat2expmap(rotmat2quat(R)); + + +def expmap2rotmat(r): + """ + Converts an exponential map angle to a rotation matrix + Matlab port to python for evaluation purposes + I believe this is also called Rodrigues' formula + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m + + Args + r: 1x3 exponential map + Returns + R: 3x3 rotation matrix + """ + theta = np.linalg.norm(r) + r0 = np.divide(r, theta + np.finfo(np.float32).eps) + r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3, 3) + r0x = r0x - r0x.T + R = np.eye(3, 3) + np.sin(theta) * r0x + (1 - np.cos(theta)) * (r0x).dot(r0x); + return R + + +def quat2expmap(q): + """ + Converts a quaternion to an exponential map + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1 + + Args + q: 1x4 quaternion + Returns + r: 1x3 exponential map + Raises + ValueError if the l2 norm of the quaternion is not close to 1 + """ + if (np.abs(np.linalg.norm(q) - 1) > 1e-3): + raise (ValueError, "quat2expmap: input quaternion is not norm 1") + + sinhalftheta = np.linalg.norm(q[1:]) + coshalftheta = q[0] + + r0 = np.divide(q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps)); + theta = 2 * np.arctan2(sinhalftheta, coshalftheta) + theta = np.mod(theta + 2 * np.pi, 2 * np.pi) + + if theta > np.pi: + theta = 2 * np.pi - theta + r0 = -r0 + + r = r0 * theta + return r + + +def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot): + """Borrowed from SRNN code. Reads a csv file and returns a float32 matrix. 
+ https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12 + + Args + normalizedData: nxd matrix with normalized data + data_mean: vector of mean used to normalize the data + data_std: vector of standard deviation used to normalize the data + dimensions_to_ignore: vector with dimensions not used by the model + actions: list of strings with the encoded actions + one_hot: whether the data comes with one-hot encoding + Returns + origData: data originally used to + """ + T = normalizedData.shape[0] + D = data_mean.shape[0] + + origData = np.zeros((T, D), dtype=np.float32) + dimensions_to_use = [] + for i in range(D): + if i in dimensions_to_ignore: + continue + dimensions_to_use.append(i) + dimensions_to_use = np.array(dimensions_to_use) + + if one_hot: + origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)] + else: + origData[:, dimensions_to_use] = normalizedData + + # potentially ineficient, but only done once per experiment + stdMat = data_std.reshape((1, D)) + stdMat = np.repeat(stdMat, T, axis=0) + meanMat = data_mean.reshape((1, D)) + meanMat = np.repeat(meanMat, T, axis=0) + origData = np.multiply(origData, stdMat) + meanMat + return origData + + +def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot): + """ + Converts the output of the neural network to a format that is more easy to + manipulate for, e.g. conversion to other format or visualization + + Args + poses: The output from the TF model. A list with (seq_length) entries, + each with a (batch_size, dim) output + Returns + poses_out: A tensor of size (batch_size, seq_length, dim) output. Each + batch is an n-by-d sequence of poses. + """ + seq_len = len(poses) + if seq_len == 0: + return [] + + batch_size, dim = poses[0].shape + + poses_out = np.concatenate(poses) + poses_out = np.reshape(poses_out, (seq_len, batch_size, dim)) + poses_out = np.transpose(poses_out, [1, 0, 2]) + + poses_out_list = [] + for i in xrange(poses_out.shape[0]): + poses_out_list.append( + unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot)) + + return poses_out_list + + +def readCSVasFloat(filename): + """ + Borrowed from SRNN code. Reads a csv and returns a float matrix. + https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34 + + Args + filename: string. Path to the csv file + Returns + returnArray: the read data in a float32 matrix + """ + returnArray = [] + lines = open(filename).readlines() + for line in lines: + line = line.strip().split(',') + if len(line) > 0: + returnArray.append(np.array([np.float32(x) for x in line])) + + returnArray = np.array(returnArray) + return returnArray + + +def normalize_data(data, data_mean, data_std, dim_to_use, actions, one_hot): + """ + Normalize input data by removing unused dimensions, subtracting the mean and + dividing by the standard deviation + + Args + data: nx99 matrix with data to normalize + data_mean: vector of mean used to normalize the data + data_std: vector of standard deviation used to normalize the data + dim_to_use: vector with dimensions used by the model + actions: list of strings with the encoded actions + one_hot: whether the data comes with one-hot encoding + Returns + data_out: the passed data matrix, but normalized + """ + data_out = {} + nactions = len(actions) + + if not one_hot: + # No one-hot encoding... 
no need to do anything special + for key in data.keys(): + data_out[key] = np.divide((data[key] - data_mean), data_std) + data_out[key] = data_out[key][:, dim_to_use] + + else: + # hard-coding 99 dimensions for un-normalized human poses + for key in data.keys(): + data_out[key] = np.divide((data[key][:, 0:99] - data_mean), data_std) + data_out[key] = data_out[key][:, dim_to_use] + data_out[key] = np.hstack((data_out[key], data[key][:, -nactions:])) + + return data_out + + +def normalization_stats(completeData): + """" + Also borrowed for SRNN code. Computes mean, stdev and dimensions to ignore. + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33 + + Args + completeData: nx99 matrix with data to normalize + Returns + data_mean: vector of mean used to normalize the data + data_std: vector of standard deviation used to normalize the data + dimensions_to_ignore: vector with dimensions not used by the model + dimensions_to_use: vector with dimensions used by the model + """ + data_mean = np.mean(completeData, axis=0) + data_std = np.std(completeData, axis=0) + + dimensions_to_ignore = [] + dimensions_to_use = [] + + dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) + dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) + + data_std[dimensions_to_ignore] = 1.0 + + return data_mean, data_std, dimensions_to_ignore, dimensions_to_use + + +def define_actions(action): + """ + Define the list of actions we are using. + + Args + action: String with the passed action. Could be "all" + Returns + actions: List of strings of actions + Raises + ValueError if the action is not included in H3.6M + """ + + actions = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] + if action in actions: + return [action] + + if action == "all": + return actions + + if action == "all_srnn": + return ["walking", "eating", "smoking", "discussion"] + + raise (ValueError, "Unrecognized action: %d" % action) + + +"""all methods above are borrowed from https://github.com/una-dinosauria/human-motion-prediction""" + + +def define_actions_cmu(action): + """ + Define the list of actions we are using. + + Args + action: String with the passed action. 
Could be "all" + Returns + actions: List of strings of actions + Raises + ValueError if the action is not included in H3.6M + """ + + actions = ["basketball", "basketball_signal", "directing_traffic", "jumping", "running", "soccer", "walking", + "washwindow"] + if action in actions: + return [action] + + if action == "all": + return actions + + raise (ValueError, "Unrecognized action: %d" % action) + + +def load_data_cmu(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False): + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + count = count + 1 + for examp_index in np.arange(count): + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + even_list = range(0, n, 2) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + if not is_test: + fs = np.arange(0, num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + source_seq_len = 50 + target_seq_len = 25 + total_frames = source_seq_len + target_seq_len + batch_size = 8 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + for _ in range(batch_size): + idx = rng.randint(0, num_frames - total_frames) + seq_sel = the_sequence[ + idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] + seq_sel = np.expand_dims(seq_sel, axis=0) + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + if not is_test: + data_std = np.std(complete_seq, axis=0) + data_mean = np.mean(complete_seq, axis=0) + + dimensions_to_ignore = [] + dimensions_to_use = [] + dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) + dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) + data_std[dimensions_to_ignore] = 1.0 + data_mean[dimensions_to_ignore] = 0.0 + + return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std + + +def load_data_cmu_3d(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False): + seq_len = input_n + output_n + nactions = len(actions) + sampled_seq = [] + complete_seq = [] + for action_idx in np.arange(nactions): + action = actions[action_idx] + path = '{}/{}'.format(path_to_dataset, action) + count = 0 + for _ in os.listdir(path): + count = count + 1 + for examp_index in np.arange(count): + filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) + action_sequence = readCSVasFloat(filename) + n, d = action_sequence.shape + exptmps = torch.from_numpy(action_sequence).float() + xyz = expmap2xyz_torch_cmu(exptmps) + xyz = xyz.view(-1, 38 * 3) + xyz = xyz.cpu().data.numpy() + action_sequence = xyz + + even_list = range(0, n, 2) + the_sequence = np.array(action_sequence[even_list, :]) + num_frames = len(the_sequence) + if not is_test: + fs = np.arange(0, 
num_frames - seq_len + 1) + fs_sel = fs + for i in np.arange(seq_len - 1): + fs_sel = np.vstack((fs_sel, fs + i + 1)) + fs_sel = fs_sel.transpose() + seq_sel = the_sequence[fs_sel, :] + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + else: + source_seq_len = 50 + target_seq_len = 25 + total_frames = source_seq_len + target_seq_len + batch_size = 8 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + for _ in range(batch_size): + idx = rng.randint(0, num_frames - total_frames) + seq_sel = the_sequence[ + idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] + seq_sel = np.expand_dims(seq_sel, axis=0) + if len(sampled_seq) == 0: + sampled_seq = seq_sel + complete_seq = the_sequence + else: + sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) + complete_seq = np.append(complete_seq, the_sequence, axis=0) + + if not is_test: + data_std = np.std(complete_seq, axis=0) + data_mean = np.mean(complete_seq, axis=0) + + joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore) + + data_std[dimensions_to_ignore] = 1.0 + data_mean[dimensions_to_ignore] = 0.0 + + return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std + + +def rotmat2euler_torch(R): + """ + Converts a rotation matrix to euler angles + batch pytorch version ported from the corresponding numpy method above + + :param R:N*3*3 + :return: N*3 + """ + n = R.data.shape[0] + eul = torch.zeros(n, 3).float() + idx_spec1 = (R[:, 0, 2] == 1).nonzero().cpu().data.numpy().reshape(-1).tolist() + idx_spec2 = (R[:, 0, 2] == -1).nonzero().cpu().data.numpy().reshape(-1).tolist() + if len(idx_spec1) > 0: + R_spec1 = R[idx_spec1, :, :] + eul_spec1 = torch.zeros(len(idx_spec1), 3).float() + eul_spec1[:, 2] = 0 + eul_spec1[:, 1] = -np.pi / 2 + delta = torch.atan2(R_spec1[:, 0, 1], R_spec1[:, 0, 2]) + eul_spec1[:, 0] = delta + eul[idx_spec1, :] = eul_spec1 + + if len(idx_spec2) > 0: + R_spec2 = R[idx_spec2, :, :] + eul_spec2 = torch.zeros(len(idx_spec2), 3).float() + eul_spec2[:, 2] = 0 + eul_spec2[:, 1] = np.pi / 2 + delta = torch.atan2(R_spec2[:, 0, 1], R_spec2[:, 0, 2]) + eul_spec2[:, 0] = delta + eul[idx_spec2] = eul_spec2 + + idx_remain = np.arange(0, n) + idx_remain = np.setdiff1d(np.setdiff1d(idx_remain, idx_spec1), idx_spec2).tolist() + if len(idx_remain) > 0: + R_remain = R[idx_remain, :, :] + eul_remain = torch.zeros(len(idx_remain), 3).float() + eul_remain[:, 1] = -torch.asin(R_remain[:, 0, 2]) + eul_remain[:, 0] = torch.atan2(R_remain[:, 1, 2] / torch.cos(eul_remain[:, 1]), + R_remain[:, 2, 2] / torch.cos(eul_remain[:, 1])) + eul_remain[:, 2] = torch.atan2(R_remain[:, 0, 1] / torch.cos(eul_remain[:, 1]), + R_remain[:, 0, 0] / torch.cos(eul_remain[:, 1])) + eul[idx_remain, :] = eul_remain + + return eul + + +def rotmat2quat_torch(R): + """ + Converts a rotation matrix to quaternion + batch pytorch version ported from the corresponding numpy method above + :param R: N * 3 * 3 + :return: N * 4 + """ + rotdiff = R - R.transpose(1, 2) + r = torch.zeros_like(rotdiff[:, 0]) + r[:, 0] = -rotdiff[:, 1, 2] + r[:, 1] = rotdiff[:, 0, 2] + r[:, 2] = -rotdiff[:, 0, 1] + r_norm = torch.norm(r, dim=1) + sintheta = r_norm / 2 
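+    # For a rotation matrix R, R - R.T = 2*sin(theta)*[u]_x, where [u]_x is the skew-symmetric
+    # matrix of the unit rotation axis u; hence ||r|| / 2 recovers sin(theta) and r / ||r|| gives u.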
+ r0 = torch.div(r, r_norm.unsqueeze(1).repeat(1, 3) + 0.00000001) + t1 = R[:, 0, 0] + t2 = R[:, 1, 1] + t3 = R[:, 2, 2] + costheta = (t1 + t2 + t3 - 1) / 2 + theta = torch.atan2(sintheta, costheta) + q = torch.zeros(R.shape[0], 4).float() + q[:, 0] = torch.cos(theta / 2) + q[:, 1:] = torch.mul(r0, torch.sin(theta / 2).unsqueeze(1).repeat(1, 3)) + + return q + + +def expmap2quat_torch(exp): + """ + Converts expmap to quaternion + batch pytorch version ported from the corresponding numpy method above + :param R: N*3 + :return: N*4 + """ + theta = torch.norm(exp, p=2, dim=1).unsqueeze(1) + v = torch.div(exp, theta.repeat(1, 3) + 0.0000001) + sinhalf = torch.sin(theta / 2) + coshalf = torch.cos(theta / 2) + q1 = torch.mul(v, sinhalf.repeat(1, 3)) + q = torch.cat((coshalf, q1), dim=1) + return q + + +def expmap2rotmat_torch(r): + """ + Converts expmap matrix to rotation + batch pytorch version ported from the corresponding method above + :param r: N*3 + :return: N*3*3 + """ + theta = torch.norm(r, 2, 1) + r0 = torch.div(r, theta.unsqueeze(1).repeat(1, 3) + 0.0000001) + r1 = torch.zeros_like(r0).repeat(1, 3) + r1[:, 1] = -r0[:, 2] + r1[:, 2] = r0[:, 1] + r1[:, 5] = -r0[:, 0] + r1 = r1.view(-1, 3, 3) + r1 = r1 - r1.transpose(1, 2) + n = r1.data.shape[0] + R = torch.eye(3, 3).repeat(n, 1, 1).float() + torch.mul( + torch.sin(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3), r1) + torch.mul( + (1 - torch.cos(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3)), torch.matmul(r1, r1)) + return R + + +def expmap2xyz_torch(expmap): + """ + convert expmaps to joint locations + :param expmap: N*99 + :return: N*32*3 + """ + parent, offset, rotInd, expmapInd = forward_kinematics._some_variables() + xyz = forward_kinematics.fkl_torch(expmap, parent, offset, rotInd, expmapInd) + return xyz + + +def get_dct_matrix(N): + dct_m = np.eye(N) + for k in np.arange(N): + for i in np.arange(N): + w = np.sqrt(2 / N) + if k == 0: + w = np.sqrt(1 / N) + dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) + idct_m = np.linalg.inv(dct_m) + return dct_m, idct_m + + +def find_indices_256(frame_num1, frame_num2, seq_len, input_n=10): + """ + Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 + + which originaly from + In order to find the same action indices as in SRNN. + https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 + """ + + # Used a fixed dummy seed, following + # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + + T1 = frame_num1 - 150 + T2 = frame_num2 - 150 # seq_len + idxo1 = None + idxo2 = None + for _ in np.arange(0, 128): + idx_ran1 = rng.randint(16, T1) + idx_ran2 = rng.randint(16, T2) + idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) + idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) + if idxo1 is None: + idxo1 = idxs1 + idxo2 = idxs2 + else: + idxo1 = np.vstack((idxo1, idxs1)) + idxo2 = np.vstack((idxo2, idxs2)) + return idxo1, idxo2 + + +def find_indices_srnn(frame_num1, frame_num2, seq_len, input_n=10): + """ + Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 + + which originaly from + In order to find the same action indices as in SRNN. 
+ https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 + """ + + # Used a fixed dummy seed, following + # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + + T1 = frame_num1 - 150 + T2 = frame_num2 - 150 # seq_len + idxo1 = None + idxo2 = None + for _ in np.arange(0, 4): + idx_ran1 = rng.randint(16, T1) + idx_ran2 = rng.randint(16, T2) + # print("subact1 {}".format(idx_ran1)) + # print("subact2 {}".format(idx_ran2)) + idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) + idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) + if idxo1 is None: + idxo1 = idxs1 + idxo2 = idxs2 + else: + idxo1 = np.vstack((idxo1, idxs1)) + idxo2 = np.vstack((idxo2, idxs2)) + return idxo1, idxo2 diff --git a/models/sts_gcn/utils/dpw3d.py b/models/sts_gcn/utils/dpw3d.py new file mode 100644 index 0000000..a274e12 --- /dev/null +++ b/models/sts_gcn/utils/dpw3d.py @@ -0,0 +1,127 @@ +from torch.utils.data import Dataset +import numpy as np +from h5py import File +import scipy.io as sio +from utils import data_utils +from matplotlib import pyplot as plt +import torch +import os +from utils import ang2joint +import pickle as pkl +import os +from os import walk + +''' +adapted from +https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/dpw3d.py +''' + +class Datasets(Dataset): + + def __init__(self,data_dir,input_n,output_n,skip_rate,split=0): + """ + :param path_to_data: + :param actions: + :param input_n: + :param output_n: + :param dct_used: + :param split: 0 train, 1 testing, 2 validation + :param sample_rate: + """ + self.path_to_data = os.path.join(data_dir,'3dpw/sequenceFiles') + self.split = split + self.in_n = input_n + self.out_n = output_n + #self.sample_rate = opt.sample_rate + self.p3d = [] + self.keys = [] + self.data_idx = [] + self.joint_used = np.arange(4, 22) + seq_len = self.in_n + self.out_n + + if split == 0: + data_path = self.path_to_data + '/train/' + elif split == 2: + data_path = self.path_to_data + '/test/' + elif split == 1: + data_path = self.path_to_data + '/validation/' + files = [] + for (dirpath, dirnames, filenames) in walk(data_path): + files.extend(filenames) + + # from human_body_prior.body_model.body_model import BodyModel + # from smplx import lbs + # root_path = os.path.dirname(__file__) + # bm_path = root_path[:-6] + '/body_models/smplh/neutral/model.npz' + # bm = BodyModel(bm_path=bm_path, num_betas=16, batch_size=1) + # beta_mean = np.array([0.41771687, 0.25984767, 0.20500051, 0.13503872, 0.25965645, -2.10198147, -0.11915666, + # -0.5498772, 0.30885323, 1.4813145, -0.60987528, 1.42565269, 2.45862726, 0.23001716, + # -0.64180912, 0.30231911]) + # beta_mean = torch.from_numpy(beta_mean).unsqueeze(0).float() + # # Add shape contribution + # v_shaped = bm.v_template + lbs.blend_shapes(beta_mean, bm.shapedirs) + # # Get the joints + # # NxJx3 array + # p3d0 = lbs.vertices2joints(bm.J_regressor, v_shaped) # [1,52,3] + # p3d0 = (p3d0 - p3d0[:, 0:1, :]).float().cuda()[:, :22] + # parents = bm.kintree_table.data.numpy()[0, :] + skel = np.load('./body_models/smpl_skeleton.npz') + p3d0 = torch.from_numpy(skel['p3d0']).float().cuda()[:, :22] + parents = skel['parents'] + parent = {} + for i in range(len(parents)): + if i > 21: + break + parent[i] = parents[i] + n = 0 + + sample_rate = int(60 // 25) + + for f in files: + with open(data_path + f, 'rb') as f: + print('>>> loading 
{}'.format(f)) + data = pkl.load(f, encoding='latin1') + joint_pos = data['poses_60Hz'] + for i in range(len(joint_pos)): + poses = joint_pos[i] + fn = poses.shape[0] + fidxs = range(0, fn, sample_rate) + fn = len(fidxs) + poses = poses[fidxs] + poses = torch.from_numpy(poses).float().cuda() + poses = poses.reshape([fn, -1, 3]) + poses = poses[:, :-2] + # remove global rotation + poses[:, 0] = 0 + p3d0_tmp = p3d0.repeat([fn, 1, 1]) + p3d = ang2joint.ang2joint(p3d0_tmp, poses, parent) + # self.p3d[(ds, sub, act)] = p3d.cpu().data.numpy() + self.p3d.append(p3d.cpu().data.numpy()) + # # vis + # import utils.vis_util as vis_util + # from mpl_toolkits.mplot3d import Axes3D + # ax = plt.subplot(111, projection='3d') + # vis_util.draw_skeleton_smpl(ax, self.p3d[0][0], parents=parents[:22]) + + if split == 2: + # valid_frames = np.arange(0, fn - seq_len + 1, opt.skip_rate_test) + # valid_frames = np.arange(0, fn - seq_len + 1, 2) + valid_frames = np.arange(0, fn - seq_len + 1) + else: + valid_frames = np.arange(0, fn - seq_len + 1, skip_rate) + + # tmp_data_idx_1 = [(ds, sub, act)] * len(valid_frames) + tmp_data_idx_1 = [n] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + n += 1 + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + return self.p3d[key][fs] + + diff --git a/models/sts_gcn/utils/forward_kinematics.py b/models/sts_gcn/utils/forward_kinematics.py new file mode 100644 index 0000000..3b9b1d6 --- /dev/null +++ b/models/sts_gcn/utils/forward_kinematics.py @@ -0,0 +1,288 @@ +import numpy as np +import torch +from torch.autograd.variable import Variable +from . import data_utils + + +def fkl(angles, parent, offset, rotInd, expmapInd): + """ + Convert joint angles and bone lenghts into the 3d points of a person. 
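+    Forward kinematics over the 32-joint Human3.6M skeleton.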
+ + adapted from + https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L14 + + which originaly based on expmap2xyz.m, available at + https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m + Args + angles: 99-long vector with 3d position and 3d joint angles in expmap format + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + Returns + xyz: 32x3 3d points that represent a person in 3d space + """ + + assert len(angles) == 99 + + # Structure that indicates parents for each joint + njoints = 32 + xyzStruct = [dict() for x in range(njoints)] + + for i in np.arange(njoints): + + # if not rotInd[i]: # If the list is empty + # xangle, yangle, zangle = 0, 0, 0 + # else: + # xangle = angles[rotInd[i][0] - 1] + # yangle = angles[rotInd[i][1] - 1] + # zangle = angles[rotInd[i][2] - 1] + if i == 0: + xangle = angles[0] + yangle = angles[1] + zangle = angles[2] + thisPosition = np.array([xangle, yangle, zangle]) + else: + thisPosition = np.array([0, 0, 0]) + + r = angles[expmapInd[i]] + + thisRotation = data_utils.expmap2rotmat(r) + + if parent[i] == -1: # Root node + xyzStruct[i]['rotation'] = thisRotation + xyzStruct[i]['xyz'] = np.reshape(offset[i, :], (1, 3)) + thisPosition + else: + xyzStruct[i]['xyz'] = (offset[i, :] + thisPosition).dot(xyzStruct[parent[i]]['rotation']) + \ + xyzStruct[parent[i]]['xyz'] + xyzStruct[i]['rotation'] = thisRotation.dot(xyzStruct[parent[i]]['rotation']) + + xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] + xyz = np.array(xyz).squeeze() + # xyz = xyz[:, [0, 2, 1]] + # xyz = xyz[:,[2,0,1]] + + return xyz + + +def _some_variables(): + """ + borrowed from + https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L100 + + We define some variables that are useful to run the kinematic tree + + Args + None + Returns + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + """ + + parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 1, 12, 13, 14, 15, 13, + 17, 18, 19, 20, 21, 20, 23, 13, 25, 26, 27, 28, 29, 28, 31]) - 1 + + offset = np.array( + [0.000000, 0.000000, 0.000000, -132.948591, 0.000000, 0.000000, 0.000000, -442.894612, 0.000000, 0.000000, + -454.206447, 0.000000, 0.000000, 0.000000, 162.767078, 0.000000, 0.000000, 74.999437, 132.948826, 0.000000, + 0.000000, 0.000000, -442.894413, 0.000000, 0.000000, -454.206590, 0.000000, 0.000000, 0.000000, 162.767426, + 0.000000, 0.000000, 74.999948, 0.000000, 0.100000, 0.000000, 0.000000, 233.383263, 0.000000, 0.000000, + 257.077681, 0.000000, 0.000000, 121.134938, 0.000000, 0.000000, 115.002227, 0.000000, 0.000000, 257.077681, + 0.000000, 0.000000, 151.034226, 0.000000, 0.000000, 278.882773, 0.000000, 0.000000, 251.733451, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999627, 0.000000, 100.000188, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 257.077681, 0.000000, 0.000000, 151.031437, 0.000000, 0.000000, 278.892924, + 0.000000, 0.000000, 251.728680, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999888, + 0.000000, 137.499922, 0.000000, 0.000000, 0.000000, 0.000000]) + 
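+    # 96 flattened bone-offset values (x, y, z for each of the 32 joints); reshaped to (32, 3) below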
offset = offset.reshape(-1, 3) + + rotInd = [[5, 6, 4], + [8, 9, 7], + [11, 12, 10], + [14, 15, 13], + [17, 18, 16], + [], + [20, 21, 19], + [23, 24, 22], + [26, 27, 25], + [29, 30, 28], + [], + [32, 33, 31], + [35, 36, 34], + [38, 39, 37], + [41, 42, 40], + [], + [44, 45, 43], + [47, 48, 46], + [50, 51, 49], + [53, 54, 52], + [56, 57, 55], + [], + [59, 60, 58], + [], + [62, 63, 61], + [65, 66, 64], + [68, 69, 67], + [71, 72, 70], + [74, 75, 73], + [], + [77, 78, 76], + []] + + expmapInd = np.split(np.arange(4, 100) - 1, 32) + + return parent, offset, rotInd, expmapInd + + +def _some_variables_cmu(): + """ + We define some variables that are useful to run the kinematic tree + + Args + None + Returns + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + """ + + parent = np.array([0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 1, 14, 15, 16, 17, 18, 19, 16, + 21, 22, 23, 24, 25, 26, 24, 28, 16, 30, 31, 32, 33, 34, 35, 33, 37]) - 1 + + offset = 70 * np.array( + [0, 0, 0, 0, 0, 0, 1.65674000000000, -1.80282000000000, 0.624770000000000, 2.59720000000000, -7.13576000000000, + 0, 2.49236000000000, -6.84770000000000, 0, 0.197040000000000, -0.541360000000000, 2.14581000000000, 0, 0, + 1.11249000000000, 0, 0, 0, -1.61070000000000, -1.80282000000000, 0.624760000000000, -2.59502000000000, + -7.12977000000000, 0, -2.46780000000000, -6.78024000000000, 0, -0.230240000000000, -0.632580000000000, + 2.13368000000000, 0, 0, 1.11569000000000, 0, 0, 0, 0.0196100000000000, 2.05450000000000, -0.141120000000000, + 0.0102100000000000, 2.06436000000000, -0.0592100000000000, 0, 0, 0, 0.00713000000000000, 1.56711000000000, + 0.149680000000000, 0.0342900000000000, 1.56041000000000, -0.100060000000000, 0.0130500000000000, + 1.62560000000000, -0.0526500000000000, 0, 0, 0, 3.54205000000000, 0.904360000000000, -0.173640000000000, + 4.86513000000000, 0, 0, 3.35554000000000, 0, 0, 0, 0, 0, 0.661170000000000, 0, 0, 0.533060000000000, 0, 0, 0, + 0, 0, 0.541200000000000, 0, 0.541200000000000, 0, 0, 0, -3.49802000000000, 0.759940000000000, + -0.326160000000000, -5.02649000000000, 0, 0, -3.36431000000000, 0, 0, 0, 0, 0, -0.730410000000000, 0, 0, + -0.588870000000000, 0, 0, 0, 0, 0, -0.597860000000000, 0, 0.597860000000000]) + offset = offset.reshape(-1, 3) + + rotInd = [[6, 5, 4], + [9, 8, 7], + [12, 11, 10], + [15, 14, 13], + [18, 17, 16], + [21, 20, 19], + [], + [24, 23, 22], + [27, 26, 25], + [30, 29, 28], + [33, 32, 31], + [36, 35, 34], + [], + [39, 38, 37], + [42, 41, 40], + [45, 44, 43], + [48, 47, 46], + [51, 50, 49], + [54, 53, 52], + [], + [57, 56, 55], + [60, 59, 58], + [63, 62, 61], + [66, 65, 64], + [69, 68, 67], + [72, 71, 70], + [], + [75, 74, 73], + [], + [78, 77, 76], + [81, 80, 79], + [84, 83, 82], + [87, 86, 85], + [90, 89, 88], + [93, 92, 91], + [], + [96, 95, 94], + []] + posInd = [] + for ii in np.arange(38): + if ii == 0: + posInd.append([1, 2, 3]) + else: + posInd.append([]) + + expmapInd = np.split(np.arange(4, 118) - 1, 38) + + return parent, offset, posInd, expmapInd + + +def fkl_torch(angles, parent, offset, rotInd, expmapInd): + """ + pytorch version of fkl. 
+ + convert joint angles to joint locations + batch pytorch version of the fkl() method above + :param angles: N*99 + :param parent: + :param offset: + :param rotInd: + :param expmapInd: + :return: N*joint_n*3 + """ + n = angles.data.shape[0] + j_n = offset.shape[0] + p3d = Variable(torch.from_numpy(offset)).float().unsqueeze(0).repeat(n, 1, 1) + angles = angles[:, 3:].contiguous().view(-1, 3) + R = data_utils.expmap2rotmat_torch(angles).view(n, j_n, 3, 3) + for i in np.arange(1, j_n): + if parent[i] > 0: + R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone() + p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :] + return p3d + + +def main(): + # Load all the data + parent, offset, rotInd, expmapInd = _some_variables() + + # numpy implementation + # with h5py.File('samples.h5', 'r') as h5f: + # expmap_gt = h5f['expmap/gt/walking_0'][:] + # expmap_pred = h5f['expmap/preds/walking_0'][:] + expmap_pred = np.array( + [0.0000000, 0.0000000, 0.0000000, -0.0000001, -0.0000000, -0.0000002, 0.3978439, -0.4166636, 0.1027215, + -0.7767256, -0.0000000, -0.0000000, 0.1704115, 0.3078358, -0.1861640, 0.3330379, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 0.0679339, 0.2255526, 0.2394881, -0.0989492, -0.0000000, -0.0000000, + 0.0677801, -0.3607298, 0.0503249, 0.1819232, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + 0.3236777, -0.0476493, -0.0651256, -0.3150051, -0.0665669, 0.3188994, -0.5980227, -0.1190833, -0.3017127, + 1.2270271, -0.1010960, 0.2072986, -0.0000000, -0.0000000, -0.0000000, -0.2578378, -0.0125206, 2.0266378, + -0.3701521, 0.0199115, 0.5594162, -0.4625384, -0.0000000, -0.0000000, 0.1653314, -0.3952765, -0.1731570, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 2.7825687, -1.4196042, -0.0936858, -1.0348599, -2.7419815, 0.4518218, + -0.3902033, -0.0000000, -0.0000000, 0.0597317, 0.0547002, 0.0445105, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 + ]) + expmap_gt = np.array( + [0.2240568, -0.0276901, -0.7433901, 0.0004407, -0.0020624, 0.0002131, 0.3974636, -0.4157083, 0.1030248, + -0.7762963, -0.0000000, -0.0000000, 0.1697988, 0.3087364, -0.1863863, 0.3327336, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 0.0689423, 0.2282812, 0.2395958, -0.0998311, -0.0000000, -0.0000000, + 0.0672752, -0.3615943, 0.0505299, 0.1816492, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + 0.3223563, -0.0481131, -0.0659720, -0.3145134, -0.0656419, 0.3206626, -0.5979006, -0.1181534, -0.3033383, + 1.2269648, -0.1011873, 0.2057794, -0.0000000, -0.0000000, -0.0000000, -0.2590978, -0.0141497, 2.0271597, + -0.3699318, 0.0128547, 0.5556172, -0.4714990, -0.0000000, -0.0000000, 0.1603251, -0.4157299, -0.1667608, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, 2.7811005, -1.4192915, -0.0932141, -1.0294687, -2.7323222, 0.4542309, + -0.4048152, -0.0000000, -0.0000000, 0.0568960, 0.0525994, 0.0493068, -0.0000000, -0.0000000, -0.0000000, + -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 + ]) + xyz1 = fkl(expmap_pred, parent, offset, rotInd, expmapInd) + xyz2 = fkl(expmap_gt, parent, offset, rotInd, expmapInd) + + exp1 = 
Variable(torch.from_numpy(np.vstack((expmap_pred, expmap_gt))).float()) + xyz = fkl_torch(exp1, parent, offset, rotInd, expmapInd) + xyz = xyz.cpu().data.numpy() + print(xyz) + + +if __name__ == '__main__': + main() diff --git a/models/sts_gcn/utils/h36_3d_viz.py b/models/sts_gcn/utils/h36_3d_viz.py new file mode 100644 index 0000000..647a7cf --- /dev/null +++ b/models/sts_gcn/utils/h36_3d_viz.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python +# coding: utf-8 + +import numpy as np +import torch +from torch.utils.data import DataLoader +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import matplotlib.animation as animation +from utils import h36motion3d as datasets +from utils.loss_funcs import mpjpe_error +from utils.data_utils import define_actions + + + +def create_pose(ax,plots,vals,pred=True,update=False): + + + + # h36m 32 joints(full) + connect = [ + (1, 2), (2, 3), (3, 4), (4, 5), + (6, 7), (7, 8), (8, 9), (9, 10), + (0, 1), (0, 6), + (6, 17), (17, 18), (18, 19), (19, 20), (20, 21), (21, 22), + (1, 25), (25, 26), (26, 27), (27, 28), (28, 29), (29, 30), + (24, 25), (24, 17), + (24, 14), (14, 15) + ] + LR = [ + False, True, True, True, True, + True, False, False, False, False, + False, True, True, True, True, + True, True, False, False, False, + False, False, False, False, True, + False, True, True, True, True, + True, True + ] + + +# Start and endpoints of our representation + I = np.array([touple[0] for touple in connect]) + J = np.array([touple[1] for touple in connect]) +# Left / right indicator + LR = np.array([LR[a] or LR[b] for a,b in connect]) + if pred: + lcolor = "#9b59b6" + rcolor = "#2ecc71" + else: + lcolor = "#8e8e8e" + rcolor = "#383838" + + for i in np.arange( len(I) ): + x = np.array( [vals[I[i], 0], vals[J[i], 0]] ) + z = np.array( [vals[I[i], 1], vals[J[i], 1]] ) + y = np.array( [vals[I[i], 2], vals[J[i], 2]] ) + if not update: + + if i ==0: + plots.append(ax.plot(x, y, z, lw=2,linestyle='--' ,c=lcolor if LR[i] else rcolor,label=['GT' if not pred else 'Pred'])) + else: + plots.append(ax.plot(x, y, z, lw=2,linestyle='--', c=lcolor if LR[i] else rcolor)) + + elif update: + plots[i][0].set_xdata(x) + plots[i][0].set_ydata(y) + plots[i][0].set_3d_properties(z) + plots[i][0].set_color(lcolor if LR[i] else rcolor) + + return plots + # ax.legend(loc='lower left') + + +# In[11]: + + +def update(num,data_gt,data_pred,plots_gt,plots_pred,fig,ax): + + gt_vals=data_gt[num] + pred_vals=data_pred[num] + plots_gt=create_pose(ax,plots_gt,gt_vals,pred=False,update=True) + plots_pred=create_pose(ax,plots_pred,pred_vals,pred=True,update=True) + + + + + + r = 0.75 + xroot, zroot, yroot = gt_vals[0,0], gt_vals[0,1], gt_vals[0,2] + ax.set_xlim3d([-r+xroot, r+xroot]) + ax.set_ylim3d([-r+yroot, r+yroot]) + ax.set_zlim3d([-r+zroot, r+zroot]) + #ax.set_title('pose at time frame: '+str(num)) + #ax.set_aspect('equal') + + return plots_gt,plots_pred + + + +# In[12]: + + +def visualize(input_n,output_n,visualize_from,path,modello,device,n_viz,skip_rate,actions): + + actions=define_actions(actions) + + for action in actions: + + if visualize_from=='train': + loader=datasets.Datasets(path,input_n,output_n,skip_rate, split=0,actions=[action]) + elif visualize_from=='validation': + loader=datasets.Datasets(path,input_n,output_n,skip_rate, split=1,actions=[action]) + elif visualize_from=='test': + loader=datasets.Datasets(path,input_n,output_n,skip_rate, split=2,actions=[action]) + + dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, + 26, 27, 
28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) + # joints at same loc + joint_to_ignore = np.array([16, 20, 23, 24, 28, 31]) + index_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + joint_equal = np.array([13, 19, 22, 13, 27, 30]) + index_to_equal = np.concatenate((joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2)) + + + loader = DataLoader( + loader, + batch_size=1, + shuffle = False, # for comparable visualizations with other models + num_workers=0) + + + + for cnt,batch in enumerate(loader): + batch = batch.to(device) + + all_joints_seq=batch.clone()[:, input_n:input_n+output_n,:] + + sequences_train=batch[:, 0:input_n, dim_used].view(-1,input_n,len(dim_used)//3,3).permute(0,3,1,2) + sequences_gt=batch[:, input_n:input_n+output_n, :] + + sequences_predict=modello(sequences_train).permute(0,1,3,2).contiguous().view(-1,output_n,len(dim_used)) + + all_joints_seq[:,:,dim_used] = sequences_predict + + all_joints_seq[:,:,index_to_ignore] = all_joints_seq[:,:,index_to_equal] + + + all_joints_seq=all_joints_seq.view(-1,output_n,32,3) + + sequences_gt=sequences_gt.view(-1,output_n,32,3) + + loss=mpjpe_error(all_joints_seq,sequences_gt)# # both must have format (batch,T,V,C) + + + + data_pred=torch.squeeze(all_joints_seq,0).cpu().data.numpy()/1000 # in meters + data_gt=torch.squeeze(sequences_gt,0).cpu().data.numpy()/1000 + + + fig = plt.figure() + ax = Axes3D(fig) + ax.view_init(elev=20, azim=-40) + vals = np.zeros((32, 3)) # or joints_to_consider + gt_plots=[] + pred_plots=[] + + gt_plots=create_pose(ax,gt_plots,vals,pred=False,update=False) + pred_plots=create_pose(ax,pred_plots,vals,pred=True,update=False) + + ax.set_xlabel("x") + ax.set_ylabel("y") + ax.set_zlabel("z") + ax.legend(loc='lower left') + + + + ax.set_xlim3d([-1, 1.5]) + ax.set_xlabel('X') + + ax.set_ylim3d([-1, 1.5]) + ax.set_ylabel('Y') + + ax.set_zlim3d([0.0, 1.5]) + ax.set_zlabel('Z') + ax.set_title('loss in mm is: '+str(round(loss.item(),4))+' for action : '+str(action)+' for '+str(output_n)+' frames') + + line_anim = animation.FuncAnimation(fig, update, output_n, fargs=(data_gt,data_pred,gt_plots,pred_plots, + fig,ax),interval=70, blit=False) + plt.show() + + line_anim.save('human_viz.gif',writer='pillow') + + + if cnt==n_viz-1: + break + diff --git a/models/sts_gcn/utils/h36_ang_viz.py b/models/sts_gcn/utils/h36_ang_viz.py new file mode 100644 index 0000000..6b79f88 --- /dev/null +++ b/models/sts_gcn/utils/h36_ang_viz.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python +# coding: utf-8 + +import numpy as np +import torch +from torch.utils.data import DataLoader +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import matplotlib.animation as animation +from utils import h36motion as datasets +from utils.loss_funcs import euler_error +from utils.data_utils import expmap2xyz_torch,define_actions + + + + +def create_pose(ax,plots,vals,pred=True,update=False): + + + + # h36m 32 joints(full) + connect = [ + (1, 2), (2, 3), (3, 4), (4, 5), + (6, 7), (7, 8), (8, 9), (9, 10), + (0, 1), (0, 6), + (6, 17), (17, 18), (18, 19), (19, 20), (20, 21), (21, 22), + (1, 25), (25, 26), (26, 27), (27, 28), (28, 29), (29, 30), + (24, 25), (24, 17), + (24, 14), (14, 15) + ] + LR = [ + False, True, True, True, True, + True, False, False, False, False, + False, True, True, True, True, + True, True, False, False, False, + False, 
False, False, False, True, + False, True, True, True, True, + True, True + ] + + +# Start and endpoints of our representation + I = np.array([touple[0] for touple in connect]) + J = np.array([touple[1] for touple in connect]) +# Left / right indicator + LR = np.array([LR[a] or LR[b] for a,b in connect]) + if pred: + lcolor = "#9b59b6" + rcolor = "#2ecc71" + else: + lcolor = "#8e8e8e" + rcolor = "#383838" + + for i in np.arange( len(I) ): + x = np.array( [vals[I[i], 0], vals[J[i], 0]] ) + z = np.array( [vals[I[i], 1], vals[J[i], 1]] ) + y = np.array( [vals[I[i], 2], vals[J[i], 2]] ) + if not update: + + if i ==0: + plots.append(ax.plot(x, y, z, lw=2,linestyle='--' ,c=lcolor if LR[i] else rcolor,label=['GT' if not pred else 'Pred'])) + else: + plots.append(ax.plot(x, y, z, lw=2,linestyle='--', c=lcolor if LR[i] else rcolor)) + + elif update: + plots[i][0].set_xdata(x) + plots[i][0].set_ydata(y) + plots[i][0].set_3d_properties(z) + plots[i][0].set_color(lcolor if LR[i] else rcolor) + + return plots + # ax.legend(loc='lower left') + + +# In[11]: + + +def update(num,data_gt,data_pred,plots_gt,plots_pred,fig,ax): + + gt_vals=data_gt[num] + pred_vals=data_pred[num] + plots_gt=create_pose(ax,plots_gt,gt_vals,pred=False,update=True) + plots_pred=create_pose(ax,plots_pred,pred_vals,pred=True,update=True) + + + + + + r = 0.75 + xroot, zroot, yroot = gt_vals[0,0], gt_vals[0,1], gt_vals[0,2] # joint n 12 (back) as root + # print(xroot,yroot,zroot) + ax.set_xlim3d([-r+xroot, r+xroot]) + ax.set_ylim3d([-r+yroot, r+yroot]) + ax.set_zlim3d([-r+zroot, r+zroot]) + #ax.set_title('pose at time frame: '+str(num)) + #ax.set_aspect('equal') + + return plots_gt,plots_pred + + + +# In[12]: + + +def visualize(input_n,output_n,visualize_from,path,modello,device,n_viz,skip_rate,actions): + actions=define_actions(actions) + + for action in actions: + + if visualize_from=='train': + loader=datasets.Datasets(path,input_n,output_n,skip_rate, split=0,actions=[action]) + elif visualize_from=='validation': + loader=datasets.Datasets(path,input_n,output_n,skip_rate, split=1,actions=[action]) + elif visualize_from=='test': + loader=datasets.Datasets(path,input_n,output_n,skip_rate, split=2,actions=[action]) + + dim_used = np.array([6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, + 86]) + + loader = DataLoader( + loader, + batch_size=1, + shuffle = True, + num_workers=0) + + + + for cnt,batch in enumerate(loader): + batch = batch.to(device) + + all_joints_seq=batch.clone()[:, input_n:input_n+output_n,:] + + sequences_train=batch[:, 0:input_n, dim_used].view(-1,input_n,len(dim_used)//3,3).permute(0,3,1,2) + sequences_gt=batch[:, input_n:input_n+output_n, :] + + sequences_predict=modello(sequences_train).permute(0,1,3,2).contiguous().view(-1,output_n,len(dim_used)) + + all_joints_seq[:,:,dim_used] = sequences_predict + + + loss=euler_error(all_joints_seq,sequences_gt)# # both must have format (batch,T,V,C) + + all_joints_seq=all_joints_seq.view(-1,99) + + sequences_gt=sequences_gt.view(-1,99) + + all_joints_seq=expmap2xyz_torch(all_joints_seq).view(-1,output_n,32,3) + + sequences_gt=expmap2xyz_torch(sequences_gt).view(-1,output_n,32,3) + + + + + + data_pred=torch.squeeze(all_joints_seq,0).cpu().data.numpy()/1000 # in meters + data_gt=torch.squeeze(sequences_gt,0).cpu().data.numpy()/1000 + + + fig = plt.figure() + ax = Axes3D(fig) + vals = np.zeros((32, 3)) # or joints_to_consider + gt_plots=[] 
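+            # line handles for the ground-truth and predicted skeletons; create_pose fills these lists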
+ pred_plots=[] + + gt_plots=create_pose(ax,gt_plots,vals,pred=False,update=False) + pred_plots=create_pose(ax,pred_plots,vals,pred=True,update=False) + + ax.set_xlabel("x") + ax.set_ylabel("y") + ax.set_zlabel("z") + ax.legend(loc='lower left') + + + + ax.set_xlim3d([-1, 1.5]) + ax.set_xlabel('X') + + ax.set_ylim3d([-1, 1.5]) + ax.set_ylabel('Y') + + ax.set_zlim3d([0.0, 1.5]) + ax.set_zlabel('Z') + ax.set_title('loss in euler angle is: '+str(round(loss.item(),4))+' for action : '+action+' for '+str(output_n)+' frames') + + line_anim = animation.FuncAnimation(fig, update, output_n, fargs=(data_gt,data_pred,gt_plots,pred_plots, + fig,ax),interval=70, blit=False) + plt.show() + + + if cnt==n_viz-1: + break + diff --git a/models/sts_gcn/utils/h36motion.py b/models/sts_gcn/utils/h36motion.py new file mode 100644 index 0000000..43a69dc --- /dev/null +++ b/models/sts_gcn/utils/h36motion.py @@ -0,0 +1,130 @@ +from torch.utils.data import Dataset +import numpy as np +from h5py import File +import scipy.io as sio +from utils import data_utils +from matplotlib import pyplot as plt +import torch +import os + +''' +adapted from +https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/h36motion.py +''' + + +class Datasets(Dataset): + + def __init__(self,data_dir,input_n,output_n,skip_rate, actions=None, split=0): + """ + :param path_to_data: + :param actions: + :param input_n: + :param output_n: + :param dct_used: + :param split: 0 train, 1 testing, 2 validation + :param sample_rate: + """ + self.path_to_data = os.path.join(data_dir,'h3.6m\dataset') + self.split = split + self.in_n = input_n + self.out_n = output_n + self.sample_rate = 2 + self.seq = {} + self.data_idx = [] + + self.dimensions_to_use = np.array( + [6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86]) + self.dimensions_to_ignore = np.array( + [[0, 1, 2, 3, 4, 5, 10, 11, 16, 17, 18, 19, 20, 25, 26, 31, 32, 33, 34, 35, 48, 49, 50, 58, + 59, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 82, 83, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, + 98]]) + + seq_len = self.in_n + self.out_n + subs = np.array([[1, 6, 7, 8, 9], [11], [5]]) + # acts = data_utils.define_actions(actions) + if actions is None: + acts = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] + else: + acts = actions + # subs = np.array([[1], [11], [5]]) + # acts = ['walking'] + + subs = subs[split] + + for subj in subs: + for action_idx in np.arange(len(acts)): + action = acts[action_idx] + if self.split <= 1: + for subact in [1, 2]: # subactions + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, subact) + the_sequence = data_utils.readCSVasFloat(filename) + n, d = the_sequence.shape + even_list = range(0, n, self.sample_rate) + num_frames = len(even_list) + the_sequence = np.array(the_sequence[even_list, :]) + # the_sequence = torch.from_numpy(the_sequence).float().cuda() + # remove global rotation and translation + the_sequence[:, 0:6] = 0 + # p3d = data_utils.expmap2xyz_torch(the_sequence) + self.seq[(subj, action, subact)] = the_sequence + + valid_frames = np.arange(0, num_frames - seq_len + 1, skip_rate) + + tmp_data_idx_1 = [(subj, action, subact)] * len(valid_frames) + 
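+                        # pair every valid start frame with its (subject, action, subaction) key for indexing in __getitem__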
tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + else: + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 1) + the_sequence1 = data_utils.readCSVasFloat(filename) + n, d = the_sequence1.shape + even_list = range(0, n, self.sample_rate) + + num_frames1 = len(even_list) + the_sequence1 = np.array(the_sequence1[even_list, :]) + # the_seq1 = torch.from_numpy(the_sequence1).float().cuda() + the_sequence1[:, 0:6] = 0 + # p3d1 = data_utils.expmap2xyz_torch(the_seq1) + self.seq[(subj, action, 1)] = the_sequence1 + + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 2) + the_sequence2 = data_utils.readCSVasFloat(filename) + n, d = the_sequence2.shape + even_list = range(0, n, self.sample_rate) + + num_frames2 = len(even_list) + the_sequence2 = np.array(the_sequence2[even_list, :]) + # the_seq2 = torch.from_numpy(the_sequence2).float().cuda() + the_sequence2[:, 0:6] = 0 + # p3d2 = data_utils.expmap2xyz_torch(the_seq2) + self.seq[(subj, action, 2)] = the_sequence2 + + # fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len, + # input_n=self.in_n) + fs_sel1, fs_sel2 = data_utils.find_indices_srnn(num_frames1, num_frames2, seq_len, + input_n=self.in_n) + + valid_frames = fs_sel1[:, 0] + tmp_data_idx_1 = [(subj, action, 1)] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + valid_frames = fs_sel2[:, 0] + tmp_data_idx_1 = [(subj, action, 2)] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + return self.seq[key][fs] diff --git a/models/sts_gcn/utils/h36motion3d.py b/models/sts_gcn/utils/h36motion3d.py new file mode 100644 index 0000000..c9cf2b6 --- /dev/null +++ b/models/sts_gcn/utils/h36motion3d.py @@ -0,0 +1,146 @@ +from torch.utils.data import Dataset +import numpy as np +from h5py import File +import scipy.io as sio +from utils import data_utils +from matplotlib import pyplot as plt +import torch + +import os + +''' +adapted from +https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/h36motion3d.py +''' + + +class Datasets(Dataset): + + def __init__(self, data_dir,input_n,output_n,skip_rate, actions=None, split=0, subs=None): + """ + :param path_to_data: + :param actions: + :param input_n: + :param output_n: + :param dct_used: + :param split: 0 train, 1 testing, 2 validation + :param sample_rate: + """ + self.path_to_data = os.path.join(data_dir,'h3.6m/dataset') + self.split = split + self.in_n = input_n + self.out_n = output_n + self.sample_rate = 2 + self.p3d = {} + self.data_idx = [] + seq_len = self.in_n + self.out_n + if subs is None: + subs = np.array([[1, 6, 7, 8, 9], [11], [5]]) + else: + subs = np.array(subs) + # acts = data_utils.define_actions(actions) + if actions is None: + acts = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] + else: + acts = actions + # subs = np.array([[1], [11], [5]]) + # acts = ['walking'] + 
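+        # the joint-name list below is kept for reference only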
# 32 human3.6 joint name: + joint_name = ["Hips", "RightUpLeg", "RightLeg", "RightFoot", "RightToeBase", "Site", "LeftUpLeg", "LeftLeg", + "LeftFoot", + "LeftToeBase", "Site", "Spine", "Spine1", "Neck", "Head", "Site", "LeftShoulder", "LeftArm", + "LeftForeArm", + "LeftHand", "LeftHandThumb", "Site", "L_Wrist_End", "Site", "RightShoulder", "RightArm", + "RightForeArm", + "RightHand", "RightHandThumb", "Site", "R_Wrist_End", "Site"] + + subs = subs[split] + key = 0 + for subj in subs: + for action_idx in np.arange(len(acts)): + action = acts[action_idx] + if self.split <= 1: + for subact in [1, 2]: # subactions + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, subact) + the_sequence = data_utils.readCSVasFloat(filename) + n, d = the_sequence.shape + even_list = range(0, n, self.sample_rate) + num_frames = len(even_list) + the_sequence = np.array(the_sequence[even_list, :]) + the_sequence = torch.from_numpy(the_sequence).float().cuda() + # remove global rotation and translation + the_sequence[:, 0:6] = 0 + p3d = data_utils.expmap2xyz_torch(the_sequence) + # self.p3d[(subj, action, subact)] = p3d.view(num_frames, -1).cpu().data.numpy() + self.p3d[key] = p3d.view(num_frames, -1).cpu().data.numpy() + + valid_frames = np.arange(0, num_frames - seq_len + 1, skip_rate) + + # tmp_data_idx_1 = [(subj, action, subact)] * len(valid_frames) + tmp_data_idx_1 = [key] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + key += 1 + else: + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 1) + the_sequence1 = data_utils.readCSVasFloat(filename) + n, d = the_sequence1.shape + even_list = range(0, n, self.sample_rate) + + num_frames1 = len(even_list) + the_sequence1 = np.array(the_sequence1[even_list, :]) + the_seq1 = torch.from_numpy(the_sequence1).float().cuda() + the_seq1[:, 0:6] = 0 + p3d1 = data_utils.expmap2xyz_torch(the_seq1) + # self.p3d[(subj, action, 1)] = p3d1.view(num_frames1, -1).cpu().data.numpy() + self.p3d[key] = p3d1.view(num_frames1, -1).cpu().data.numpy() + + print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) + filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 2) + the_sequence2 = data_utils.readCSVasFloat(filename) + n, d = the_sequence2.shape + even_list = range(0, n, self.sample_rate) + + num_frames2 = len(even_list) + the_sequence2 = np.array(the_sequence2[even_list, :]) + the_seq2 = torch.from_numpy(the_sequence2).float().cuda() + the_seq2[:, 0:6] = 0 + p3d2 = data_utils.expmap2xyz_torch(the_seq2) + + # self.p3d[(subj, action, 2)] = p3d2.view(num_frames2, -1).cpu().data.numpy() + self.p3d[key + 1] = p3d2.view(num_frames2, -1).cpu().data.numpy() + + # print("action:{}".format(action)) + # print("subact1:{}".format(num_frames1)) + # print("subact2:{}".format(num_frames2)) + fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len, + input_n=self.in_n) + + valid_frames = fs_sel1[:, 0] + tmp_data_idx_1 = [key] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + + valid_frames = fs_sel2[:, 0] + tmp_data_idx_1 = [key + 1] * len(valid_frames) + tmp_data_idx_2 = list(valid_frames) + self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) + key += 2 + + # ignore 
constant joints and joints at same position with other joints + joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31]) + dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) + self.dimensions_to_use = np.setdiff1d(np.arange(96), dimensions_to_ignore) + + def __len__(self): + return np.shape(self.data_idx)[0] + + def __getitem__(self, item): + key, start_frame = self.data_idx[item] + fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) + return self.p3d[key][fs] diff --git a/models/sts_gcn/utils/loss_funcs.py b/models/sts_gcn/utils/loss_funcs.py new file mode 100644 index 0000000..48eba4b --- /dev/null +++ b/models/sts_gcn/utils/loss_funcs.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# coding: utf-8 + +import torch +from utils import data_utils + + + +def mpjpe_error(batch_pred,batch_gt): + + + + + + batch_pred=batch_pred.contiguous().view(-1,3) + batch_gt=batch_gt.contiguous().view(-1,3) + + return torch.mean(torch.norm(batch_gt-batch_pred,2,1)) + + +def euler_error(ang_pred, ang_gt): + + # only for 32 joints + + dim_full_len=ang_gt.shape[2] + + # pred_expmap[:, 0:6] = 0 + # targ_expmap[:, 0:6] = 0 + pred_expmap = ang_pred.contiguous().view(-1,dim_full_len).view(-1, 3) + targ_expmap = ang_gt.contiguous().view(-1,dim_full_len).view(-1, 3) + + pred_eul = data_utils.rotmat2euler_torch(data_utils.expmap2rotmat_torch(pred_expmap)) + pred_eul = pred_eul.view(-1, dim_full_len) + + targ_eul = data_utils.rotmat2euler_torch(data_utils.expmap2rotmat_torch(targ_expmap)) + targ_eul = targ_eul.view(-1, dim_full_len) + mean_errors = torch.mean(torch.norm(pred_eul - targ_eul, 2, 1)) + + return mean_errors + + + + diff --git a/models/sts_gcn/utils/parser.py b/models/sts_gcn/utils/parser.py new file mode 100644 index 0000000..7a7019e --- /dev/null +++ b/models/sts_gcn/utils/parser.py @@ -0,0 +1,64 @@ +import argparse + + +parser = argparse.ArgumentParser(description='Arguments for running the scripts') + +#ARGS FOR LOADING THE DATASET + + +parser.add_argument('--data_dir',type=str,default='../datasets/',help='path to the unziped dataset directories(H36m/AMASS/3DPW)') +parser.add_argument('--input_n',type=int,default=10,help="number of model's input frames") +parser.add_argument('--output_n',type=int,default=10,help="number of model's output frames") +parser.add_argument('--skip_rate',type=int,default=1,choices=[1,5],help='rate of frames to skip,defaults=1 for H36M or 5 for AMASS/3DPW') +parser.add_argument('--joints_to_consider',type=int,default=22,choices=[16,18,22],help='number of joints to use, defaults=16 for H36M angles, 22 for H36M 3D or 18 for AMASS/3DPW') + + +#ARGS FOR THE MODEL + +parser.add_argument('--n_stgcnn_layers',type=int,default=9,help= 'number of stgcnn layers') +parser.add_argument('--n_ccnn_layers',type=int,default=2,help= 'number of layers for the Coordinate-Channel Convolution') +parser.add_argument('--n_tcnn_layers',type=int,default=4,help= 'number of layers for the Time-Extrapolator Convolution') +parser.add_argument('--ccnn_kernel_size',type=list,default=[1,1],help= ' kernel for the C-CNN layers') +parser.add_argument('--tcnn_kernel_size',type=list,default=[3,3],help= ' kernel for the Time-Extrapolator CNN layers') +parser.add_argument('--embedding_dim',type=int,default=40,help= 'dimensions for the coordinates of the embedding') +parser.add_argument('--input_dim',type=int,default=3,help= 'dimensions of the input coordinates') +parser.add_argument('--st_gcnn_dropout',type=float,default=.1,help= 'st-gcnn 
dropout') +parser.add_argument('--ccnn_dropout',type=float,default=0.0,help= 'ccnn dropout') +parser.add_argument('--tcnn_dropout',type=float,default=0.0,help= 'tcnn dropout') + + + + + + +#ARGS FOR THE TRAINING + + + + +parser.add_argument('--mode',type=str,default='train',choices=['train','test','viz'],help= 'Choose to train,test or visualize from the model.Either train,test or viz') +parser.add_argument('--n_epochs',type=int,default=50,help= 'number of epochs to train') +parser.add_argument('--batch_size',type=int,default=256,help= 'batch size') +parser.add_argument('--batch_size_test',type=int,default=256,help= 'batch size for the test set') +parser.add_argument('--lr',type=int,default=1e-02,help= 'Learning rate of the optimizer') +parser.add_argument('--use_scheduler',type=bool,default=True,help= 'use MultiStepLR scheduler') +parser.add_argument('--milestones',type=list,default=[15,25,35,40],help= 'the epochs after which the learning rate is adjusted by gamma') +parser.add_argument('--gamma',type=float,default=0.1,help= 'gamma correction to the learning rate, after reaching the milestone epochs') +parser.add_argument('--clip_grad',type=float,default=None,help= 'select max norm to clip gradients') +parser.add_argument('--model_path',type=str,default='./checkpoints/CKPT_3D_H36M',help= 'directory with the models checkpoints ') + + +#FLAGS FOR THE VISUALIZATION + +parser.add_argument('--visualize_from',type=str,default='test',choices =['train','val','test'],help= 'choose data split to visualize from(train-val-test)') +parser.add_argument('--actions_to_consider',default='all',help= 'Actions to visualize.Choose either all or a list of actions') +parser.add_argument('--n_viz',type=int,default='2',help= 'Numbers of sequences to visaluze for each action') + + + + +args = parser.parse_args() + + + + diff --git a/models/zero_vel.py b/models/zero_vel.py new file mode 100644 index 0000000..43a8989 --- /dev/null +++ b/models/zero_vel.py @@ -0,0 +1,17 @@ +import torch + + +class ZeroVel(torch.nn.Module): + def __init__(self, args): + super(ZeroVel, self).__init__() + self.args = args + self.input_size = self.output_size = int(args.keypoints_num * args.keypoint_dim) + + def forward(self, inputs): + obs_pose = inputs['observed_pose'] + last_frame = obs_pose[..., -1, :].unsqueeze(-2) + ndims = len(obs_pose.shape) + pred_pose = last_frame.repeat([1 for _ in range(ndims - 2)] + [self.args.pred_frames_num, 1]) + outputs = {'pred_pose': pred_pose, 'pred_vel': torch.zeros_like(pred_pose)} + + return outputs diff --git a/optimizers/__init__.py b/optimizers/__init__.py new file mode 100644 index 0000000..afed9c5 --- /dev/null +++ b/optimizers/__init__.py @@ -0,0 +1,9 @@ +from .adam import adam +from .sgd import sgd +from .adamw import adamw +from .sam import sam + +OPTIMIZERS = {'adam': adam, + 'sgd': sgd, + 'adamw': adamw, + 'sam': sam} diff --git a/optimizers/adam.py b/optimizers/adam.py new file mode 100644 index 0000000..b4a4f97 --- /dev/null +++ b/optimizers/adam.py @@ -0,0 +1,5 @@ +import torch.optim as optim + + +def adam(params, args): + return optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) diff --git a/optimizers/adamw.py b/optimizers/adamw.py new file mode 100644 index 0000000..4ad3ca0 --- /dev/null +++ b/optimizers/adamw.py @@ -0,0 +1,5 @@ +import torch.optim as optim + + +def adamw(params, args): + return optim.AdamW(params, lr=args.lr, betas=tuple(args.betas), weight_decay=args.weight_decay) diff --git a/optimizers/sam.py b/optimizers/sam.py new file mode 100644 index 
0000000..b909920 --- /dev/null +++ b/optimizers/sam.py @@ -0,0 +1,77 @@ +import torch +import torch.optim as optim + +class SAM(optim.Optimizer): + def __init__(self, params, base_optimizer, rho=0.05, adaptive=False, **kwargs): + assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}" + + defaults = dict(rho=rho, adaptive=adaptive, **kwargs) + super(SAM, self).__init__(params, defaults) + + self.base_optimizer = base_optimizer(self.param_groups, **kwargs) + self.param_groups = self.base_optimizer.param_groups + + @torch.no_grad() + def first_step(self, zero_grad=False): + grad_norm = self._grad_norm() + for group in self.param_groups: + scale = group["rho"] / (grad_norm + 1e-12) + + for p in group["params"]: + if p.grad is None: continue + self.state[p]["old_p"] = p.data.clone() + e_w = (torch.pow(p, 2) if group["adaptive"] else 1.0) * p.grad * scale.to(p) + p.add_(e_w) # climb to the local maximum "w + e(w)" + + if zero_grad: self.zero_grad() + + @torch.no_grad() + def second_step(self, zero_grad=False): + for group in self.param_groups: + for p in group["params"]: + if p.grad is None: continue + p.data = self.state[p]["old_p"] # get back to "w" from "w + e(w)" + + self.base_optimizer.step() # do the actual "sharpness-aware" update + + if zero_grad: self.zero_grad() + + @torch.no_grad() + def step(self, closure=None): + assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided" + closure = torch.enable_grad()(closure) # the closure should do a full forward-backward pass + + self.first_step(zero_grad=True) + closure() + self.second_step() + + def _grad_norm(self): + shared_device = self.param_groups[0]["params"][0].device # put everything on the same device, in case of model parallelism + norm = torch.norm( + torch.stack([ + ((torch.abs(p) if group["adaptive"] else 1.0) * p.grad).norm(p=2).to(shared_device) + for group in self.param_groups for p in group["params"] + if p.grad is not None + ]), + p=2 + ) + return norm + + def load_state_dict(self, state_dict): + super().load_state_dict(state_dict) + self.base_optimizer.param_groups = self.param_groups + + +def sam(params, args): + if args.base_optimizer.type == 'adam': + base_optimizer = optim.Adam + optimizer = SAM( + params, + base_optimizer, + lr=args.base_optimizer.lr, + weight_decay=args.base_optimizer.weight_decay + ) + return optimizer + else: + raise Exception('Defined base optimizer is not supported') + diff --git a/optimizers/sgd.py b/optimizers/sgd.py new file mode 100644 index 0000000..b743c09 --- /dev/null +++ b/optimizers/sgd.py @@ -0,0 +1,5 @@ +import torch.optim as optim + + +def sgd(params, args): + return optim.SGD(params, lr=args.lr, momentum=args.momentum, dampening=args.dampening, nesterov=args.nesterov, weight_decay=args.weight_decay) diff --git a/path_definition.py b/path_definition.py new file mode 100644 index 0000000..13101d1 --- /dev/null +++ b/path_definition.py @@ -0,0 +1,5 @@ +import os + +ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) +PREPROCESSED_DATA_DIR = os.path.join(ROOT_DIR, 'preprocessed_data') +HYDRA_PATH = os.path.join(ROOT_DIR, 'configs/hydra') diff --git a/preprocessor/__init__.py b/preprocessor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/preprocessor/amass_preprocessor.py b/preprocessor/amass_preprocessor.py new file mode 100644 index 0000000..3ed9f56 --- /dev/null +++ b/preprocessor/amass_preprocessor.py @@ -0,0 +1,97 @@ +import logging +import os + +import jsonlines +import numpy as np +from utils.others import 
AMASSconvertTo3D + +from path_definition import PREPROCESSED_DATA_DIR + +logger = logging.getLogger(__name__) + +amass_splits = { + 'train': ['CMU', 'MPI_Limits', 'TotalCapture', 'Eyes_Japan_Dataset', 'KIT', 'EKUT', 'TCD_handMocap', 'ACCAD'], + 'validation': ['HumanEva', 'MPI_HDM05', 'SFU', 'MPI_mosh'], + 'test': ['BioMotionLab_NTroje'], +} + +class AmassPreprocessor: + def __init__(self, dataset_path, + custom_name): + + self.dataset_path = dataset_path + self.custom_name = custom_name + self.output_dir = os.path.join(PREPROCESSED_DATA_DIR, 'AMASS_total') + + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + + def normal(self, data_type='train'): + logger.info('start creating AMASS normal static data ... ') + const_joints = np.arange(4 * 3) + var_joints = np.arange(4 * 3, 22 * 3) + + if self.custom_name: + output_file_name = f'{data_type}_xyz_{self.custom_name}.jsonl' + else: + output_file_name = f'{data_type}_xyz_AMASS.jsonl' + + assert os.path.exists(os.path.join( + self.output_dir, + output_file_name + )) is False, f"preprocessed file exists at {os.path.join(self.output_dir, output_file_name)}" + + assert data_type in amass_splits, "data type must be one of train, validation or test" + + dataset_names = amass_splits[data_type] + for dataset_name in dataset_names: + raw_dataset_name = dataset_name + logger.info(f'dataset name: {dataset_name}') + for sub in os.listdir(os.path.join(self.dataset_path, dataset_name)): + raw_sub = sub + logger.info(f'subject name: {sub}') + sub = os.path.join(self.dataset_path, dataset_name, sub) + if not os.path.isdir(sub): + continue + for act in os.listdir(sub): + if not act.endswith('.npz'): + continue + raw_act = act[:-4] + pose_all = np.load(os.path.join(sub, act)) + try: + pose_data = pose_all['poses'] + except: + print('no poses at {} {}'.format(sub, act)) + continue + + pose_data = AMASSconvertTo3D(pose_data) # shape = [num frames , 66] + pose_data = pose_data * 1000 # convert from m to mm + + total_frame_num = pose_data.shape[0] + + data = [] + + video_data = { + 'obs_pose': list(), + 'obs_const_pose': list(), + 'fps': int(pose_all['mocap_framerate'].item()) + } + + for j in range(total_frame_num): + video_data['obs_pose'].append(pose_data[j][var_joints].tolist()) + video_data['obs_const_pose'].append(pose_data[j][const_joints].tolist()) + + data.append([ + '%s-%d' % ("{}-{}-{}".format(raw_dataset_name, raw_sub, raw_act), 0), + video_data['obs_pose'], video_data['obs_const_pose'], + video_data['fps'] + ]) + + with jsonlines.open(os.path.join(self.output_dir, output_file_name), 'a') as writer: + for data_row in data: + writer.write({ + 'video_section': data_row[0], + 'xyz_pose': data_row[1], + 'xyz_const_pose': data_row[2], + 'fps': data_row[3] + }) diff --git a/preprocessor/dpw_preprocessor.py b/preprocessor/dpw_preprocessor.py new file mode 100644 index 0000000..5891eb4 --- /dev/null +++ b/preprocessor/dpw_preprocessor.py @@ -0,0 +1,94 @@ +import logging +import os +import re +from collections import defaultdict + +import jsonlines +import numpy as np +import pandas as pd +from utils.others import DPWconvertTo3D + +from path_definition import PREPROCESSED_DATA_DIR + +logger = logging.getLogger(__name__) + + +class Preprocessor3DPW: + def __init__(self, dataset_path, + custom_name, load_60Hz=False): + + self.dataset_path = dataset_path + self.custom_name = custom_name + self.output_dir = os.path.join(PREPROCESSED_DATA_DIR, '3DPW_total') + + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + + 
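+        # when True, keep the raw 60 Hz SMPL pose parameters ('poses_60Hz') instead of the precomputed joint positions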
self.load_60Hz = load_60Hz + + def normal(self, data_type='train'): + logger.info('start creating 3DPW normal static data ... ') + + const_joints = np.arange(4 * 3) + var_joints = np.arange(4 * 3, 22 * 3) + + if self.custom_name: + output_file_name = f'{data_type}_xyz_{self.custom_name}.jsonl' + else: + output_file_name = f'{data_type}_xyz_3dpw.jsonl' + + assert os.path.exists(os.path.join( + self.output_dir, + output_file_name + )) is False, f"preprocessed file exists at {os.path.join(self.output_dir, output_file_name)}" + + + self.dataset_path = os.path.join(self.dataset_path, data_type) + + for entry in os.scandir(self.dataset_path): + if not entry.name.endswith('.pkl'): + continue + logger.info(f'file name: {entry.name}') + pickle_obj = pd.read_pickle(entry.path) + video_name = re.search('(\w+).pkl', entry.name).group(1) + if self.load_60Hz: + pose_data = np.array(pickle_obj['poses_60Hz']) + else: + pose_data = np.array(pickle_obj['jointPositions']) + + pose_data = DPWconvertTo3D(pose_data) * 1000 + + total_frame_num = pose_data.shape[1] + + data = [] + video_data = { + 'obs_pose': defaultdict(list) + } + for j in range(total_frame_num): + for p_id in range(pose_data.shape[0]): + video_data['obs_pose'][p_id].append( + pose_data[p_id, j, :].tolist() + ) + if len(list(video_data['obs_pose'].values())) > 0: + for p_id in range(len(pose_data)): + data.append([ + '%s-%d' % (video_name, 0), + video_data['obs_pose'][p_id] + ] if not self.load_60Hz else [ + '%s-%d' % (video_name, 0), + np.array(video_data['obs_pose'][p_id])[:, var_joints].tolist(), + np.array(video_data['obs_pose'][p_id])[:, const_joints].tolist() + ]) + with jsonlines.open(os.path.join(self.output_dir, output_file_name), 'a') as writer: + for data_row in data: + if not self.load_60Hz: + writer.write({ + 'video_section': data_row[0], + 'xyz_pose': data_row[1], + }) + else: + writer.write({ + 'video_section': data_row[0], + 'xyz_pose': data_row[1], + 'xyz_const_pose': data_row[2], + }) diff --git a/preprocessor/human36m_preprocessor.py b/preprocessor/human36m_preprocessor.py new file mode 100644 index 0000000..6a38158 --- /dev/null +++ b/preprocessor/human36m_preprocessor.py @@ -0,0 +1,265 @@ +import csv +import logging +import os +import zipfile +from glob import glob +from urllib.request import urlretrieve + +import cdflib +import jsonlines +import numpy as np +import torch +from torch.autograd.variable import Variable +import copy + +from path_definition import PREPROCESSED_DATA_DIR +from utils.others import expmap_to_quaternion, qfix, expmap_to_rotmat, expmap_to_euler + +logger = logging.getLogger(__name__) + +SPLIT = { + 'train': ['S1', 'S6', 'S7', 'S8', 'S9'], + 'validation': ['S11'], + 'test': ['S5'] +} + + +class Human36mPreprocessor: + def __init__(self, dataset_path, + custom_name): + self.dataset_path = dataset_path + self.custom_name = custom_name + self.output_dir = os.path.join( + PREPROCESSED_DATA_DIR, 'human36m' + ) + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + self.subjects = ['S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11'] + + self.acts = ["walking", "eating", "smoking", "discussion", "directions", + "greeting", "phoning", "posing", "purchases", "sitting", + "sittingdown", "takingphoto", "waiting", "walkingdog", + "walkingtogether"] + + def normal(self, data_type='train'): + self.subjects = SPLIT[data_type] + logger.info( + 'start creating Human3.6m preprocessed data from Human3.6m dataset ... 
') + + if self.custom_name: + output_file_name = \ + f'{data_type}_{self.custom_name}.jsonl' + else: + output_file_name = \ + f'{data_type}_human3.6m.jsonl' + + assert os.path.exists(os.path.join( + self.output_dir, + output_file_name + )) is False, f"preprocessed file exists at {os.path.join(self.output_dir, output_file_name)}" + + for subject in self.subjects: + logger.info("handling subject: {}".format(subject)) + for super_action in self.acts: + for sub_action in ['_1', '_2']: + action = super_action + sub_action + expmap = self.expmap_rep(action, subject, data_type) + positions = self.expmap2xyz_torch(torch.from_numpy(copy.deepcopy(expmap)).float()) + rotmat = self.rotmat_rep(action, subject, data_type) + euler = self.euler_rep(action, subject, data_type) + quat = self.quaternion_rep(action, subject, data_type) + + expmap = expmap.reshape(expmap.shape[0], -1) + positions = positions.reshape(positions.shape[0], -1) + rotmat = rotmat.reshape(rotmat.shape[0], -1) + euler = euler.reshape(euler.shape[0], -1) + quat = quat.reshape(quat.shape[0], -1) + + video_data = { + 'xyz_pose': positions.tolist()[:], + 'quaternion_pose': quat.tolist()[:], + 'expmap_pose': expmap.tolist()[:], + 'rotmat_pose': rotmat.tolist()[:], + 'euler_pose': euler.tolist()[:], + 'action': super_action + } + + with jsonlines.open(os.path.join(self.output_dir, output_file_name), mode='a') as writer: + writer.write({ + 'video_section': f'{subject}-{action}', + 'action': f'{super_action}', + 'xyz_pose': video_data['xyz_pose'], + 'quaternion_pose': video_data['quaternion_pose'], + 'expmap_pose': video_data['expmap_pose'], + 'rotmat_pose': video_data['rotmat_pose'], + 'euler_pose': video_data['euler_pose'] + }) + + def expmap_rep(self, action, subject, data_type): + data = self.__read_file(action, self.dataset_path, subject, data_type) + return data + + def rotmat_rep(self, action, subject, data_type): + data = self.expmap_rep(action, subject, data_type) + data = data.reshape(data.shape[0], -1, 3)[:, 1:] + data = expmap_to_rotmat(data) + return data + + def euler_rep(self, action, subject, data_type): + data = self.expmap_rep(action, subject, data_type) + data = data.reshape(data.shape[0], -1, 3)[:, 1:] + data = expmap_to_euler(data) + return data + + def quaternion_rep(self, action, subject, data_type): + data = self.expmap_rep(action, subject, data_type) + data = data.reshape(data.shape[0], -1, 3)[:, 1:] + quat = expmap_to_quaternion(-data) + quat = qfix(quat) + quat = quat.reshape(-1, 32 * 4) + return quat.reshape(-1, 32 * 4) + + def expmap2xyz_torch(self, expmap): + """ + convert expmaps to joint locations + :param expmap: N*99 + :return: N*32*3 + """ + expmap[:, 0:6] = 0 + parent, offset, rotInd, expmapInd = self._some_variables() + xyz = self.fkl_torch(expmap, parent, offset, rotInd, expmapInd) + return xyz + + def _some_variables(self): + """ + borrowed from + https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L100 + We define some variables that are useful to run the kinematic tree + Args + None + Returns + parent: 32-long vector with parent-child relationships in the kinematic tree + offset: 96-long vector with bone lenghts + rotInd: 32-long list with indices into angles + expmapInd: 32-long list with indices into expmap angles + """ + + parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 1, 12, 13, 14, 15, 13, + 17, 18, 19, 20, 21, 20, 23, 13, 25, 26, 27, 28, 29, 28, 31]) - 1 + + offset = np.array( + [0.000000, 0.000000, 0.000000, -132.948591, 0.000000, 0.000000, 
0.000000, -442.894612, 0.000000, 0.000000, + -454.206447, 0.000000, 0.000000, 0.000000, 162.767078, 0.000000, 0.000000, 74.999437, 132.948826, 0.000000, + 0.000000, 0.000000, -442.894413, 0.000000, 0.000000, -454.206590, 0.000000, 0.000000, 0.000000, 162.767426, + 0.000000, 0.000000, 74.999948, 0.000000, 0.100000, 0.000000, 0.000000, 233.383263, 0.000000, 0.000000, + 257.077681, 0.000000, 0.000000, 121.134938, 0.000000, 0.000000, 115.002227, 0.000000, 0.000000, 257.077681, + 0.000000, 0.000000, 151.034226, 0.000000, 0.000000, 278.882773, 0.000000, 0.000000, 251.733451, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999627, 0.000000, 100.000188, 0.000000, 0.000000, + 0.000000, 0.000000, 0.000000, 257.077681, 0.000000, 0.000000, 151.031437, 0.000000, 0.000000, 278.892924, + 0.000000, 0.000000, 251.728680, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999888, + 0.000000, 137.499922, 0.000000, 0.000000, 0.000000, 0.000000]) + offset = offset.reshape(-1, 3) + + rotInd = [[5, 6, 4], + [8, 9, 7], + [11, 12, 10], + [14, 15, 13], + [17, 18, 16], + [], + [20, 21, 19], + [23, 24, 22], + [26, 27, 25], + [29, 30, 28], + [], + [32, 33, 31], + [35, 36, 34], + [38, 39, 37], + [41, 42, 40], + [], + [44, 45, 43], + [47, 48, 46], + [50, 51, 49], + [53, 54, 52], + [56, 57, 55], + [], + [59, 60, 58], + [], + [62, 63, 61], + [65, 66, 64], + [68, 69, 67], + [71, 72, 70], + [74, 75, 73], + [], + [77, 78, 76], + []] + + expmapInd = np.split(np.arange(4, 100) - 1, 32) + + return parent, offset, rotInd, expmapInd + + def fkl_torch(self, angles, parent, offset, rotInd, expmapInd): + """ + pytorch version of fkl. + convert joint angles to joint locations + batch pytorch version of the fkl() method above + :param angles: N*99 + :param parent: + :param offset: + :param rotInd: + :param expmapInd: + :return: N*joint_n*3 + """ + n_a = angles.data.shape[0] + j_n = offset.shape[0] + p3d = Variable(torch.from_numpy(offset)).float().unsqueeze(0).repeat(n_a, 1, 1) + angles = angles[:, 3:].contiguous().view(-1, 3) + + theta = torch.norm(angles, 2, 1) + r0 = torch.div(angles, theta.unsqueeze(1).repeat(1, 3) + 0.0000001) + r1 = torch.zeros_like(r0).repeat(1, 3) + r1[:, 1] = -r0[:, 2] + r1[:, 2] = r0[:, 1] + r1[:, 5] = -r0[:, 0] + r1 = r1.view(-1, 3, 3) + r1 = r1 - r1.transpose(1, 2) + n = r1.data.shape[0] + R = (torch.eye(3, 3).repeat(n, 1, 1).float() + torch.mul( + torch.sin(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3), r1) + torch.mul( + (1 - torch.cos(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3)), torch.matmul(r1, r1))).view(n_a, j_n, 3, 3) + + for i in np.arange(1, j_n): + if parent[i] > 0: + R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone() + p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :] + return p3d + + @staticmethod + def __read_file(action, rot_dir_path, subject, data_type): + ''' + Read an individual file in expmap format, + and return a NumPy tensor with shape (sequence length, number of joints, 3). 
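+        (The values are returned flat, one row of 99 values per frame, and reshaped by the callers.)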
+ ''' + action_number = action[-1] + action = action[:-2] + action = action.replace('WalkTogether', 'WalkingTogether').replace( + 'WalkDog', 'WalkingDog') + if action.lower().__contains__('photo'): + action = 'TakingPhoto' + + path_to_read = os.path.join(rot_dir_path, 'dataset', subject, + f'{action.split(" ")[0].lower()}_{action_number}.txt') + + data = [] + with open(path_to_read, 'r') as csvfile: + reader = csv.reader(csvfile, delimiter=',') + for row in reader: + data.append(row) + data = np.array(data, dtype='float64') + + return data + + @staticmethod + def delete_redundant_files(): + output_directory = os.path.join(PREPROCESSED_DATA_DIR, 'H3.6m_rotations') + h36_folder = os.path.join(output_directory, 'h3.6m') + os.remove(h36_folder + ".zip") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b695ca7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,22 @@ +torch~=1.7 +numpy~=1.19 +scipy~=1.6 +scikit-learn~=1.0.2 +matplotlib +pillow +jupyter +pandas~=1.2 +opencv-python~=4.4.0.46 +pygifsicle~=1.0.4 +imageio~=2.9 +openpifpaf~=0.12.13 +bs4 +lxml~=4.6 +jsonlines +tensorboard +hydra-core +cdflib +astropy +seaborn +mlflow +tqdm \ No newline at end of file diff --git a/schedulers/__init__.py b/schedulers/__init__.py new file mode 100644 index 0000000..c9fa1d4 --- /dev/null +++ b/schedulers/__init__.py @@ -0,0 +1,8 @@ +from .reduce_lr_on_plateau import Reduce_LR_On_Plateau +from .step_lr import Step_LR +from .multi_step_lr import MultiStepLR + +SCHEDULERS = {'reduce_lr_on_plateau': Reduce_LR_On_Plateau, + 'step_lr': Step_LR, + 'multi_step_lr': MultiStepLR + } diff --git a/schedulers/multi_step_lr.py b/schedulers/multi_step_lr.py new file mode 100644 index 0000000..20f8fbf --- /dev/null +++ b/schedulers/multi_step_lr.py @@ -0,0 +1,9 @@ +import torch.optim as optim + + +class MultiStepLR: + def __init__(self, optimizer, args): + self.scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.milestones, gamma=args.gamma) + + def step(self, in_=None): + self.scheduler.step() diff --git a/schedulers/reduce_lr_on_plateau.py b/schedulers/reduce_lr_on_plateau.py new file mode 100644 index 0000000..ea9440f --- /dev/null +++ b/schedulers/reduce_lr_on_plateau.py @@ -0,0 +1,11 @@ +import torch.optim as optim + + +class Reduce_LR_On_Plateau: + def __init__(self, optimizer, args): + self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode=args.mode, factor=args.factor, + patience=args.patience, threshold=args.threshold, + verbose=args.verbose) + + def step(self, in_): + self.scheduler.step(in_) diff --git a/schedulers/step_lr.py b/schedulers/step_lr.py new file mode 100644 index 0000000..6c4109b --- /dev/null +++ b/schedulers/step_lr.py @@ -0,0 +1,10 @@ +import torch.optim as optim + + +class Step_LR: + def __init__(self, optimizer, args): + self.scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma, + last_epoch=args.last_epoch, verbose=args.verbose) + + def step(self, in_=None): + self.scheduler.step() diff --git a/training_commands.md b/training_commands.md new file mode 100644 index 0000000..bfd9879 --- /dev/null +++ b/training_commands.md @@ -0,0 +1,150 @@ +Using the commands below you can train different models on different datasets. + +**NOTE**: AMASS and 3DPW settings are simillar to each other. 
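+
+All of the commands below read the preprocessed dataset locations from the `DATASET_TRAIN_PATH` and `DATASET_VALIDATION_PATH` shell variables and hand them to Hydra through the `train_dataset`/`valid_dataset` overrides. As a minimal sketch (the paths and file names here are placeholders, not a required layout), you can export them once before running any command:
+
+```bash
+# Placeholder paths -- point these at your own preprocessed .jsonl files.
+export DATASET_TRAIN_PATH=/path/to/preprocessed/train_human3.6m.jsonl
+export DATASET_VALIDATION_PATH=/path/to/preprocessed/validation_human3.6m.jsonl
+```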
+
+## ST_Trans
+### Human3.6M
+```bash
+python -m api.train model=st_trans \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          obs_frames_num=50 \
+          pred_frames_num=25 \
+          model.loss.nT=25 \
+          model.pre_post_process=human3.6m \
+          model.n_major_joints=22 \
+          model.loss.nJ=32 \
+          epochs=15
+```
+### AMASS
+```bash
+python -m api.train model=st_trans \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          obs_frames_num=10 \
+          pred_frames_num=25 \
+          model.loss.nT=25 \
+          model.pre_post_process=AMASS \
+          model.n_major_joints=18 \
+          model.loss.nJ=18
+```
+### 3DPW
+```bash
+python -m api.train model=st_trans \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          obs_frames_num=10 \
+          pred_frames_num=25 \
+          model.loss.nT=25 \
+          model.pre_post_process=3DPW \
+          model.n_major_joints=18 \
+          model.loss.nJ=18
+```
+## PGBIG
+### Human3.6M
+```bash
+python -m api.train model=pgbig \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          obs_frames_num=10 \
+          pred_frames_num=25 \
+          model.pre_post_process=human3.6m \
+          model.in_features=66 \
+          model.loss.nJ=22 \
+          model.loss.pre_post_process=human3.6m \
+          epochs=50
+```
+### AMASS
+```bash
+python -m api.train model=pgbig \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          obs_frames_num=10 \
+          pred_frames_num=25 \
+          model.pre_post_process=AMASS \
+          model.in_features=54 \
+          model.loss.nJ=18 \
+          model.loss.pre_post_process=AMASS \
+          epochs=50
+```
+### 3DPW
+```bash
+python -m api.train model=pgbig \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          obs_frames_num=10 \
+          pred_frames_num=25 \
+          model.pre_post_process=3DPW \
+          model.in_features=54 \
+          model.loss.nJ=18 \
+          model.loss.pre_post_process=3DPW \
+          epochs=50
+```
+## History-Repeats-Itself
+### Human3.6M
+```bash
+python -m api.train model=history_repeats_itself \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          model.modality=Human36 \
+          model.in_features=66 \
+          obs_frames_num=50 \
+          pred_frames_num=25
+```
+### AMASS
+```bash
+python -m api.train model=history_repeats_itself \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          model.modality=AMASS \
+          model.in_features=66 \
+          obs_frames_num=50 \
+          pred_frames_num=25
+```
+
+### 3DPW
+```bash
+python -m api.train model=history_repeats_itself \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          model.modality=3DPW \
+          model.in_features=66 \
+          obs_frames_num=50 \
+          pred_frames_num=25
+```
+
+## STS-GCN
+### Human3.6M
+```bash
+python -m api.train model=sts_gcn \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          obs_frames_num=10 \
+          pred_frames_num=25 \
+          model.pre_post_process=human3.6m \
+          model.n_major_joints=22 \
+          model.loss.nJ=32
+```
+### AMASS
+```bash
+python -m api.train model=sts_gcn \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          obs_frames_num=10 \
+          pred_frames_num=25 \
+          model.pre_post_process=AMASS \
+          model.n_major_joints=18 \
+          model.loss.nJ=18
+```
+
+### 3DPW
+```bash
+python -m api.train model=sts_gcn \
+          train_dataset=$DATASET_TRAIN_PATH \
+          valid_dataset=$DATASET_VALIDATION_PATH \
+          obs_frames_num=10 \
+          pred_frames_num=25 \
+          model.pre_post_process=3DPW \
+          model.n_major_joints=18 \
+          model.loss.nJ=18
+```
\ No newline at end of file
diff --git a/utils/__init__.py
b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/average_meter.py b/utils/average_meter.py new file mode 100644 index 0000000..3b43005 --- /dev/null +++ b/utils/average_meter.py @@ -0,0 +1,18 @@ +class AverageMeter(object): + def __init__(self): + self.sum = 0 + self.count = 0 + + def reset(self): + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.sum += val * n + self.count += n + + def get_average(self): + assert not (self.count == 0 and self.sum != 0) + if self.count == 0: + return 0 + return self.sum / self.count diff --git a/utils/others.py b/utils/others.py new file mode 100644 index 0000000..920d406 --- /dev/null +++ b/utils/others.py @@ -0,0 +1,547 @@ +import numpy as np +import torch +import cv2 + +def pose_from_vel(velocity, last_obs_pose, stay_in_frame=False): + device = 'cuda' if velocity.is_cuda else 'cpu' + pose = torch.zeros_like(velocity).to(device) + last_obs_pose_ = last_obs_pose + + for i in range(velocity.shape[-2]): + pose[..., i, :] = last_obs_pose_ + velocity[..., i, :] + last_obs_pose_ = pose[..., i, :] + + if stay_in_frame: + for i in range(velocity.shape[-1]): + pose[..., i] = torch.min(pose[..., i], 1920 * torch.ones_like(pose.shape[:-1]).to(device)) + pose[..., i] = torch.max(pose[..., i], torch.zeros_like(pose.shape[:-1]).to(device)) + + return pose + + +def get_dct_matrix(N): + dct_m = np.eye(N) + for k in np.arange(N): + for i in np.arange(N): + w = np.sqrt(2 / N) + if k == 0: + w = np.sqrt(1 / N) + dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) + idct_m = np.linalg.inv(dct_m) + return dct_m, idct_m + + +def get_binary(src, device): + zero = torch.zeros_like(src).to(device) + one = torch.ones_like(src).to(device) + return torch.where(src > 0.5, one, zero) + + +def dict_to_device(src, device): + out = dict() + for key, value in src.items(): + if isinstance(value, torch.Tensor): + out[key] = value.clone().to(device) + else: + out[key] = value + return out + + +def expmap_to_quaternion(e): + """ + Convert axis-angle rotations (aka exponential maps) to quaternions. + Stable formula from "Practical Parameterization of Rotations Using the Exponential Map". + Expects a tensor of shape (*, 3), where * denotes any number of dimensions. + Returns a tensor of shape (*, 4). + """ + assert e.shape[-1] == 3 + + original_shape = list(e.shape) + original_shape[-1] = 4 + e = e.reshape(-1, 3) + theta = np.linalg.norm(e, axis=1).reshape(-1, 1) + w = np.cos(0.5 * theta).reshape(-1, 1) + xyz = 0.5 * np.sinc(0.5 * theta / np.pi) * e + return np.concatenate((w, xyz), axis=1).reshape(original_shape) + +def rotmat_to_expmap(action_sequence): + """Convert exponential maps to rotmats. + + Args: + action_sequence: [n_samples, n_joints, 9] + Returns: + Rotation matrices for exponenital maps [n_samples, n_joints, 3]. + """ + n_samples, n_joints, _ = action_sequence.shape + rotmat = np.reshape(action_sequence, [n_samples*n_joints, 3, 3]) + # first three values are positions, so technically it's meaningless to convert them, + # but we do it anyway because later we discard this values anywho + expmap = np.zeros([n_samples*n_joints, 3, 1]) + for i in range(expmap.shape[0]): + expmap[i] = cv2.Rodrigues(rotmat[i])[0] + expmap = np.reshape(expmap, [n_samples, n_joints, 3]) + return expmap + +def expmap_to_rotmat(action_sequence): + """Convert exponential maps to rotmats. + + Args: + action_sequence: [n_samples, n_joints, 3] + Returns: + Rotation matrices for exponenital maps [n_samples, n_joints, 9]. 
+ """ + n_samples, n_joints, _ = action_sequence.shape + expmap = np.reshape(action_sequence, [n_samples*n_joints, 1, 3]) + # first three values are positions, so technically it's meaningless to convert them, + # but we do it anyway because later we discard this values anywho + rotmats = np.zeros([n_samples*n_joints, 3, 3]) + for i in range(rotmats.shape[0]): + rotmats[i] = cv2.Rodrigues(expmap[i])[0] + rotmats = np.reshape(rotmats, [n_samples, n_joints, 3*3]) + return rotmats + +def rotmat_to_euler(action_sequence): + """Convert exponential maps to rotmats. + + Args: + action_sequence: [n_samples, n_joints, 9] + Returns: + Euler angles for rotation maps given [n_samples, n_joints, 3]. + """ + n_samples, n_joints, _ = action_sequence.shape + rotmats = np.reshape(action_sequence, [n_samples*n_joints, 3, 3]) + eulers = np.zeros([n_samples*n_joints, 3]) + for i in range(eulers.shape[0]): + eulers[i] = rotmat2euler(rotmats[i]) + eulers = np.reshape(eulers, [n_samples, n_joints, 3]) + return eulers + +def rotmat2euler(R): + """Converts a rotation matrix to Euler angles. + Matlab port to python for evaluation purposes + https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1 + + Args: + R: a 3x3 rotation matrix + + Returns: + eul: a 3x1 Euler angle representation of R + """ + if R[0,2] >= 1 or R[0,2] <= -1: + # special case values are out of bounds for arcsinc + E3 = 0 # set arbitrarily + dlta = np.arctan2( R[0,1], R[0,2] ); + + if R[0,2] == -1: + E2 = np.pi/2; + E1 = E3 + dlta; + else: + E2 = -np.pi/2; + E1 = -E3 + dlta; + else: + E2 = -np.arcsin(R[0,2]) + E1 = np.arctan2(R[1,2]/np.cos(E2), R[2,2]/np.cos(E2) ) + E3 = np.arctan2(R[0,1]/np.cos(E2), R[0,0]/np.cos(E2) ) + + eul = np.array([E1, E2, E3]); + return eul + +def expmap_to_euler(action_sequence): + rotmats = expmap_to_rotmat(action_sequence) + eulers = rotmat_to_euler(rotmats) + return eulers + +def qfix(q): + """ + Enforce quaternion continuity across the time dimension by selecting + the representation (q or -q) with minimal distance (or, equivalently, maximal dot product) + between two consecutive frames. + + Expects a tensor of shape (L, J, 4), where L is the sequence length and J is the number of joints. + Returns a tensor of the same shape. + """ + assert len(q.shape) == 3 + assert q.shape[-1] == 4 + + result = q.copy() + dot_products = np.sum(q[1:] * q[:-1], axis=2) + mask = dot_products < 0 + mask = (np.cumsum(mask, axis=0) % 2).astype(bool) + result[1:][mask] *= -1 + return result + + +def qeuler(q, order, epsilon=0): + """ + Convert quaternion(s) q to Euler angles. + Expects a tensor of shape (*, 4), where * denotes any number of dimensions. + Returns a tensor of shape (*, 3). 
+    """
+    assert q.shape[-1] == 4
+
+    original_shape = list(q.shape)
+    original_shape[-1] = 3
+    q = q.view(-1, 4)
+    q0 = q[:, 0]
+    q1 = q[:, 1]
+    q2 = q[:, 2]
+    q3 = q[:, 3]
+
+    if order == 'xyz':
+        x = torch.atan2(2 * (q0 * q1 - q2 * q3), 1 - 2 * (q1 * q1 + q2 * q2))
+        y = torch.asin(torch.clamp(2 * (q1 * q3 + q0 * q2), -1 + epsilon, 1 - epsilon))
+        z = torch.atan2(2 * (q0 * q3 - q1 * q2), 1 - 2 * (q2 * q2 + q3 * q3))
+    elif order == 'yzx':
+        x = torch.atan2(2 * (q0 * q1 - q2 * q3), 1 - 2 * (q1 * q1 + q3 * q3))
+        y = torch.atan2(2 * (q0 * q2 - q1 * q3), 1 - 2 * (q2 * q2 + q3 * q3))
+        z = torch.asin(torch.clamp(2 * (q1 * q2 + q0 * q3), -1 + epsilon, 1 - epsilon))
+    elif order == 'zxy':
+        x = torch.asin(torch.clamp(2 * (q0 * q1 + q2 * q3), -1 + epsilon, 1 - epsilon))
+        y = torch.atan2(2 * (q0 * q2 - q1 * q3), 1 - 2 * (q1 * q1 + q2 * q2))
+        z = torch.atan2(2 * (q0 * q3 - q1 * q2), 1 - 2 * (q1 * q1 + q3 * q3))
+    elif order == 'xzy':
+        x = torch.atan2(2 * (q0 * q1 + q2 * q3), 1 - 2 * (q1 * q1 + q3 * q3))
+        y = torch.atan2(2 * (q0 * q2 + q1 * q3), 1 - 2 * (q2 * q2 + q3 * q3))
+        z = torch.asin(torch.clamp(2 * (q0 * q3 - q1 * q2), -1 + epsilon, 1 - epsilon))
+    elif order == 'yxz':
+        x = torch.asin(torch.clamp(2 * (q0 * q1 - q2 * q3), -1 + epsilon, 1 - epsilon))
+        y = torch.atan2(2 * (q1 * q3 + q0 * q2), 1 - 2 * (q1 * q1 + q2 * q2))
+        z = torch.atan2(2 * (q1 * q2 + q0 * q3), 1 - 2 * (q1 * q1 + q3 * q3))
+    elif order == 'zyx':
+        x = torch.atan2(2 * (q0 * q1 + q2 * q3), 1 - 2 * (q1 * q1 + q2 * q2))
+        y = torch.asin(torch.clamp(2 * (q0 * q2 - q1 * q3), -1 + epsilon, 1 - epsilon))
+        z = torch.atan2(2 * (q0 * q3 + q1 * q2), 1 - 2 * (q2 * q2 + q3 * q3))
+    else:
+        raise ValueError(f'unknown rotation order: {order}')
+
+    return torch.stack((x, y, z), dim=1).view(original_shape)
+
+
+def normalize(in_tensor, mean, std):
+    device = 'cuda' if in_tensor.is_cuda else 'cpu'
+    bs, frame_n, feature_n = in_tensor.shape
+    mean = torch.tensor(mean).to(device)
+    std = torch.tensor(std).to(device)
+    keypoint_dim = mean.shape[0]
+    assert mean.shape == std.shape
+    assert feature_n % keypoint_dim == 0
+    mean = mean.unsqueeze(0).repeat(bs, frame_n, feature_n // keypoint_dim)
+    std = std.unsqueeze(0).repeat(bs, frame_n, feature_n // keypoint_dim)
+
+    return (in_tensor - mean) / std
+
+
+def denormalize(in_tensor, mean, std):
+    device = 'cuda' if in_tensor.is_cuda else 'cpu'
+    bs, frame_n, feature_n = in_tensor.shape
+    mean = torch.tensor(mean).to(device)
+    std = torch.tensor(std).to(device)
+    keypoint_dim = mean.shape[0]
+    assert mean.shape == std.shape
+    assert feature_n % keypoint_dim == 0
+    mean = mean.unsqueeze(0).repeat(bs, frame_n, feature_n // keypoint_dim)
+    std = std.unsqueeze(0).repeat(bs, frame_n, feature_n // keypoint_dim)
+
+    return (in_tensor * std) + mean
+
+
+def xyz_to_spherical(inputs):
+    """
+    Convert cartesian representation to spherical representation.
+    Args:
+        inputs -- cartesian coordinates. (..., 3)
+
+    Returns:
+        out -- spherical coordinate. (..., 3)
+    """
+
+    rho = torch.norm(inputs, p=2, dim=-1)
+    theta = torch.arctan(inputs[..., 2] / (inputs[..., 0] + 1e-8)).unsqueeze(-1)
+    tol = 0
+    theta[inputs[..., 0] < tol] = theta[inputs[..., 0] < tol] + torch.pi
+    phi = torch.arccos(inputs[..., 1] / (rho + 1e-8)).unsqueeze(-1)
+    rho = rho.unsqueeze(-1)
+    out = torch.cat([rho, theta, phi], dim=-1)
+    out[out.isnan()] = 0
+
+    return out
+
+def spherical_to_xyz(inputs):
+    """
+    Convert spherical representation to cartesian representation.
+    Args:
+        inputs -- spherical coordinates. (..., 3)
+
+    Returns:
+        out -- cartesian coordinate.
(..., 3) + """ + + x = inputs[..., 0] * torch.sin(inputs[..., 2]) * torch.cos(inputs[..., 1]) + y = inputs[..., 0] * torch.sin(inputs[..., 2]) * torch.sin(inputs[..., 1]) + z = inputs[..., 0] * torch.cos(inputs[..., 2]) + x, y, z = x.unsqueeze(-1), y.unsqueeze(-1), z.unsqueeze(-1) + + return torch.cat([x, z, y], dim=-1) + +def sig5(p:torch.Tensor, x:torch.Tensor): + """ + Arguments: + p -- sig5 parameters. shape: ..., 5 + x -- input of sig5 function. shape: ... + Return: + output -- output of sig5 function. + """ + assert p.shape[-1] == 5 + if len(p.shape) == 1: p = p.reshape(1, -1) + p_shape = p.shape + x_shape = x.shape + + p = p.reshape(-1, 5) # 20, 5 + x = x.reshape(1, -1) # 1, 23 + + p1 = p[:, 0].unsqueeze(1) # 20, 1 + p2 = p[:, 1].unsqueeze(1) + p3 = p[:, 2].unsqueeze(1) + p4 = p[:, 3].unsqueeze(1) + p5 = p[:, 4].unsqueeze(1) + + c = 2*p3*p5/torch.abs(p3+p5) # 20, 1 + f = 1/(1+torch.exp(-c*(p4-x))) # 20, 23 + g = torch.exp(p3*(p4-x)) # 20, 23 + h = torch.exp(p5*(p4-x)) # 20, 23 + output = (p1+(p2/(1+f*g+(1-f)*h))) # 20, 23 + output = output.reshape(*p_shape[:-1], *x_shape) + return output + +def polyx(p:torch.Tensor, input:torch.Tensor, x:int): + """ + Arguments: + p -- polyx parameters. shape: ..., 10 + input -- input of polyx function. shape: ... + x -- degree of polynomial function. + Return: + output -- output of polyx function. + """ + assert p.shape[-1] == x+1 + if len(p.shape) == 1: p = p.reshape(1, -1) + p_shape = p.shape # ..., x+1 + input_shape = input.shape # ... + + input = input.reshape(1, -1) # ..., 1 + + powers = torch.arange(x+1).reshape(-1,1).to(input.device) # x+1, 1 + p = p.unsqueeze(-1) # ..., x+1, 1 + print(input.shape, powers.shape, p.shape) + return (p*(input**powers)).sum(dim=-2).reshape(*p_shape[:-1], *input_shape) + +def sigstar(p:torch.Tensor, x:torch.Tensor): + """ + Arguments: + p -- sig* parameters. shape: ..., 3 + x -- input of sig* function. shape: ... + Return: + output -- output of sig* function. 
+ """ + assert p.shape[-1] == 3 + if len(p.shape) == 1: p = p.reshape(1, -1) + p_shape = p.shape + x_shape = x.shape + + p = p.reshape(-1, 3) # 20, 3 + x = x.reshape(1, -1) # 1, 23 + + x0 = p[:, 0].unsqueeze(1) # 20, 1 + k = p[:, 1].unsqueeze(1) + L = p[:, 2].unsqueeze(1) + + output = L / (1 + torch.exp(-k * (x - x0))) # 20, 23 + output = output.reshape(*p_shape[:-1], *x_shape) # + return output + + +p3d0_base = torch.tensor([[[ 0.0000e+00, 0.0000e+00, 0.0000e+00], + [ 7.2556e-02, -9.0371e-02, -4.9508e-03], + [-7.0992e-02, -8.9911e-02, -4.2638e-03], + [-2.9258e-03, 1.0815e-01, -2.7961e-02], + [ 1.1066e-01, -4.7893e-01, -7.1666e-03], + [-1.1376e-01, -4.8391e-01, -1.1530e-02], + [ 3.5846e-03, 2.4726e-01, -2.5113e-02], + [ 9.8395e-02, -8.8787e-01, -5.0576e-02], + [-9.9592e-02, -8.9208e-01, -5.4003e-02], + [ 5.3301e-03, 3.0330e-01, -1.3979e-04], + [ 1.3125e-01, -9.4635e-01, 7.0107e-02], + [-1.2920e-01, -9.4181e-01, 7.1206e-02], + [ 2.4758e-03, 5.2506e-01, -3.7885e-02], + [ 8.6329e-02, 4.2873e-01, -3.4415e-02], + [-7.7794e-02, 4.2385e-01, -4.0395e-02], + [ 8.1987e-03, 5.9696e-01, 1.8670e-02], + [ 1.7923e-01, 4.6251e-01, -4.3923e-02], + [-1.7389e-01, 4.5846e-01, -5.0048e-02], + [ 4.4708e-01, 4.4718e-01, -7.2309e-02], + [-4.3256e-01, 4.4320e-01, -7.3162e-02], + [ 7.0520e-01, 4.5867e-01, -7.2730e-02], + [-6.9369e-01, 4.5237e-01, -7.7453e-02]]]) + +def DPWconvertTo3D(pose_seq): + res = [] + for pose in pose_seq: + assert len(pose.shape) == 2 and pose.shape[1] == 72 + + pose = torch.from_numpy(pose).float() + pose = pose.view(-1, 72//3, 3) + pose = pose[:, :-2] + pose[:, 0] = 0 + res.append(ang2joint(pose).reshape(-1, 22 * 3).detach().numpy()) + return np.array(res) + +def AMASSconvertTo3D(pose): + assert len(pose.shape) == 2 and pose.shape[1] == 156 + pose = torch.from_numpy(pose).float() + pose = pose.view(-1, 156//3, 3) + pose[:, 0] = 0 + return ang2joint(pose).reshape(-1, 22 * 3).detach().numpy() + +def ang2joint(pose, + parent={0: -1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9, + 15: 12, 16: 13, 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21}): + """ + :param p3d0:[batch_size, joint_num, 3] + :param pose:[batch_size, joint_num, 3] + :param parent: + :return: + """ + + assert len(pose.shape) == 3 and pose.shape[2] == 3 + batch_num = pose.shape[0] + p3d0 = p3d0_base.repeat([batch_num, 1, 1]) + + jnum = 22 + + J = p3d0 + R_cube_big = rodrigues(pose.contiguous().view(-1, 1, 3)).reshape(batch_num, -1, 3, 3) + results = [] + results.append( + with_zeros(torch.cat((R_cube_big[:, 0], torch.reshape(J[:, 0, :], (-1, 3, 1))), dim=2)) + ) + + for i in range(1, jnum): + results.append( + torch.matmul( + results[parent[i]], + with_zeros( + torch.cat( + (R_cube_big[:, i], torch.reshape(J[:, i, :] - J[:, parent[i], :], (-1, 3, 1))), + dim=2 + ) + ) + ) + ) + + stacked = torch.stack(results, dim=1) + J_transformed = stacked[:, :, :3, 3] + return J_transformed + + +def rodrigues(r): + """ + Rodrigues' rotation formula that turns axis-angle tensor into rotation + matrix in a batch-ed manner. + Parameter: + ---------- + r: Axis-angle rotation tensor of shape [batch_size * angle_num, 1, 3]. + Return: + ------- + Rotation matrix of shape [batch_size * angle_num, 3, 3]. 
+ """ + eps = r.clone().normal_(std=1e-8) + theta = torch.norm(r + eps, dim=(1, 2), keepdim=True) + # theta = torch.norm(r, dim=(1, 2), keepdim=True) # dim cannot be tuple + theta_dim = theta.shape[0] + r_hat = r / theta + cos = torch.cos(theta) + z_stick = torch.zeros(theta_dim, dtype=torch.float).to(r.device) + m = torch.stack( + (z_stick, -r_hat[:, 0, 2], r_hat[:, 0, 1], r_hat[:, 0, 2], z_stick, + -r_hat[:, 0, 0], -r_hat[:, 0, 1], r_hat[:, 0, 0], z_stick), dim=1) + m = torch.reshape(m, (-1, 3, 3)) + i_cube = (torch.eye(3, dtype=torch.float).unsqueeze(dim=0) \ + + torch.zeros((theta_dim, 3, 3), dtype=torch.float)).to(r.device) + A = r_hat.permute(0, 2, 1) + dot = torch.matmul(A, r_hat) + R = cos * i_cube + (1 - cos) * dot + torch.sin(theta) * m + return R + + +def with_zeros(x): + """ + Append a [0, 0, 0, 1] tensor to a [3, 4] tensor. + Parameter: + --------- + x: Tensor to be appended. + Return: + ------ + Tensor after appending of shape [4,4] + """ + ones = torch.tensor( + [[[0.0, 0.0, 0.0, 1.0]]], dtype=torch.float + ).expand(x.shape[0], -1, -1).to(x.device) + ret = torch.cat((x, ones), dim=1) + return ret + + +def pack(x): + """ + Append zero tensors of shape [4, 3] to a batch of [4, 1] shape tensor. + Parameter: + ---------- + x: A tensor of shape [batch_size, 4, 1] + Return: + ------ + A tensor of shape [batch_size, 4, 4] after appending. + """ + zeros43 = torch.zeros( + (x.shape[0], x.shape[1], 4, 3), dtype=torch.float).to(x.device) + ret = torch.cat((zeros43, x), dim=3) + return ret + +def find_indices_256(frame_num1, frame_num2, seq_len, input_n=10): + """ + Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 + which originaly from + In order to find the same action indices as in SRNN. 
+ https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 + """ + + # Used a fixed dummy seed, following + # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 + SEED = 1234567890 + rng = np.random.RandomState(SEED) + + T1 = frame_num1 - 150 + T2 = frame_num2 - 150 # seq_len + idxo1 = None + idxo2 = None + for _ in np.arange(0, 128): + idx_ran1 = rng.randint(16, T1) + idx_ran2 = rng.randint(16, T2) + idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) + idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) + if idxo1 is None: + idxo1 = idxs1 + idxo2 = idxs2 + else: + idxo1 = np.vstack((idxo1, idxs1)) + idxo2 = np.vstack((idxo2, idxs2)) + return idxo1, idxo2 + + +if __name__ == '__main__': + # p = torch.tensor([[1, 1, 1], [1, 2, 3]]) # 2, 3 + # input = torch.tensor([[2, 5, 7], [1, 2, 3]]) # 2, 3 + p = torch.randn(22, 8) + input = torch.arange(0, 35) + x = 7 + print(polyx(p, input, x).shape) + # p = torch.rand(3, 4, 3) + # x = torch.rand(2, 6, 3, 5) + # print(sigstar(p, x).shape) diff --git a/utils/reporter.py b/utils/reporter.py new file mode 100644 index 0000000..f41e920 --- /dev/null +++ b/utils/reporter.py @@ -0,0 +1,175 @@ +import sys +import os +from tempfile import tempdir +import time +import json +from traceback import print_tb +from cv2 import sort +import matplotlib.pyplot as plt + +import numpy as np +import torch +import pandas as pd + +from utils.average_meter import AverageMeter + + +class Reporter: + + def __init__(self, state=''): + self.state = state + self.start_time = None + self.attrs = None + self.history = None + + def setup(self, attrs): + self.attrs = {} + for attr in attrs: + self.attrs[attr] = AverageMeter() + self.history = {} + self.min_attrs = {} + for attr in attrs: + self.history[attr] = [] + self.min_attrs[attr] = float('inf') + self.history['time'] = [] + + def update(self, attrs, batch_size, dynamic=False, counts=None): + if self.attrs is None or self.history is None: + self.setup(attrs) + for key, value in attrs.items(): + if dynamic: + if key not in self.attrs.keys(): + self.attrs[key] = AverageMeter() + self.history[key] = [] + self.min_attrs[key] = float('inf') + if counts is not None and key in counts.keys(): +# print(value) +# print(self.attrs.get(key)) + self.attrs.get(key).update(value, counts[key]) + else: + self.attrs.get(key).update(value, batch_size) + + def epoch_finished(self, tb=None, mf=None): + self.history.get('time').append(time.time() - self.start_time) + for key, avg_meter in self.attrs.items(): + value = avg_meter.get_average() + value = value.detach().cpu().numpy() if torch.is_tensor(value) else value + self.history.get(key).append(float(value)) + + if self.min_attrs[key] > value: + self.min_attrs[key] = value + + if tb is not None: + tb.add_scalar(self.state + '_' + key, float(value), len(self.history.get(key))) + if mf is not None: + mf.log_metric(self.state + '_' + key, float(value), len(self.history.get(key))) + mf.log_metric(self.state + '_best_' + key, float(self.min_attrs.get(key)), len(self.history.get(key))) + self.reset_avr_meters() + + def reset_avr_meters(self): + self.start_time = None + for i, avg_meter in enumerate(self.attrs.values()): + avg_meter.reset() + + def print_values(self, logger): + msg = self.state + '-epoch' + str(len(self.history['time'])) + ': ' + for key, value in self.history.items(): + msg += key + ': %.5f, ' % value[-1] + logger.info(str(msg)) + 
sys.stdout.flush() + + def save_data(self, save_dir): + for key, value in self.history.items(): + with open(os.path.join(save_dir, 'metrics_history', '_'.join((self.state, key)) + '.json'), "w") as f: + json.dump(value, f, indent=4) + def print_uncertainty_values(self, logger, unc_k): + msg = self.state + '-epoch' + str(len(self.history['time'])) + ': ' + for key, value in self.history.items(): + if not unc_k in key: + continue + msg += key + ': %.5f, ' % value[-1] + logger.info(str(msg)) + sys.stdout.flush() + + def save_uncertainty_data(self, unc_k, save_dir): + for key, value in self.history.items(): + if not unc_k in key: + continue + with open(os.path.join(save_dir, 'uncertainty_history', '_'.join((self.state, key)) + '.json'), "w") as f: + json.dump(value, f, indent=4) + + def print_mean_std(self, logger): + for key, value in self.history.items(): + logger.info(str(key) + ': (mean=%.5f, std=%.6f)' % (np.mean(value), np.std(value))) + + def print_pretty_metrics(self, logger, metrics): + actions = [] + for k in self.history.keys(): + if metrics[0] in k: + actions.append(k[len(metrics[0]) + 1:]) + actions = list(sorted(actions)) + logger.info(' |'.join(["actions".ljust(15)] + [a.center(15) for a in list(metrics)])) + logger.info("_" * 20 * (len(list(metrics)) + 1)) + for action in actions: + to_print = [] + for metric in list(metrics): + to_print.append(np.mean(self.history.get(f'{metric}_{action}'))) + logger.info(' |'.join([action.ljust(15)] + [str(np.around(a, 4)).center(15) for a in to_print])) + + def print_pretty_uncertainty(self, logger, metrics): + actions = [] + for k in self.history.keys(): + if metrics[0] in k: + actions.append(k[len(metrics[0]) + 1:]) + actions = list(sorted(actions)) + logger.info(' |'.join(["actions".ljust(15)] + [a.center(15) for a in list(metrics)])) + logger.info("_" * 20 * (len(list(metrics)) + 1)) + for action in actions: + to_print = [] + for metric in list(metrics): + to_print.append(np.mean(self.history.get(f'{metric}_{action}'))) + logger.info(' |'.join([action.ljust(15)] + [str(np.around(a, 4)).center(15) for a in to_print])) + + def save_csv_metrics(self, metrics, addr): + actions = [] + for k in self.history.keys(): + if metrics[0] in k: + actions.append(k[len(metrics[0]) + 1:]) + actions = list(sorted(actions)) + out = pd.DataFrame(columns=["action"] + list(metrics)) + + for action in actions: + to_print = [] + out_dict = {} + for metric in list(metrics): + out_dict[metric] = [np.mean(self.history.get(f'{metric}_{action}'))] + out_dict["action"] = action + temp = [action] + [a for a in to_print] + df_temp = pd.DataFrame(out_dict) + out = pd.concat([out, df_temp], ignore_index=True, axis=0) + out.to_csv(addr) + + + def save_csv_uncertainty(self, unc_k, addr): + out = pd.DataFrame(columns=["action"] + list(unc_k)) + + to_print = [] + out_dict = {} + out_dict[unc_k] = [np.mean(self.history.get(unc_k))] + out_dict["action"] = "all" + out = pd.DataFrame(out_dict) + out.to_csv(addr) + + + @staticmethod + def save_plots(save_dir, train_history, validiation_history, use_validation): + for key, value in train_history.items(): + X = list(range(1, len(value) + 1)) + plt.plot(X, value, color='b', label='_'.join(('train', key))) + if use_validation and key in validiation_history.keys(): + plt.plot(X, validiation_history.get(key), color='g', label='_'.join(('validation', key))) + plt.xlabel('epoch') + plt.ylabel(key) + plt.legend() + plt.savefig(os.path.join(save_dir, 'plots', key + '.png')) + plt.close() diff --git a/utils/save_load.py b/utils/save_load.py 
new file mode 100644 index 0000000..3515760 --- /dev/null +++ b/utils/save_load.py @@ -0,0 +1,66 @@ +import logging +import os +import pickle +from itertools import chain + +import torch + +from models import MODELS +from losses import LOSSES +from optimizers import OPTIMIZERS + +logger = logging.getLogger(__name__) + + +def load_snapshot(snapshot_path): + snapshot = torch.load(snapshot_path, map_location='cpu') + model = MODELS[snapshot['model_args'].type](snapshot['model_args']) + model.load_state_dict(snapshot['model_state_dict']) + loss_module = LOSSES[snapshot['loss_args'].type](snapshot['loss_args']) + loss_module.load_state_dict(snapshot['loss_state_dict']) + optimizer = OPTIMIZERS[snapshot['optimizer_args'].type](chain(model.parameters(), loss_module.parameters()), snapshot['optimizer_args']) + optimizer.load_state_dict(snapshot['optimizer_state_dict']) + return (model, loss_module, optimizer, snapshot['optimizer_args'], snapshot['epoch'], snapshot['train_reporter'], + snapshot['valid_reporter']) + + +def save_snapshot(model, loss_module, optimizer, optimizer_args, epoch, train_reporter, valid_reporter, save_path, best_model=False): + logger.info('### Taking Snapshot ###') + snapshot = { + 'model_state_dict': model.state_dict(), + 'model_args': model.args, + 'loss_state_dict': loss_module.state_dict(), + 'loss_args': loss_module.args, + 'optimizer_state_dict': optimizer.state_dict(), + 'optimizer_args': optimizer_args, + 'epoch': epoch, + 'train_reporter': train_reporter, + 'valid_reporter': valid_reporter + } + if not best_model: + torch.save(snapshot, os.path.join(save_path, 'snapshots', '%d.pt' % epoch)) + else: + torch.save(snapshot, os.path.join(save_path, 'snapshots', 'best_model.pt')) + del snapshot + + +def save_test_results(result_df, result_tensor, save_dir): + result_df.to_csv(os.path.join(save_dir, 'generated_outputs', 'results.csv'), index=False) + with open(os.path.join(save_dir, 'generated_outputs', 'results.pkl'), 'wb') as f: + pickle.dump(result_tensor, f) + + +def setup_training_dir(parent_dir): + os.makedirs(os.path.join(parent_dir, 'snapshots'), exist_ok=False) + os.makedirs(os.path.join(parent_dir, 'plots'), exist_ok=False) + os.makedirs(os.path.join(parent_dir, 'metrics_history'), exist_ok=False) + + +def setup_testing_dir(parent_dir): + os.makedirs(os.path.join(parent_dir, 'generated_outputs'), exist_ok=False) + + +def setup_visualization_dir(parent_dir): + vis_dir = os.path.join(parent_dir, 'visualization') + os.makedirs(vis_dir, exist_ok=True) + return vis_dir diff --git a/visualization/__init__.py b/visualization/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/visualization/color_generator.py b/visualization/color_generator.py new file mode 100644 index 0000000..d4f6552 --- /dev/null +++ b/visualization/color_generator.py @@ -0,0 +1,64 @@ +import random + + +class ColorGenerator: + def __init__(self): + self.points = [] + self.colors = [] + self.id_to_color = {0: [0, 191, 255], 1: [90, 100, 100], 2: [154, 205, 50], 3: [119, 136, 153], 4: [0, 255, 255], + 5: [255, 69, 0]} + + def get_new_point(self): + points = self.points + if len(points) == 0: + points.append(0) + return 0 + elif len(points) == 1: + points.append(255) + return 255 + elif len(points) == 256: + return False + + max_distance = 0 + max_idx = None + for i in range(len(points) - 1): + distance = points[i + 1] - points[i] + if distance > max_distance: + max_distance = distance + max_idx = i + + new_point = points[max_idx] + max_distance // 2 + points.insert(max_idx + 1, 
new_point) + + return new_point + + def generate_colors(self): + def is_allowed(color): + return any(x > 30 for x in color) and any(x < 225 for x in color) + + colors = set() + new_point = self.get_new_point() + points = self.points + for point_i in points: + for point_j in points: + colors.add((new_point, point_i, point_j)) + colors.add((point_i, new_point, point_j)) + colors.add((point_i, point_j, new_point)) + colors = [color for color in colors if is_allowed(color)] + random.Random(0).shuffle(colors) + + self.colors = colors + + def get_color(self, ind): + if ind in self.id_to_color: + return self.id_to_color[ind] + while len(self.colors) == 0: + self.generate_colors() + color = self.colors.pop() + self.id_to_color[ind] = color + return color + + +color_generator = ColorGenerator() + +color_list = ['blue', 'red', 'green', 'white', 'yellow'] diff --git a/visualization/utils.py b/visualization/utils.py new file mode 100644 index 0000000..6971166 --- /dev/null +++ b/visualization/utils.py @@ -0,0 +1,36 @@ +import numpy as np + +keypoint_connections = { + '3dpw': [ + (0, 1), (1, 4), (4, 7), (7, 10), (0, 2), (2, 5), (5, 8), (8, 11), + (22, 20), (20, 18), (16, 18), (13, 16), + (23, 21), (21, 19), (17, 19), (14, 17), + (12, 15), (12, 16), (12, 17), + (0, 3), (3, 6), (9, 13), (9, 14) + ], + 'amass': [ + (0, 1), (1, 4), (4, 7), (7, 10), (0, 2), (2, 5), (5, 8), (8, 11), + (22, 20), (20, 18), (16, 18), (13, 16), + (23, 21), (21, 19), (17, 19), (14, 17), + (12, 15), (12, 16), (12, 17), + (0, 3), (3, 6), (9, 13), (9, 14) + ], + 'human3.6m': [ + (0, 1), (1, 2), (2, 3), (0, 6), (6, 7), (7, 8), (0, 12), (12, 13), (13, 14), (14, 15), + (13, 17), (17, 18), (18, 19), (13, 25), (25, 26), (26, 27) + ] +} + +rotation_3D = { + 'human3.6m': [30, 20], + '3dpw': [0, -90], + 'amass': [0, -90] +} + +axes_order_3D = { + 'human3.6m': [0, 2, 1], + '3dpw': [0, 2, 1], + 'amass': [0, 2, 1] +} + +jta_cam_int = np.array([[1158, 0, 960], [0, 1158, 540], [0, 0, 1]]) diff --git a/visualization/visualizer.py b/visualization/visualizer.py new file mode 100644 index 0000000..a60aed0 --- /dev/null +++ b/visualization/visualizer.py @@ -0,0 +1,341 @@ +import logging +import os + +import cv2 +import imageio +import matplotlib.pyplot as plt +import numpy as np +import torch +from matplotlib.pyplot import AutoLocator + +from utils.save_load import setup_visualization_dir +from visualization.color_generator import color_generator +from visualization.utils import keypoint_connections, jta_cam_int, rotation_3D, axes_order_3D + +# from pygifsicle import optimize + +logger = logging.getLogger(__name__) + + +class Visualizer: + def __init__(self, dataset_name, parent_dir, images_dir): + self.parent_dir = parent_dir + self.images_dir = images_dir + self.dataset_name = dataset_name + + def visualizer_3D(self, names, poses, cam_ext, cam_int, images_paths, observed_noise, gif_name, fig_size=(16, 12)): + """ + visualizer_3D(poses, images_paths, fig_size) -> None + @brief Draws a 3D figure with matplotlib (it can have multiple sub figures). + The function cv::This function draw multiple 3D poses alongside each other to make comparisons. + :param names: name of each subplot. should be a list of strings. + :param poses: torch.Tensor: should be a list of tensors of multiple outputs that you want to compare. 
(should have 4 dimensions) + shape of poses is like: [nun_comparisons, num_frames (which we create gif upon it), num_persons(in each frame), num_keypoints * dim] + Ex: poses.shape = [3, 16, 5, 51] means you want to compare 3 different groups of outputs each contain + 5 persons with 17 joints (17 * 3). + :param cam_ext: torch.Tensor or list of torch.Tensors: camera extrinsic parameters + shape of cam_ext is like: [num_comparisons, num_frames, 3, 4] which last two dimensions demonstrates (3, 4) matrix based on formal definitions + Ex: cam_ext.shape = [3, 16, 3, 4] means you want to compare 3 different groups of poses each contain + 16 frame and unspecified number of persons. (for each frame basically we have a (3,4) matrix) + :param cam_int: torch.Tensor or list of torch.Tensors: camera intrinsic parameters + shape of cam_int is like: [num_comparisons, 3, 3] which last two dimensions demonstrates (3, 3) matrix based on formal definitions + Ex: cam_int.shape = [3, 3] means you want to compare 3 different groups of poses each contain + (3, 3) matrix which demonstrate camera intrinsic parameters + :param images_paths: list of tensors or list of numpy arrays: paths to specified outputs (scenes). + shape of images_paths is like: [num_comparisons, num_frames] + Ex: images_paths.shape = [3, 16] = means you want to compare 3 different groups of poses each have + 16 images in it. + :param observed_noise: torch.Tensor or list of torch.Tensors + shape of observed_noise is like: [nun_comparisons, num_frames (which we create gif upon it), num_persons(in each frame), num_keypoints] + Ex: masks.shape = [3, 16, 5, 17] just like 'poses'. The only difference here: we have 1 noise for each joint + :param fig_size: tuple(size=2): size of matplotlib figure. + Ex: (8, 6) + :param gif_name: str: name of generated output .gif file + :return: None: generate a .gif file + """ + poses = self.__clean_data(poses) + if cam_ext and cam_int is not None: + cam_ext = self.__clean_data(cam_ext) + if images_paths: + images_paths = self.__generate_images_path(images_paths) + new_pose = [] + for i, group_pose in enumerate(poses): + new_group_pose = [] + for j in range(len(group_pose)): + new_group_pose.append( + self.__scene_to_image(group_pose[j].unsqueeze(0), cam_ext[i], cam_int).tolist()) + new_pose.append(torch.tensor(new_group_pose).squeeze(1)) + self.visualizer_2D(names=names, poses=new_pose, images_paths=images_paths, + observed_noise=observed_noise, fig_size=fig_size, + gif_name=gif_name + '_2D_overlay') + if self.dataset_name == 'jta': + new_pose = [] + for i, group_pose in enumerate(poses): + new_group_pose = [] + for j in range(len(group_pose)): + new_group_pose.append(self.__generate_JTA_2D_pose(group_pose[j].unsqueeze(0)).tolist()) + new_pose.append(torch.tensor(new_group_pose).squeeze(1)) + self.visualizer_2D(names=names, poses=new_pose, images_paths=images_paths, fig_size=fig_size, + observed_noise=observed_noise, gif_name=gif_name + "_2D_overlay") + logger.info("start 3D visualizing.") + if observed_noise is None or observed_noise == []: + observed_noise = [] + else: + observed_noise = self.__clean_data(observed_noise) + max_axes = [] + min_axes = [] + for i in range(3): + max_axes.append(int(max(map(lambda sub_fig_pose: torch.max(sub_fig_pose[:, :, i::3]) + 1, poses)))) + min_axes.append(int(min(map(lambda sub_fig_pose: torch.min(sub_fig_pose[:, :, i::3]) - 1, poses)))) + comparison_number = len(poses) + axarr = [] + filenames = [] + save_dir = setup_visualization_dir(self.parent_dir) + for j in 
range(len(poses[0])): + fig = plt.figure(figsize=fig_size, dpi=100) + axarr.append([]) + for i in range(len(poses)): + noise = observed_noise if len(observed_noise) == len(poses[i]) else [] + axarr[j].append(fig.add_subplot(1, comparison_number, i + 1, projection='3d')) + self.__create_plot(axarr[j][i], max_axes=max_axes, min_axes=min_axes) + self.__generate_3D_figure( + i, all_poses=poses[i][j], + all_noises=noise[j] if j < len(noise) else None, + ax=axarr[j][i] + ) + for _ in range(2): + filenames.append(os.path.join(save_dir, f'{j}.png')) + if j == len(poses[0]) - 1: + for _ in range(3): + filenames.append(os.path.join(save_dir, f'{j}.png')) + plt.title(names[i]) + plt.savefig(os.path.join(save_dir, f'{j}.png'), dpi=100) + plt.close(fig) + with imageio.get_writer(os.path.join(save_dir, f'{gif_name}.gif'), mode='I') as writer: + for filename in filenames: + image = imageio.imread(filename) + writer.append_data(image) + + for filename in set(filenames): + os.remove(filename) + # optimize(os.path.join(save_dir, f'{gif_name}.gif')) + logger.info("end 3D visualizing.") + + def visualizer_2D(self, names, poses, images_paths, observed_noise, gif_name, fig_size=(24, 18)): + """ + visualizer_2D(poses, images_paths, fig_size) -> gif + @brief Draws a 2D figure with matplotlib (it can have multiple sub figures). + The function cv:: This function draw multiple 2D poses alongside each other to make comparisons + (in different outputs) you can draw many different persons together in one sub figure (scene) . + :param names: name of each subplot. should be a list of strings. + :param poses: torch.Tensor or list of torch.Tensors: 3D input pose + shape of poses is like: [nun_comparisons, num_frames (which we create gif upon it), num_persons(in each frame), num_keypoints * dim] + Ex: poses.shape = [3, 16, 5, 34] means you want to compare 3 different groups of outputs each contain + 5 persons with 17 joints (17 * 2). + :param images_paths: list or numpy.array: paths to specified outputs (scenes). + Ex: images_paths.shape = [3, 16] + :param observed_noise: torch.Tensor or list of torch.Tensors + shape of observed_noise is like: [1, num_frames (which we create gif upon it), num_keypoints] + Ex: masks.shape = [16, 22] just like 'images_path'. The only difference here: we have 1 noise for each joint + :param fig_size: tuple(size=2): size of matplotlib figure. 
+ Ex: (8.6) + :param gif_name: str: name of generated output .gif file + :return None: generate a .gif file + """ + logger.info("start 2D visualizing.") + poses = self.__clean_data(poses) + if observed_noise is None or observed_noise == []: + observed_noise = [] + else: + observed_noise = self.__clean_data(observed_noise) + if images_paths is None or images_paths == []: + images_paths = [] + else: + images_paths = self.__generate_images_path(images_paths) + subfig_size = len(poses) + images = [] + for i, pose_group in enumerate(poses): + images.append([]) + for j, pose in enumerate(pose_group): + images[i].append( + self.__generate_2D_figure( + color_num=i, all_poses=pose, + all_noises=observed_noise[j] if j < len(observed_noise) else None, + image_path=images_paths[i][j] if i < len(images_paths) and j < len(images_paths[i]) else None + ) + ) + filenames = [] + save_dir = setup_visualization_dir(self.parent_dir) + for plt_index in range(len(poses[0])): + fig = plt.figure(figsize=np.array(fig_size), dpi=100) + axarr = [] + for i in range(len(poses)): + axarr.append(fig.add_subplot(1, subfig_size, i + 1)) + plt.title(names[i]) + axarr[i].imshow(images[i][plt_index]) + for _ in range(2): + filenames.append(os.path.join(save_dir, f'{plt_index}.png')) + if plt_index == len(poses[0]) - 1: + for _ in range(3): + filenames.append(os.path.join(save_dir, f'{plt_index}.png')) + plt.savefig(os.path.join(save_dir, f'{plt_index}.png'), dpi=100) + plt.close(fig) + with imageio.get_writer(os.path.join(save_dir, f'{gif_name}.gif'), mode='I') as writer: + for filename in filenames: + image = imageio.imread(filename) + writer.append_data(image) + for filename in set(filenames): + os.remove(filename) + # optimize(os.path.join(save_dir, f'{gif_name}.gif')) + logger.info("end 2D visualizing.") + + def __generate_3D_figure(self, color_num, all_poses, all_noises, ax): + num_keypoints = all_poses.shape[-1] // 3 + poses = all_poses.reshape(all_poses.shape[0], num_keypoints, 3) + if all_noises is None or all_noises == []: + all_noises = torch.zeros(all_poses.shape[1] // 3) + visualizing_keypoints = np.array(np.unique(keypoint_connections[self.dataset_name])) + for i, keypoints in enumerate(poses): + for ie, edge in enumerate(keypoint_connections[self.dataset_name]): + ax.plot(xs=[keypoints[edge, 0][0], keypoints[edge, 0][1]], + zs=[keypoints[edge, 1][0], keypoints[edge, 1][1]], + ys=[keypoints[edge, 2][0], keypoints[edge, 2][1]], linewidth=2, label=r'$x=y=z$', + color=np.array(color_generator.get_color(color_num)) / 255) + for k in visualizing_keypoints: + ax.scatter(xs=keypoints[k, axes_order_3D[self.dataset_name][0]], + ys=keypoints[k, axes_order_3D[self.dataset_name][1]], + zs=keypoints[k, axes_order_3D[self.dataset_name][2]], s=2, + color=np.array([0, 255, 100]) / 255 if all_noises[k] == 0 else np.array( + [255, 40, 0]) / 255) + + def __generate_2D_figure(self, color_num, all_poses, all_noises=None, image_path=None): + num_keypoints = all_poses.shape[-1] // 2 + poses = all_poses.reshape(all_poses.shape[0], num_keypoints, 2) + if image_path is None: + image = np.zeros((1080, 1920, 3)).astype(np.uint8) + else: + image = cv2.imread(image_path) + if all_noises is None or all_noises == []: + all_noises = torch.zeros(all_poses.shape[1] // 2) + for i, keypoints in enumerate(poses): + for keypoint in range(keypoints.shape[0]): + for ie, edge in enumerate(keypoint_connections[self.dataset_name]): + if not ((keypoints[edge, 0][0] <= 0 or keypoints[edge, 1][0] <= 0) or ( + keypoints[edge, 0][1] <= 0 or keypoints[edge, 
1][1] <= 0)): + '' + cv2.line(image, (int(keypoints[edge, 0][0]), int(keypoints[edge, 1][0])), + (int(keypoints[edge, 0][1]), int(keypoints[edge, 1][1])), + color_generator.get_color(color_num), 4, lineType=cv2.LINE_AA) + for keypoint in range(keypoints.shape[0]): + cv2.circle(image, (int(keypoints[keypoint, 0]), int(keypoints[keypoint, 1])), 3, + (0, 255, 100) if all_noises[keypoint // 2] == 0 else (255, 50, 0), thickness=-1, + lineType=cv2.FILLED) + return image + + def __create_plot(self, axe, max_axes, min_axes): + axe.xaxis.set_major_locator(AutoLocator()) + axe.yaxis.set_major_locator(AutoLocator()) + axe.zaxis.set_major_locator(AutoLocator()) + range_axes = [(max_axes[i] - min_axes[i]) for i in range(len(max_axes))] + true_range = [ + (min_axes[i] - (max(range_axes) - range_axes[i]) / 2, max_axes[i] + (max(range_axes) - range_axes[i]) / 2) + for i in range(len(max_axes))] + axe.set_aspect('auto') + axe.view_init(elev=rotation_3D[self.dataset_name][0], azim=rotation_3D[self.dataset_name][1]) + axe.set_xlim(xmin=true_range[0][0], + xmax=true_range[0][1]) + axe.set_ylim(ymin=true_range[2][0], + ymax=true_range[2][1]) + axe.set_zlim(zmin=true_range[1][0], + zmax=true_range[1][1]) + + @staticmethod + def __scene_to_image(pose, cam_ext, cam_int): + """ + scene_to_image(pose, cam_ext, cam_int) -> 2D_pose + @brief this function project 3D locations with respect to camera into 2D pixels on body poses. + :param pose: torch.Tensor: 3D input pose + shape of pose is like: [num_persons(in each frame), num_frames, num_keypoints * 3] + Ex: [2, 16, 72] + :param cam_ext: torch.Tensor: camera extrinsic parameters + shape of cam_ext is like: [num_frames, 3, 4] which last two dimensions demonstrates (3, 4) matrix based on formal definitions + Ex: [16, 3, 4] + :param cam_int: torch.Tensor: camera intrinsic parameters + shape of cam_int is like: [3, 3] which demonstrate (3, 3) matrix based on formal definitions + :return 2d_pose: torch.Tensor: 2D projected pose + """ + first_shape = pose.shape + poses = pose.reshape(pose.shape[0], pose.shape[1], pose.shape[-1] // 3, 3) + one_padding = torch.ones(poses.shape[0], poses.shape[1], pose.shape[-1] // 3, 1) + + poses = torch.cat((poses, one_padding), 3) + poses = poses.transpose(1, 0) + new_pose = [] + for frame_num, frame_data in enumerate(poses): + for p_data in frame_data: + new_data = [] + for joint_data in p_data: + new_joint_data = torch.matmul(cam_int, torch.matmul(cam_ext[frame_num][:3], joint_data)) + new_data.append((new_joint_data[:2] / new_joint_data[-1]).tolist()) + new_pose.append(new_data) + return torch.tensor(new_pose).reshape(first_shape[0], first_shape[1], 2 * first_shape[-1] // 3) + + @staticmethod + def __generate_JTA_2D_pose(pose): + first_shape = pose.shape + poses = pose.reshape(pose.shape[0], pose.shape[1], pose.shape[-1] // 3, 3) + new_pose = [] + for frame_num, frame_data in enumerate(poses): + for p_data in frame_data: + new_data = [] + for joint_data in p_data: + x_p = joint_data[0] / joint_data[2] + y_p = joint_data[1] / joint_data[2] + x = jta_cam_int[0][0] * x_p + jta_cam_int[0][2] + y = jta_cam_int[1][1] * y_p + jta_cam_int[1][2] + new_data.append([x, y]) + new_pose.append(new_data) + return torch.tensor(new_pose).reshape(first_shape[0], first_shape[1], 2 * first_shape[-1] // 3) + + @staticmethod + def __clean_data(input_data: list): + new_data = [] + max_len = 0 + + for i in range(len(input_data)): + if input_data[i] is None: + continue + if len(input_data[i]) > max_len: + max_len = len(input_data[i]) + for i, pose in 
enumerate(input_data): + if input_data[i] is None: + continue + elif len(input_data[i]) < max_len: + size = [1 for _ in range(len(pose.shape))] + size[0] = max_len - len(input_data[i]) + last_row = pose[-1:] + expended_data = last_row.repeat(size) + expanded_data = torch.cat((pose, expended_data)) + else: + expanded_data = pose + new_data.append(expanded_data) + return new_data + + def __generate_images_path(self, images_paths): + if self.images_dir is None: + return [] + new_images_path = [] + max_len = len(images_paths[0]) + for i in range(len(images_paths)): + if len(images_paths[i]) > max_len: + max_len = len(images_paths[i]) + for i, image_path in enumerate(images_paths): + group_images_path = [] + for img in image_path: + group_images_path.append(os.path.join(self.images_dir, img)) + if len(image_path) < max_len: + last_path = image_path[-1] + for i in range(max_len - len(image_path)): + group_images_path.append(os.path.join(self.images_dir, last_path)) + new_images_path.append(group_images_path) + return new_images_path
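+
+
+if __name__ == '__main__':
+    # Editor's sketch (assumption, not part of the original library): a minimal smoke test
+    # of the 3D visualizer using random poses. Shapes follow the visualizer_3D docstring:
+    # each group is [num_frames, num_persons, num_keypoints * 3]; 'human3.6m' poses use
+    # 32 joints, i.e. 96 values per person. Output is written to <parent_dir>/visualization/.
+    dummy_poses = [torch.rand(16, 1, 96), torch.rand(16, 1, 96)]
+    vis = Visualizer(dataset_name='human3.6m', parent_dir='outputs/visualizer_demo', images_dir=None)
+    vis.visualizer_3D(names=['observed', 'predicted'], poses=dummy_poses,
+                      cam_ext=None, cam_int=None, images_paths=[],
+                      observed_noise=[], gif_name='demo')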