diff --git a/conda-environment_py38_cu11_ubuntu.yml b/conda-environment_py38_cu11_ubuntu.yml
new file mode 100644
index 0000000000000000000000000000000000000000..128231264e3c270b0a7a4e314047d98a476dbe07
--- /dev/null
+++ b/conda-environment_py38_cu11_ubuntu.yml
@@ -0,0 +1,42 @@
+name: work38
+channels:
+  - huggingface
+  - pytorch
+  - fastai
+  - nvidia
+  - anaconda
+  - conda-forge
+  - defaults
+dependencies:
+  - libiconv=1.17 # otherwise ffmpeg might have a missing .so
+  - ffmpeg=4.4.1
+  - ffmpeg-python=0.2.0
+  - matplotlib-base>=3.2.2
+  - moviepy=1.0.1
+  - multidict=5.1.0
+  - murmurhash=1.0.5
+  - ninja=1.10.2
+  - notebook=6.3.0
+  - numba>=0.52.0
+  - pandas>=1.1.5
+  - pip>=21.2.4
+  - python=3.8
+  # - pytorch=1.9.1=py3.8_cuda11.1_cudnn8.0.5_0
+  - pytorch=1.10.2=py3.8_cuda11.3_cudnn8.2.0_0
+  - torchvision=0.11.3
+  - torchaudio=0.10.2
+  - pywavelets>=1.1.1
+  - pyyaml>=5.4.1
+  - resampy=0.2.2
+  - scikit-image>=0.17.2
+  - scikit-learn>=0.24.2
+  - scikit-video>=1.1.11
+  - seaborn=0.11.0
+  - tqdm>=4.62.3
+  - wheel>=0.37.0
+  - yaml>=0.2.5
+  - yarl>=1.6.3
+  - requests=2.27.1
+  - transformers=4.11.3
+  - kornia==0.6.5
+  - pyrender==0.1.45
diff --git a/gdl_apps/EMOCA/data/process_affectnet.py b/gdl_apps/EMOCA/data/process_affectnet.py
index 938065f97fa3c193d58b7aded74642594bebb466..d58795e1392a2d0a61b9cb68ee8dd4b0de8211f8 100644
--- a/gdl_apps/EMOCA/data/process_affectnet.py
+++ b/gdl_apps/EMOCA/data/process_affectnet.py
@@ -16,8 +16,7 @@ All rights reserved.
 # For comments or questions, please email us at emoca@tue.mpg.de
 # For commercial licensing contact, please contact ps-license@tuebingen.mpg.de
 """
-
-
+import sys
 from gdl.datasets.AffectNetDataModule import AffectNetDataModule
 
 
@@ -55,7 +54,7 @@ def main():
 
     if sid is not None:
         if sid >= dm.num_subsets:
-            print(f"Subset index {sid} is larger than number of subsets. Terminating".)
+            print(f"Subset index {sid} is larger than number of subsets. Terminating")
             sys.exit()
         dm._detect_landmarks_and_segment_subset(dm.subset_size * sid, min((sid + 1) * dm.subset_size, len(dm.df)))
     else:
diff --git a/gdl_apps/EMOCA/data/process_affectnet_mediapipe.py b/gdl_apps/EMOCA/data/process_affectnet_mediapipe.py
new file mode 100644
index 0000000000000000000000000000000000000000..87e5ad3a77f37cb37d049932b4e75af086a9a527
--- /dev/null
+++ b/gdl_apps/EMOCA/data/process_affectnet_mediapipe.py
@@ -0,0 +1,86 @@
+import sys, os
+import math
+sys.path = [os.path.abspath("../../..")] + sys.path
+
+from pathlib import Path
+
+if len(sys.argv) > 1:
+    sid = int(sys.argv[1])
+else:
+    sid = 0
+
+
+from gdl.datasets.AffectNetDataModule import AffectNetDataModule, AffectNetEmoNetSplitModule
+
+
+
+"""
+Author: Radek Danecek
+Copyright (c) 2023, Radek Danecek
+All rights reserved.
+
+# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
+# holder of all proprietary rights on this computer program.
+# Using this computer program means that you agree to the terms
+# in the LICENSE file included with this software distribution.
+# Any use not explicitly granted by the LICENSE is prohibited.
+#
+# Copyright©2022 Max-Planck-Gesellschaft zur Förderung
+# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
+# for Intelligent Systems. All rights reserved.
+#
+# For comments or questions, please email us at emoca@tue.mpg.de
+# For commercial licensing contact, please contact ps-license@tuebingen.mpg.de
+"""
+import sys
+from gdl.datasets.AffectNetDataModule import AffectNetDataModule
+
+
+def main():
+    if len(sys.argv) < 3:
+        print("Usage: python process_affectnet_mediapipe.py <input_folder> <output_folder> <optional_processed_subfolder> <optional_subset_index>")
+        print("input_folder ... folder where you downloaded and extracted AffectNet")
+        print("output_folder ... folder where you want to process AffectNet")
+        print("optional_processed_subfolder ... if AffectNet is partly processed, it created a subfolder, which you can specify here to finish processing")
+        print("optional_subset_index ... index of subset of AffectNet if you want to process many parts in parallel (recommended)")
+
+    downloaded_affectnet_folder = sys.argv[1]
+    processed_output_folder = sys.argv[2]
+
+    if len(sys.argv) >= 4:
+        processed_subfolder = sys.argv[3]
+    else:
+        processed_subfolder = None
+
+
+    if len(sys.argv) >= 5:
+        sid = int(sys.argv[4])
+    else:
+        sid = None
+
+
+    dm = AffectNetDataModule(
+        downloaded_affectnet_folder,
+        processed_output_folder,
+        processed_subfolder=processed_subfolder,
+        mode="manual",
+        scale=1.25,
+        ignore_invalid=True,
+    )
+
+    if sid is not None:
+        if sid >= dm.num_subsets:
+            print(f"Subset index {sid} is larger than number of subsets. Terminating")
+            sys.exit()
+        print("Detecting mediapipe landmarks in subset %d" % sid)
+        dm._detect_landmarks_mediapipe(dm.subset_size * sid, min((sid + 1) * dm.subset_size, len(dm.df)))
+        print("Finished detecting faces")
+    else:
+        dm.prepare_data()
+
+
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/gdl_apps/EMOCA/emoca_conf/model/additional/lipread_loss.yaml b/gdl_apps/EMOCA/emoca_conf/model/additional/lipread_loss.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..afc620265272bface9a636dc3fa4edb8f91d439c
--- /dev/null
+++ b/gdl_apps/EMOCA/emoca_conf/model/additional/lipread_loss.yaml
@@ -0,0 +1,21 @@
+# @package model
+
+lipread_loss:
+    # path: /ps/scratch/rdanecek/emoca/emodeca/2021_09_02_20-39-05_EmoCnn_vgg19_bn_none_AU_Aug_early # bgg19 with BN, binary cross entropy loss
+#     path: /ps/scratch/rdanecek/emoca/emodeca/2021_09_02_20-39-05_EmoCnn_vgg19_bn_none_AU_Aug_early
+    # feat_loss: l1_loss
+    # lipread_loss: l1_loss
+    lipread_loss: cosine_similarity
+    # lipread_loss: mse_loss
+
+    # normalize_features: false
+    # trainable: false
+    # dual: false
+    weight: 0.0015
+    use_as_loss: true # false to use as a metric only
+
+    # use_feat_1: false
+    # use_feat_2: true
+    # use_aus: true
+
+
diff --git a/gdl_apps/EMOCA/training/train_expdeca.py b/gdl_apps/EMOCA/training/train_expdeca.py
index c88fcfab5d6941bf33b4ab3ac0ca7ec9dd68a937..a2c2a8d855cf55dce26973af1243569caa15d293 100644
--- a/gdl_apps/EMOCA/training/train_expdeca.py
+++ b/gdl_apps/EMOCA/training/train_expdeca.py
@@ -129,6 +129,8 @@ def create_single_dm(cfg, data_class):
             processed_ext=".png" if "processed_ext" not in cfg.data.keys() else cfg.data.processed_ext,
             dataset_type=cfg.data.dataset_type if "dataset_type" in cfg.data.keys() else None,
             # use_gt=cfg.data.use_gt if "use_gt" in cfg.data.keys() else True,
+            k_fold_crossvalidation=cfg.data.k_fold_crossvalidation if "k_fold_crossvalidation" in cfg.data.keys() else None,
+            k_index=cfg.data.k_index if "k_index" in cfg.data.keys() else None,
         )
         sequence_name = "AFEW-VA"
     else:
@@ -179,6 +181,10 @@ def create_experiment_name(cfg_coarse, cfg_detail, version=2):
 
     if cfg_coarse.model.exp_deca_jaw_pose:
         experiment_name += '_Jaw'
 
+    zero_out = cfg_coarse.model.get('zero_out_last_enc_layer', False)
+    if zero_out:
+        experiment_name += '_Z'
+
     if cfg_coarse.learning.train_K == 1:
         experiment_name += '_NoRing'
@@ -213,6 +219,15 @@
             experiment_name += 'f-' + cfg_coarse.model.au_loss.feat_loss[:3]
         if cfg_coarse.model.au_loss.au_loss != 'l1_loss':
             experiment_name += '_c-' + cfg_coarse.model.au_loss.au_loss[:3]
+
+    if 'lipread_loss' in cfg_coarse.model.keys():
+        experiment_name += '_LR'
+
+    if cfg_coarse.model.get('use_mediapipe_landmarks', False) or \
+        cfg_coarse.model.get('use_mouth_corner_distance_mediapipe', False)\
+        or cfg_coarse.model.get('use_lip_distance_mediapipe', False)\
+        or cfg_coarse.model.get('use_eye_distance_mediapipe', False):
+        experiment_name += '_MP'
 
     # if expression exchange and geometric errors are to be computed even for the exchanged
     if 'use_geometric_losses_expression_exchange' in cfg_coarse.model.keys() and \
@@ -356,8 +371,8 @@ def train_expdeca(cfg_coarse, cfg_detail, start_i=-1, resume_from_previous = Tru
         full_run_dir.mkdir(parents=True, exist_ok=exist_ok)
     print(f"The run will be saved to: '{str(full_run_dir)}'")
 
-    with open("out_folder.txt", "w") as f:
-        f.write(str(full_run_dir))
+    # with open("out_folder.txt", "w") as f:
+    #     f.write(str(full_run_dir))
 
     coarse_checkpoint_dir = full_run_dir / "coarse" / "checkpoints"
     coarse_checkpoint_dir.mkdir(parents=True, exist_ok=exist_ok)
@@ -481,23 +496,44 @@ def main():
     if len(sys.argv) <= 2:
         coarse_conf = "deca_train_coarse"
         detail_conf = "deca_train_detail"
+
+        emonet = "/is/cluster/work/rdanecek/emoca/emodeca/2021_11_09_05-15-38_-8198495972451127810_EmoCnn_resnet50_shake_samp-balanced_expr_Aug_early"
+        photometric_normalization = 'mean'
+        use_mouth_corner_distance = True
+        use_eye_distance = True
+        use_lip_distance = True
+        emo_feature_loss_type = 'mse'
+
         coarse_override = [
             # 'model/settings=coarse_train',
             # 'model/settings=coarse_train_emonet',
             # 'model/settings=coarse_train_expdeca',
-            'model/settings=coarse_train_expdeca_emonet',
+            # 'model/settings=coarse_train_expdeca_emonet',
+            # 'model/settings=coarse_train_emica',
+            'model/settings=coarse_train_emica_emonet',
             # 'model/settings=coarse_train_expdeca_emomlp',
             # 'model.expression_constrain_type=exchange',
             # 'model.expression_constrain_use_jaw_pose=True',
             'model.expression_constrain_use_global_pose=False',
             # 'model.use_geometric_losses_expression_exchange=True',
+            'model.use_emonet_feat_1=False',
+            'model.use_emonet_feat_2=True',
+            'model.use_emonet_valence=False',
+            'model.use_emonet_arousal=False',
+            'model.use_emonet_expression=False',
+            'model.use_emonet_combined=False',
+            f'+model.photometric_normalization={photometric_normalization}',
+            f'+model.use_mouth_corner_distance={use_mouth_corner_distance}',
+            f'+model.use_eye_distance={use_eye_distance}',
+            f'+model.use_lip_distance={use_lip_distance}',
+            f'+model.emo_feat_loss={emo_feature_loss_type}', # emonet feature loss
             # '+model.mlp_emotion_predictor.detach_shape=True',
             # '+model.mlp_emotion_predictor.detach_expression=True',
             # '+model.mlp_emotion_predictor.detach_detailcode=True',
             # '+model.mlp_emotion_predictor.detach_jaw=True',
             # '+model.mlp_emotion_predictor.detach_global_pose=True',
-
+            f'+model.emonet_model_path={emonet}',
             'data/datasets=affectnet_desktop', # affectnet vs deca dataset
             # f'data.ring_type=gt_va',
             # 'data.ring_size=4',
 
@@ -505,13 +541,14 @@
             f'data.num_workers={num_workers}',
             'model.resume_training=True', # load the original EMOCA model
             'learning.early_stopping.patience=5',
-            # 'learning/logging=none',
+            'learning/logging=none',
             'learning.batch_size_train=4',
         ]
         detail_override = [
             # 'model/settings=detail_train',
             # 'model/settings=detail_train_emonet',
-            'model/settings=detail_train_expdeca_emonet',
+            # 'model/settings=detail_train_expdeca_emonet',
+            'model/settings=detail_train_emica_emonet',
             # 'model/settings=detail_train_expdeca_emomlp',
             # 'model.expression_constrain_type=exchange',
             # 'model.expression_constrain_use_jaw_pose=True',
@@ -522,14 +559,27 @@
 
             # '+model.mlp_emotion_predictor.detach_detailcode=True',
             # '+model.mlp_emotion_predictor.detach_jaw=True',
             # '+model.mlp_emotion_predictor.detach_global_pose=True',
+            f'+model.emonet_model_path={emonet}',
             'data/datasets=affectnet_desktop', # affectnet vs deca dataset
+            # f'data.ring_type=gt_va',
             # 'learning/batching=single_gpu_expdeca_detail_ring',
             # 'data.ring_size=4',
             'learning.early_stopping.patience=5',
-            # 'learning/logging=none',
+            'learning/logging=none',
            f'data.num_workers={num_workers}',
             'learning.batch_size_train=4',
+
+            'model.use_emonet_feat_1=False',
+            'model.use_emonet_feat_2=True',
+            'model.use_emonet_valence=False',
+            'model.use_emonet_arousal=False',
+            'model.use_emonet_expression=False',
+            'model.use_emonet_combined=False',
+            f'+model.photometric_normalization={photometric_normalization}',
+            f'+model.use_mouth_corner_distance={use_mouth_corner_distance}',
+            f'+model.use_eye_distance={use_eye_distance}',
+            f'+model.use_lip_distance={use_lip_distance}',
         ]
         # coarse_conf = detail_conf
 