From f14092a1e6eabd672a0ffbb19e899c87c5576ab6 Mon Sep 17 00:00:00 2001
From: JianHe0628 <jl02958@surrey.ac.uk>
Date: Thu, 3 Apr 2025 10:20:28 +0100
Subject: [PATCH] Adding additional documentation

---
 Extract_HaMeR_Single.py                   |  4 +--
 Inject_Json.py => Inject_and_Visualize.py | 24 ++++++-------
 README.md                                 | 43 ++++++++++++++++++-----
 3 files changed, 49 insertions(+), 22 deletions(-)
 rename Inject_Json.py => Inject_and_Visualize.py (95%)

diff --git a/Extract_HaMeR_Single.py b/Extract_HaMeR_Single.py
index 62b4f46..0ab1273 100644
--- a/Extract_HaMeR_Single.py
+++ b/Extract_HaMeR_Single.py
@@ -29,7 +29,7 @@ def main(args, model, renderer, device):
     initial_start_time = time.time()
 
     # Load detector
-    if args.bbox == 'True':
+    if str(args.bbox).lower() == 'true':
         print("Loading Body Detector to retrieve Bounding Box")
         from hamer.utils.utils_detectron2 import DefaultPredictor_Lazy
         if args.body_detector == 'vitdet':
@@ -73,7 +73,7 @@ def main(args, model, renderer, device):
     # img_paths = sorted(img_paths,key = lambda x: int(os.path.basename(x).removeprefix('images').removesuffix('.png')))
 
     img_cv2 = cv2.imread(str(img_paths[0]))
-    if args.bbox == 'True':
+    if str(args.bbox).lower() == 'true':
         # Detect humans in image
         det_out = detector(img_cv2)
         det_instances = det_out['instances']
diff --git a/Inject_Json.py b/Inject_and_Visualize.py
similarity index 95%
rename from Inject_Json.py
rename to Inject_and_Visualize.py
index d53c5a3..bc46db2 100644
--- a/Inject_Json.py
+++ b/Inject_and_Visualize.py
@@ -98,7 +98,7 @@ def main(args, model, device):
     os.makedirs(args.out_folder, exist_ok=True)
 
     # Load detector
-    if args.bbox:
+    if str(args.bbox).lower() == 'true':
         from hamer.utils.utils_detectron2 import DefaultPredictor_Lazy
         if args.body_detector == 'vitdet':
             from detectron2.config import LazyConfig
@@ -133,19 +133,19 @@ def main(args, model, device):
 
     # Setup Image Paths and lmdb Paths Ranges
     lmdb_files = []
-    if os.path.isdir(args.lmdb_input):
-        lmdb_files = os.listdir(args.lmdb_input)
+    if os.path.isdir(args.extracted_input):
+        lmdb_files = os.listdir(args.extracted_input)
+
     if 'data.mdb' in lmdb_files and 'lock.mdb' in lmdb_files:
-        vid_name = os.path.basename(args.lmdb_input).removesuffix('.lmdb')
-        global_orient_list, hand_pose_list, betas_list, pred_cam_list, details = load_hamer_features(args.lmdb_input, vid_name, generate_vertices=True)
-    elif args.lmdb_input.endswith('.lzma'):
-        lzma_data = decompress_from_lzma(args.lmdb_input)
+        vid_name = os.path.basename(args.extracted_input).removesuffix('.lmdb')
+        global_orient_list, hand_pose_list, betas_list, pred_cam_list, details = load_hamer_features(args.extracted_input, vid_name, generate_vertices=True)
+    elif args.extracted_input.endswith('.lzma'):
+        lzma_data = decompress_from_lzma(args.extracted_input)
         global_orient_list = lzma_data['features'][:,2*15*3*3:].reshape(len(lzma_data['features']), 2, 1, 3, 3)
         hand_pose_list = lzma_data['features'][:,:2*15*3*3].reshape(len(lzma_data['features']), 2, 15, 3, 3)
         pred_cam_list = lzma_data['predcam']
         single_beta = np.expand_dims(lzma_data['beta'], axis=0)
         betas_list = single_beta.repeat(len(pred_cam_list),0)
-        print(np.array(betas_list).shape)
     else:
         raise ValueError("Input is not lmdb or lzma file")
 
@@ -165,7 +165,7 @@ def main(args, model, device):
     pred_cam_list = pred_cam_list[start_frame:end_frame]
 
     img_cv2 = cv2.imread(str(img_paths[0]))
-    if args.bbox == True:
+    if str(args.bbox).lower() == 'true':
         # Detect humans in image
         det_out = detector(img_cv2)
         det_instances = det_out['instances']
@@ -359,16 +359,16 @@ if __name__ == '__main__':
     parser.add_argument('--body_detector', type=str, default='vitdet', choices=['vitdet', 'regnety'], help='Using regnety improves runtime and reduces memory')
     parser.add_argument('--file_type', nargs='+', default=['*.jpg', '*.png'], help='List of file extensions to consider')
     parser.add_argument('--injected_hand', type=int, default=2, help='Number of hands in the Video')
-    parser.add_argument('--bbox', dest='bbox', action='store_true', default=True, help='If set, use detected bbox')
+    parser.add_argument('--bbox', type=str, default='True', help='Set to True to detect a bbox with the ViT body detector')
     parser.add_argument('--custom_bbox', type=str, default='', help='Custom bbox in the format x1,y1,x2,y2')
-    parser.add_argument('--lmdb_input', type=str, default='', help='lmdb file for input')
+    parser.add_argument('--extracted_input', type=str, default='', help='lmdb or lzma file for input')
     parser.add_argument('--frame_range', type=str, default='0,-1', help='Frame range for input')
     args = parser.parse_args()
 
     # args.img_folder = '/vol/vssp/datasets/singlevideo/csldaily/sentences/frames_512x512/S005657_P0006_T00'
     args.vid = '/vol/research/signVision/Projects/BSLboundaries/data_for_segment/Your_caring_role_will_evolve_throughout_your_babys_journey_in_the_neonatal_unit_as_you_learn_more_about_them_and_their_needs.mp4'
-    args.lmdb_input = f'/vol/research/signVision/Projects/BSLboundaries/Rachel_Test/5/Your_caring_role_will_evolve_throughout_your_babys_journey_in_the_neonatal_unit_as_you_learn_more_about_them_and_their_needs.lmdb'
+    args.extracted_input = f'/vol/research/signVision/Projects/BSLboundaries/Rachel_Test/5/Your_caring_role_will_evolve_throughout_your_babys_journey_in_the_neonatal_unit_as_you_learn_more_about_them_and_their_needs.lmdb'
 
     # args.lmdb_input = "/media/scratch/MeinDGS_HaMeR_lzma/1182135/1182135_1b1.lzma"
diff --git a/README.md b/README.md
index 0af370b..c0dc12a 100644
--- a/README.md
+++ b/README.md
@@ -23,19 +23,21 @@ python Extract_HaMeR_Single.py --out_folder path/to/save/directory --vid path/to
 ```
 
 #### Extracting Using Images
-The code has been written to support a folder of images as well. To do this, run:
+The code has been written to support a directory of images as well. To do this, run:
 ```
 python Extract_HaMeR_Single.py --out_folder path/to/save/directory --img_folder path/to/image/directory
 ```
 
-#### Important Settings to Consider
+`Note:` If you provide an image folder, you may need to change the sorting line in "Extract_HaMeR_Single.py", since the code sorts the input images in ascending order by filename.
+
+#### Important Considerations:
 While extracting the poses, you need to take into consideration the bounding box (BBOX) you need. HaMeR extraction works better with a tighter BBOX i.e. where the Signer is cropped to take up more of the frame.
 The current code can handle cropping the frames in several ways.
 
 1. If the image frame has already been cropped:
     - If you have already processed each frame to be tightly cropped, set the bbox argument to be `False`, as by default it is set to `True`:
     ```
-    python Extract_HaMeR_Single.py --out_folder path/to/save/directory --img_folder path/to/image/directory --bbox False
+    python Extract_HaMeR_Single.py --out_folder path/to/save/directory --vid path/to/video/dataset.mp4 --bbox False
     ```
 
 2. If the image is not cropped:
@@ -44,7 +46,7 @@ The current code can handle cropping the frames in several ways.
 3. If you need custom BBOX crops:
     - If you want to provide your own set of specific BBOX coordinates, the code also supports this. This can be beneficial for Sign Language Datasets as the signer often remains centrally in the frame, and you may want to provide a hard-coded box.
     ```
-    python Extract_HaMeR_Single.py --out_folder path/to/save/directory --img_folder path/to/image/directory --bbox False --custom_bbox x1,y1,x2,y2
+    python Extract_HaMeR_Single.py --out_folder path/to/save/directory --vid path/to/video/dataset.mp4 --bbox False --custom_bbox x1,y1,x2,y2
     ```
 
 4. Output File Types:
@@ -52,10 +54,35 @@ The current code can handle cropping the frames in several ways.
 
 ### Visualizations and Sanity Checks
 
-After extracting the HaMeR poses, you may want to generate the mesh from the extracted poses to make sure everything is working as expected. This can be done with the following:
+After extracting the HaMeR poses, you may want to generate the mesh from the extracted poses to make sure everything is working as expected. There are several ways to run this code.
 
- ```
-python Extract_HaMeR_Single.py --out_folder path/to/save/directory --img_folder path/to/image/directory --bbox False --custom_bbox x1,y1,x2,y2
-```
+1. You extracted HaMeR poses from a directory of images:
+
+   ```
+   python Inject_and_Visualize.py --out_folder path/to/save/directory --img_folder path/to/image/directory --extracted_input path/to/your/input.lzma --save_mesh
+   ```
+
+2. You extracted HaMeR poses from a video:
+
+   ```
+   python Inject_and_Visualize.py --out_folder path/to/save/directory --vid path/to/video/dataset.mp4 --extracted_input path/to/your/input.lzma --save_mesh
+   ```
+
+#### Important Considerations:
+There are several critical considerations to take note of during visualization.
+
+1. You MUST make sure the BBOX settings in the visualization match the extraction. For instance, if you extracted using custom BBOX settings:
+
+   ```
+   python Extract_HaMeR_Single.py --out_folder path/to/save/directory --vid path/to/video/dataset.mp4 --bbox False --custom_bbox x1,y1,x2,y2
+   ```
+   Make sure your visualization coordinates match exactly:
+   ```
+   python Inject_and_Visualize.py --out_folder path/to/save/directory --vid path/to/video/dataset.mp4 --extracted_input path/to/your/input.lzma --save_mesh --bbox False --custom_bbox x1,y1,x2,y2
+   ```
+2. You can also specify which frames you want to visualize with `--frame_range`. For instance, the video may be 20 minutes long, but you only want to visualize the first 50 frames. You can do:
+   ```
+   python Inject_and_Visualize.py --out_folder path/to/save/directory --vid path/to/video/dataset.mp4 --extracted_input path/to/your/input.lzma --save_mesh --frame_range 0,50
+   ```
\ No newline at end of file
-- 
GitLab
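
A note on the `--bbox` change above: with the old `action='store_true'` plus `default=True`, the flag could never be turned off from the command line, which is presumably why the patch switches it to a plain string compared case-insensitively against `'true'`. If stricter parsing were ever wanted, a minimal sketch of an argparse boolean type follows; the `str2bool` helper is hypothetical and not part of this patch:

```python
import argparse

def str2bool(value: str) -> bool:
    """Map common true/false spellings to a bool; reject anything else."""
    v = value.strip().lower()
    if v in ('true', '1', 'yes', 'y'):
        return True
    if v in ('false', '0', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")

parser = argparse.ArgumentParser()
# '--bbox True' / '--bbox false' both parse; '--bbox ture' raises an error.
parser.add_argument('--bbox', type=str2bool, default=True,
                    help='Detect a bounding box with the body detector')
args = parser.parse_args(['--bbox', 'False'])
assert args.bbox is False
```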
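
The `.lzma` branch in `Inject_and_Visualize.py` unpacks a flat per-frame feature vector. Going by the reshapes in the hunk above, the first 2*15*3*3 values per frame are the two hands' 15 MANO joint rotation matrices, the remaining 2*1*3*3 values are the two global orientations, and a single shared beta is tiled across all frames. A minimal sketch of that unpacking on dummy data (array names mirror the patch; the 10-dimensional beta is an assumption based on the usual MANO shape parameterization):

```python
import numpy as np

# Dummy stand-in for lzma_data['features']: one flat vector per frame.
num_frames = 4
feat_dim = 2 * 15 * 3 * 3 + 2 * 1 * 3 * 3   # hand poses + global orients = 288
features = np.zeros((num_frames, feat_dim), dtype=np.float32)

# Same reshapes as the patch: hand pose first, global orientation after.
hand_pose_list = features[:, :2 * 15 * 3 * 3].reshape(num_frames, 2, 15, 3, 3)
global_orient_list = features[:, 2 * 15 * 3 * 3:].reshape(num_frames, 2, 1, 3, 3)

# One shared shape vector repeated for every frame, as in the patch.
beta = np.zeros(10, dtype=np.float32)   # assumed 10 MANO shape parameters
betas_list = np.expand_dims(beta, axis=0).repeat(num_frames, 0)

assert hand_pose_list.shape == (num_frames, 2, 15, 3, 3)
assert global_orient_list.shape == (num_frames, 2, 1, 3, 3)
assert betas_list.shape == (num_frames, 10)
```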
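
Similarly, the `--frame_range` flag described in the README is a 'start,end' string, and the hunk above slices the feature lists directly with it (`pred_cam_list[start_frame:end_frame]`). A minimal sketch of that convention; the `parse_frame_range` helper is illustrative, not from the patch:

```python
def parse_frame_range(spec: str) -> tuple[int, int]:
    """Parse a 'start,end' string into slice bounds, e.g. '0,50' -> (0, 50)."""
    start_s, end_s = spec.split(',')
    return int(start_s), int(end_s)

frames = list(range(100))

# '0,50' keeps the first 50 frames.
start, end = parse_frame_range('0,50')
assert len(frames[start:end]) == 50

# Note: with plain slicing, the default '0,-1' excludes the final frame;
# map an end of -1 to None if the last frame matters.
start, end = parse_frame_range('0,-1')
assert len(frames[start:end]) == 99
```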