Commit 5200fd70
Authored 8 months ago by Low, Low Jian He (PG/R - Comp Sci & Elec Eng)
Parent: 4603aa55

Adding a version of the JSON extractor which is uncompressed and more suitable for long videos.

Showing 2 changed files, with 370 additions and 15 deletions:

- Extract_HaMeR_Jsons.py → Extract_Compressed_HaMeR.py: 36 additions, 15 deletions
- Extract_Uncompressed_HaMeR.py (new file): 334 additions, 0 deletions
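The new extractor writes one plain, indented JSON file per frame (`json.dump(output, f, indent=4)` in the new file below) rather than compressed output, which is presumably what makes it more suitable for long videos: frames can be written and read back one at a time. A minimal sketch of consuming those files, assuming a hypothetical output directory and the `Frame<N>_<count>.json` naming the script uses:

```python
import json
from pathlib import Path

# Hypothetical output directory; the script writes out_folder/<video name>/Frame<N>_<count>.json
out_dir = Path('out_demo/some_video')

# Sort numerically, mirroring the Frame<N> naming convention used by the script
for json_file in sorted(out_dir.glob('Frame*_0.json'),
                        key=lambda p: int(p.stem.split('_')[0].removeprefix('Frame'))):
    with open(json_file) as f:
        frame = json.load(f)
    # 'VitPose' is written explicitly by the script; the remaining keys mirror
    # HaMeR's output dict, with tensors converted to nested lists.
    print(json_file.name, len(frame['VitPose']), 'keypoints')
```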
Extract_HaMeR_Jsons.py → Extract_Compressed_HaMeR.py (+36 −15)

```diff
@@ -23,6 +23,26 @@ from vitpose_model import ViTPoseModel
 import json
 from typing import Dict, Optional
+import subprocess
+
+def print_gpu_usage():
+    try:
+        # Run the `nvidia-smi` command
+        result = subprocess.run(
+            ['nvidia-smi', '--query-gpu=utilization.gpu,memory.used,memory.total', '--format=csv,nounits,noheader'],
+            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        if result.returncode != 0:
+            print("Error fetching GPU usage:", result.stderr)
+            return
+        # Process the output
+        usage_lines = result.stdout.strip().split('\n')
+        for i, line in enumerate(usage_lines):
+            gpu_util, mem_used, mem_total = map(int, line.split(','))
+            print(f"GPU {i}: Utilization: {gpu_util}%, Memory: {mem_used}/{mem_total} MiB")
+    except FileNotFoundError:
+        print("nvidia-smi command not found. Ensure NVIDIA drivers are installed.")
+
 # def Produce_Mesh_from_Dict(out, batch, model, args):
 #     renderer = Renderer(model_cfg, faces=model.mano.faces)
 #     multiplier = (2*batch['right']-1)
```
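For reference, `--format=csv,nounits,noheader` makes `nvidia-smi` emit one bare CSV row per GPU, which is what the `map(int, line.split(','))` parse above relies on. A minimal sketch with an illustrative (not captured) sample row:

```python
# Illustrative nvidia-smi CSV row: "utilization.gpu, memory.used, memory.total"
sample_line = "42, 1024, 16384"  # hypothetical values, not from a real device

# int() tolerates the leading space left behind by splitting on ','
gpu_util, mem_used, mem_total = map(int, sample_line.split(','))
print(f"GPU 0: Utilization: {gpu_util}%, Memory: {mem_used}/{mem_total} MiB")
```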
```diff
@@ -124,7 +144,7 @@ def convert_tensors_to_lists(d):
             convert_tensors_to_lists(value)
     return d
 
-def main(args, model, renderer, device, cpm):
+def main(args, model, renderer, device):
     initial_start_time = time.time()
 
     # Load detector
```
```diff
@@ -161,7 +181,7 @@ def main(args, model, renderer, device, cpm):
         outfile = os.path.join(temp_dir.name, filename)
         os.makedirs(outfile, exist_ok=True)
-        os.system(f"ffmpeg -i {args.vid} -vf fps={fps} {temp_dir.name}/Frame%d.png")
+        os.system(f"ffmpeg -nostdin -i {args.vid} -vf fps={fps} {temp_dir.name}/Frame%d.png")
         image_folder = Path(temp_dir.name)
         print(f"Total Time for Video to Image: {(time.time()-start_time)} Seconds")
     else:
```
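The added `-nostdin` stops ffmpeg from reading the terminal's stdin, which matters when the script shells out once per video in a loop. If shell quoting ever becomes a problem (e.g. paths with spaces), an equivalent call through `subprocess.run` with list arguments is a safer alternative; this is a sketch using the names from `main`'s scope, not what the commit itself uses:

```python
import subprocess

# Equivalent frame extraction without a shell: list arguments sidestep quoting
# issues with paths containing spaces, and check=True surfaces failures.
subprocess.run(
    ['ffmpeg', '-nostdin', '-i', args.vid,
     '-vf', f'fps={fps}',
     f'{temp_dir.name}/Frame%d.png'],
    check=True,
)
```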
```diff
@@ -169,13 +189,14 @@ def main(args, model, renderer, device, cpm):
         image_folder = Path(args.img_folder)
 
     # Get all demo images ends with .jpg or .png
     img_paths = [img for end in args.file_type for img in image_folder.glob(end)]
     img_paths = sorted(img_paths, key=lambda x: int(os.path.basename(x).removesuffix('.png').removeprefix('Frame')))
 
     if args.bbox:
         # Detect humans in image
         img_cv2 = cv2.imread(str(img_paths[0]))
         det_out = detector(img_cv2)
         det_instances = det_out['instances']
+        print_gpu_usage()
         # Clearing memory
         del detector
         torch.cuda.empty_cache()
```
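The numeric sort key above matters because plain lexicographic ordering would interleave the extracted frame files; a quick illustration:

```python
names = ['Frame1.png', 'Frame2.png', 'Frame10.png']

# Lexicographic order puts Frame10 before Frame2
print(sorted(names))
# ['Frame1.png', 'Frame10.png', 'Frame2.png']

# The script's numeric key restores true frame order (Python 3.9+ for removesuffix/removeprefix)
print(sorted(names, key=lambda x: int(x.removesuffix('.png').removeprefix('Frame'))))
# ['Frame1.png', 'Frame2.png', 'Frame10.png']
```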
```diff
@@ -190,6 +211,10 @@ def main(args, model, renderer, device, cpm):
         else:
             print("No humans detected in the image")
             return
+
+        # keypoint detector
+        cpm = ViTPoseModel(device)
+        print_gpu_usage()
     else:
         # Hardcoded bbox. This assumes person is in the center and that there is always one person in the image
         # Values below strictly for Rachel's BM Videos
```
```diff
@@ -301,7 +326,7 @@ if __name__ == '__main__':
     parser.add_argument('--MANO_Output', type=bool, default=False, help='If set, generate output images')
 
     args = parser.parse_args()
+    model, model_cfg = load_hamer(args.checkpoint, load_mesh=True)  # Load Mesh False only if no vertices are needed
 
     # Setup HaMeR model
```
```diff
@@ -309,25 +334,21 @@ if __name__ == '__main__':
     model = model.to(device)
     model.eval()
 
     renderer = Renderer(model_cfg, faces=model.mano.faces)
-
-    # keypoint detector
-    cpm = ViTPoseModel(device)
+    print_gpu_usage()
 
     # args.in_folder = '/vol/research/SignMotion/2024.02.20_Capture_1/glosses'
-    Folder_List = os.listdir(args.in_folder)
-    Folder_Dirname = os.path.dirname(args.in_folder)
     Rachel_Path = '//vol//research//SignFeaturePool//Rachel_Gloss_Features//HaMeR_Features'
+    Folder_List = [os.path.join(args.in_folder, f) for f in os.listdir(args.in_folder) if f.endswith('.mp4') or f.endswith('.png') or f.endswith('.jpg')]
     args.out_folder = os.path.join(Rachel_Path, os.path.basename(args.in_folder.removesuffix('/glosses')))
+    args.out_folder = os.path.join(args.out_folder, os.path.basename(args.in_folder))
 
     if Folder_List[0].endswith('.mp4'):
         for vid_file in Folder_List:
-            args.vid = f'{Folder_Dirname}//{vid_file}'
+            args.vid = vid_file
             print(args.vid)
             try:
-                main(args, model, renderer, device, cpm)
+                main(args, model, renderer, device)
             except Exception as e:
                 print(f"Error in processing {vid_file}: {e}")
     elif Folder_List[0].endswith('.png') or Folder_List[0].endswith('.jpg'):
         args.img_folder = args.in_folder
-        main(args, model, renderer, device, cpm)
+        main(args, model, renderer, device)
```
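Since `Folder_List` now holds full paths (joined against `args.in_folder` and filtered by extension), the loop can assign `args.vid = vid_file` directly instead of re-joining with the removed `Folder_Dirname`; a small illustration with hypothetical paths:

```python
import os

in_folder = '/data/glosses'  # hypothetical folder containing a.mp4 and notes.txt

# Before: os.listdir returned bare names, including non-media files
# ['a.mp4', 'notes.txt']

# After: full paths, filtered to the extensions the script handles
folder_list = [os.path.join(in_folder, f) for f in os.listdir(in_folder)
               if f.endswith('.mp4') or f.endswith('.png') or f.endswith('.jpg')]
# ['/data/glosses/a.mp4']
```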
Extract_Uncompressed_HaMeR.py (new file, +334 −0)

```python
from pathlib import Path
import torch
import argparse
import os
import cv2
import numpy as np
import tempfile
import time
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import gc

from hamer.configs import CACHE_DIR_HAMER
from hamer.models import HAMER, download_models, load_hamer, DEFAULT_CHECKPOINT
from hamer.utils import recursive_to
from hamer.datasets.vitdet_dataset import ViTDetDataset, DEFAULT_MEAN, DEFAULT_STD
from hamer.utils.renderer import Renderer, cam_crop_to_full

LIGHT_BLUE = (0.65098039, 0.74117647, 0.85882353)

from vitpose_model import ViTPoseModel

import json
from typing import Dict, Optional
import subprocess


def print_gpu_usage():
    try:
        # Run the `nvidia-smi` command
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=utilization.gpu,memory.used,memory.total', '--format=csv,nounits,noheader'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if result.returncode != 0:
            print("Error fetching GPU usage:", result.stderr)
            return
        # Process the output
        usage_lines = result.stdout.strip().split('\n')
        for i, line in enumerate(usage_lines):
            gpu_util, mem_used, mem_total = map(int, line.split(','))
            print(f"GPU {i}: Utilization: {gpu_util}%, Memory: {mem_used}/{mem_total} MiB")
    except FileNotFoundError:
        print("nvidia-smi command not found. Ensure NVIDIA drivers are installed.")


# def Produce_Mesh_from_Dict(out, batch, model, args):
#     renderer = Renderer(model_cfg, faces=model.mano.faces)
#     multiplier = (2*batch['right']-1)
#     pred_cam = out['pred_cam']
#     pred_cam[:,1] = multiplier*pred_cam[:,1]
#     box_center = batch["box_center"].float()
#     box_size = batch["box_size"].float()
#     img_size = batch["img_size"].float()
#     multiplier = (2*batch['right']-1)
#     scaled_focal_length = model_cfg.EXTRA.FOCAL_LENGTH / model_cfg.MODEL.IMAGE_SIZE * img_size.max()
#     pred_cam_t_full = cam_crop_to_full(pred_cam, box_center, box_size, img_size, scaled_focal_length).detach().cpu().numpy()
#     # Render the result
#     batch_size = batch['img'].shape[0]
#     for n in range(batch_size):
#         start_time = time.time()
#         # Get filename from path img_path
#         img_fn, _ = os.path.splitext(os.path.basename(img_path))
#         person_id = int(batch['personid'][n])
#         white_img = (torch.ones_like(batch['img'][n]).cpu() - DEFAULT_MEAN[:,None,None]/255) / (DEFAULT_STD[:,None,None]/255)
#         input_patch = batch['img'][n].cpu() * (DEFAULT_STD[:,None,None]/255) + (DEFAULT_MEAN[:,None,None]/255)
#         input_patch = input_patch.permute(1,2,0).numpy()
#         regression_img = renderer(out['pred_vertices'][n].detach().cpu().numpy(),
#                                   out['pred_cam_t'][n].detach().cpu().numpy(),
#                                   batch['img'][n],
#                                   mesh_base_color=LIGHT_BLUE,
#                                   scene_bg_color=(1, 1, 1),
#                                   )
#         if args.side_view:
#             side_img = renderer(out['pred_vertices'][n].detach().cpu().numpy(),
#                                 out['pred_cam_t'][n].detach().cpu().numpy(),
#                                 white_img,
#                                 mesh_base_color=LIGHT_BLUE,
#                                 scene_bg_color=(1, 1, 1),
#                                 side_view=True)
#             final_img = np.concatenate([input_patch, regression_img, side_img], axis=1)
#         else:
#             final_img = np.concatenate([input_patch, regression_img], axis=1)
#         cv2.imwrite(os.path.join(args.out_folder, f'{img_fn}_{person_id}.png'), 255*final_img[:, :, ::-1])
#         # Add all verts and cams to list
#         verts = out['pred_vertices'][n].detach().cpu().numpy()
#         is_right = batch['right'][n].cpu().numpy()
#         verts[:,0] = (2*is_right-1)*verts[:,0]
#         cam_t = pred_cam_t_full[n]
#         all_verts.append(verts)
#         all_cam_t.append(cam_t)
#         all_right.append(is_right)
#         # Save all meshes to disk
#         if args.save_mesh:
#             camera_translation = cam_t.copy()
#             tmesh = renderer.vertices_to_trimesh(verts, camera_translation, LIGHT_BLUE, is_right=is_right)
#             tmesh.export(os.path.join(args.out_folder, f'{img_fn}_{person_id}.obj'))
#     print(f"Total Time for rendering Meshes and Saving: {(time.time()-start_time)} Seconds")
#     # Render front view
#     if args.full_frame and len(all_verts) > 0:
#         misc_args = dict(
#             mesh_base_color=LIGHT_BLUE,
#             scene_bg_color=(1, 1, 1),
#             focal_length=scaled_focal_length,
#         )
#         cam_view = renderer.render_rgba_multiple(all_verts, cam_t=all_cam_t, render_res=img_size[n], is_right=all_right, **misc_args)
#         # Overlay image
#         input_img = img_cv2.astype(np.float32)[:,:,::-1]/255.0
#         input_img = np.concatenate([input_img, np.ones_like(input_img[:,:,:1])], axis=2) # Add alpha channel
#         input_img_overlay = input_img[:,:,:3] * (1-cam_view[:,:,3:]) + cam_view[:,:,:3] * cam_view[:,:,3:]
#         cv2.imwrite(os.path.join(args.out_folder, f'{img_fn}_all.jpg'), 255*input_img_overlay[:, :, ::-1])


def show_bbox(img, bbox):
    bbox_tensor = torch.tensor(bbox)
    bbox = bbox_tensor.cpu().numpy()[0]
    # Extract coordinates
    x1, y1, x2, y2 = bbox
    # Load an image (replace 'image.jpg' with your image file)
    image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for Matplotlib
    # Create a figure and axis
    fig, ax = plt.subplots(1)
    # Display the image
    ax.imshow(image)
    # Create a Rectangle patch
    rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='g', facecolor='none')
    # Add the patch to the Axes
    ax.add_patch(rect)
    plt.show()


def convert_tensors_to_lists(d):
    for key, value in d.items():
        if isinstance(value, torch.Tensor):
            d[key] = value.tolist()  # Convert tensor to list
        elif isinstance(value, dict):  # If there is a nested dictionary
            convert_tensors_to_lists(value)
    return d


def main(args, model, renderer, device):
    initial_start_time = time.time()

    # Load detector
    if args.bbox:
        from hamer.utils.utils_detectron2 import DefaultPredictor_Lazy
        if args.body_detector == 'vitdet':
            from detectron2.config import LazyConfig
            import hamer
            cfg_path = Path(hamer.__file__).parent / 'configs' / 'cascade_mask_rcnn_vitdet_h_75ep.py'
            detectron2_cfg = LazyConfig.load(str(cfg_path))
            detectron2_cfg.train.init_checkpoint = "https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_vitdet_h/f328730692/model_final_f05665.pkl"
            for i in range(3):
                detectron2_cfg.model.roi_heads.box_predictors[i].test_score_thresh = 0.25
            detector = DefaultPredictor_Lazy(detectron2_cfg)
        elif args.body_detector == 'regnety':
            from detectron2 import model_zoo
            from detectron2.config import get_cfg
            detectron2_cfg = model_zoo.get_config('new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py', trained=True)
            detectron2_cfg.model.roi_heads.box_predictor.test_score_thresh = 0.5
            detectron2_cfg.model.roi_heads.box_predictor.test_nms_thresh = 0.4
            detector = DefaultPredictor_Lazy(detectron2_cfg)

    os.makedirs(args.out_folder, exist_ok=True)
    print(f"Total Time for Initialization: {(time.time()-initial_start_time)} Seconds")

    start_time = time.time()
    if args.vid != '':
        fps = args.fps
        temp_dir = tempfile.TemporaryDirectory()
        print(f"Temp directory created at {temp_dir.name}")
        # Create Temp Out Folder
        filename = os.path.basename(args.vid).removesuffix('.mp4')
        outfile = os.path.join(args.out_folder, filename)
        os.makedirs(outfile, exist_ok=True)
        os.system(f"ffmpeg -nostdin -i {args.vid} -vf fps={fps} {temp_dir.name}/Frame%d.png")
        image_folder = Path(temp_dir.name)
        print(f"Total Time for Video to Image: {(time.time()-start_time)} Seconds")
    else:
        temp_dir = None
        image_folder = Path(args.img_folder)

    # Get all demo images ends with .jpg or .png
    img_paths = [img for end in args.file_type for img in image_folder.glob(end)]
    img_paths = sorted(img_paths, key=lambda x: int(os.path.basename(x).removesuffix('.png').removeprefix('Frame')))

    if args.bbox:
        # Detect humans in image
        img_cv2 = cv2.imread(str(img_paths[0]))
        det_out = detector(img_cv2)
        det_instances = det_out['instances']
        print_gpu_usage()
        # Clearing memory
        del detector
        torch.cuda.empty_cache()
        gc.collect()

        valid_idx = (det_instances.pred_classes == 0) & (det_instances.scores > 0.5)
        if valid_idx[0]:
            pred_bboxes = det_instances.pred_boxes.tensor[valid_idx].cpu().numpy()
            pred_scores = det_instances.scores[valid_idx].cpu().numpy()
            x1, y1, x2, y2 = pred_bboxes[0]
            pred_bboxes[0] = [x1 - x1*0.275, y1 - y1*0.1, x2 + x1*0.275, y2]
        else:
            print("No humans detected in the image")
            return

        # keypoint detector
        cpm = ViTPoseModel(device)
        print_gpu_usage()
    else:
        # Hardcoded bbox. This assumes person is in the center and that there is always one person in the image
        # Values below strictly for Rachel's BM Videos
        pred_bboxes = np.array([[300, 100, 1620, 1075]])
        # Force confidence to be 0.99 that human is present
        pred_scores = np.array([0.99])
        img_cv2 = cv2.imread(str(img_paths[0]))

    # Iterate over all images in folder
    for img_path in img_paths:
        start_time = time.time()
        img_cv2 = cv2.imread(str(img_path))
        img = img_cv2.copy()[:, :, ::-1]

        # # Detect humans in image
        # det_out = detector(img_cv2)
        # det_instances = det_out['instances']
        # valid_idx = (det_instances.pred_classes==0) & (det_instances.scores > 0.5)
        # pred_bboxes=det_instances.pred_boxes.tensor[valid_idx].cpu().numpy()
        # pred_scores=det_instances.scores[valid_idx].cpu().numpy()

        start_time = time.time()
        # Detect human keypoints for each person
        vitposes_out = cpm.predict_pose(
            img,
            [np.concatenate([pred_bboxes, pred_scores[:, None]], axis=1)],
        )

        bboxes = []
        is_right = []

        start_time = time.time()
        # Use hands based on hand keypoint detections
        for vitposes in vitposes_out:
            left_hand_keyp = vitposes['keypoints'][-42:-21]
            right_hand_keyp = vitposes['keypoints'][-21:]

            # Rejecting not confident detections
            keyp = left_hand_keyp
            valid = keyp[:, 2] > 0.5
            if sum(valid) > 3:
                bbox = [keyp[valid, 0].min(), keyp[valid, 1].min(), keyp[valid, 0].max(), keyp[valid, 1].max()]
                bboxes.append(bbox)
                is_right.append(0)
            keyp = right_hand_keyp
            valid = keyp[:, 2] > 0.5
            if sum(valid) > 3:
                bbox = [keyp[valid, 0].min(), keyp[valid, 1].min(), keyp[valid, 0].max(), keyp[valid, 1].max()]
                bboxes.append(bbox)
                is_right.append(1)

        if len(bboxes) == 0:
            continue

        boxes = np.stack(bboxes)
        right = np.stack(is_right)

        start_time = time.time()
        # Run reconstruction on all detected hands
        dataset = ViTDetDataset(model_cfg, img_cv2, boxes, right, rescale_factor=args.rescale_factor)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=False, num_workers=0)

        all_verts = []
        all_cam_t = []
        all_right = []

        for count, batch in enumerate(dataloader):
            start_time = time.time()
            batch = recursive_to(batch, device)
            with torch.no_grad():
                out = model(batch)
            output = convert_tensors_to_lists(out.copy())
            output['VitPose'] = vitposes_out[0]['keypoints'].tolist()
            json_path = os.path.join(args.out_folder, f"{os.path.basename(img_path).removesuffix('.png')}_{count}.json") if args.vid == '' else os.path.join(outfile, f"{os.path.basename(img_path).removesuffix('.png')}_{count}.json")
            with open(json_path, 'w') as f:
                json.dump(output, f, indent=4)

    if temp_dir:
        temp_dir.cleanup()
        print(f"Temp directory {temp_dir.name} cleaned up")
    print(f"Total time taken: {(time.time()-initial_start_time)/60:.2f} minutes")


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='HaMeR demo code')
    parser.add_argument('--checkpoint', type=str, default=DEFAULT_CHECKPOINT, help='Path to pretrained model checkpoint')
    parser.add_argument('--vid', type=str, default='', help='Path to video file')
    parser.add_argument('--img_folder', type=str, default='images', help='Folder with input images')
    parser.add_argument('--out_folder', type=str, default='out_demo', help='Output folder to save rendered results')
    parser.add_argument('--fps', type=int, default=50, help='FPS for video output')
    parser.add_argument('--side_view', dest='side_view', action='store_true', default=False, help='If set, render side view also')
    parser.add_argument('--full_frame', dest='full_frame', action='store_true', default=True, help='If set, render all people together also')
    parser.add_argument('--save_mesh', dest='save_mesh', action='store_true', default=False, help='If set, save meshes to disk also')
    parser.add_argument('--batch_size', type=int, default=1, help='Batch size for inference/fitting')
    parser.add_argument('--rescale_factor', type=float, default=2.0, help='Factor for padding the bbox')
    parser.add_argument('--body_detector', type=str, default='vitdet', choices=['vitdet', 'regnety'], help='Using regnety improves runtime and reduces memory')
    parser.add_argument('--file_type', nargs='+', default=['*.jpg', '*.png'], help='List of file extensions to consider')
    parser.add_argument('--bbox', type=bool, default=True, help='If set, use provided bbox from ViT')
    parser.add_argument('--MANO_Output', type=bool, default=False, help='If set, generate output images')

    args = parser.parse_args()
    args.out_folder = os.path.join(args.out_folder, os.path.basename(os.path.dirname(args.vid)))
    print(f"Output folder: {args.out_folder}")
    print(f'Warning: Script Format Currently for MeinDGS Processing! FPS set at {args.fps}')

    model, model_cfg = load_hamer(args.checkpoint, load_mesh=True)  # Load Mesh False only if no vertices are needed

    # Setup HaMeR model
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = model.to(device)
    model.eval()

    renderer = Renderer(model_cfg, faces=model.mano.faces)
    print_gpu_usage()

    try:
        main(args, model, renderer, device)
    except Exception as e:
        print(f"Error in processing {args.vid}: {e}")
```
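A quick illustration of `convert_tensors_to_lists`, the helper defined above that makes the model's output dict JSON-serializable (the dict contents here are made up for the example, not real HaMeR output):

```python
import torch

# Hypothetical output dict with a nested sub-dict, mirroring the shapes the
# helper handles: top-level tensors and tensors inside nested dicts.
d = {'pred_cam': torch.tensor([[1.0, 2.0, 3.0]]),
     'nested': {'right': torch.tensor([1.0])}}

# Mutates d in place (and returns it): every tensor becomes a nested list.
convert_tensors_to_lists(d)
# {'pred_cam': [[1.0, 2.0, 3.0]], 'nested': {'right': [1.0]}}
```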