Commit 33b5165 (parent: 3ceda9e)

code cleanup, fixhand examples autoload, change to youtube

Files changed:
- .gitignore  (+1, -1)
- app.py      (+121, -199)
.gitignore CHANGED

@@ -1,4 +1,4 @@
-
+.vscode/
 sbatch/err/
 sbatch/out/
 __pycache__/
app.py CHANGED

@@ -473,13 +473,10 @@ def visualize_ref(ref):
     img = ref["background"][..., :3]
 
     # visualization
-    # img = cv2.resize(img, inpainted.shape[::-1], interpolation=cv2.INTER_AREA)
     mask = inpainted < 128
-    # img = img.astype(np.int32)
-    # img[mask, :] = img[mask, :] - 50
-    # img[np.any(img<0, axis=-1)]=0
-    # img = img.astype(np.uint8)
     img = mask_image(img, mask)
+    if inpaint_mask.sum() == 0:
+        gr.Warning("Run button not enabled? Please try again.", duration=10)
     return img, inpaint_mask
 
 def get_kps(img, keypoints, side: Literal["right", "left"], evt: gr.SelectData):
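Note: the deleted comment block was an inline version of the darkening that mask_image now encapsulates, and the new gr.Warning fires when the brushed inpaint mask is empty. A minimal sketch of such a mask_image helper, assuming the same darken-by-50 behavior as the removed lines (the repo's real implementation may differ):

    import numpy as np

    def mask_image(img, mask, amount=50):
        # darken masked pixels, clipping to the valid uint8 range
        out = img.astype(np.int32)
        out[mask] -= amount
        return np.clip(out, 0, 255).astype(np.uint8)

    # usage mirroring the hunk: dim wherever the inpainted map is dark
    img = np.full((8, 8, 3), 200, dtype=np.uint8)
    inpainted = np.zeros((8, 8), dtype=np.uint8)
    vis = mask_image(img, inpainted < 128)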
@@ -581,11 +578,6 @@ def process_crop(img, crop_coord, evt:gr.SelectData):
         cropped_vis[:,:,-1] = alpha
     else:
         gr.Error("Something is wrong", duration=3)
-    # cropped_editor = {
-    #     "background": cropped,
-    #     "composite": cropped,
-    #     "layers": [cropped_vis]
-    # }
     return crop_coord, cropped, cropped_vis
 
 def disable_crop(crop_coord):
@@ -653,8 +645,6 @@ def ready_sample(img_cropped, inpaint_mask, keypts, keypts_np):
         keypts[0] = np.zeros((21, 2))
     elif len(keypts[0]) == 21:
         keypts[0] = np.array(keypts[0], dtype=np.float32)
-        # keypts[0][:, 0] = keypts[0][:, 0] + crop_coord[0][0]
-        # keypts[0][:, 1] = keypts[0][:, 1] + crop_coord[0][1]
     else:
         gr.Info("Number of right hand keypoints should be either 0 or 21.")
         return None, None

@@ -662,8 +652,6 @@ def ready_sample(img_cropped, inpaint_mask, keypts, keypts_np):
         keypts[1] = np.zeros((21, 2))
     elif len(keypts[1]) == 21:
         keypts[1] = np.array(keypts[1], dtype=np.float32)
-        # keypts[1][:, 0] = keypts[1][:, 0] + crop_coord[0][0]
-        # keypts[1][:, 1] = keypts[1][:, 1] + crop_coord[0][1]
     else:
         gr.Info("Number of left hand keypoints should be either 0 or 21.")
         return None, None
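Note: both hunks drop the same commented-out crop-offset arithmetic, leaving two identical validation branches: a hand's keypoint list must be empty or a complete set of 21 OpenPose points. A sketch of that shared rule as one hypothetical helper:

    import numpy as np

    def normalize_hand_kpts(kpts):
        # returns a (21, 2) float32 array, or None when the count is invalid
        if len(kpts) == 0:
            return np.zeros((21, 2), dtype=np.float32)
        if len(kpts) == 21:
            return np.array(kpts, dtype=np.float32)
        return None  # caller shows gr.Info(...) and aborts

    assert normalize_hand_kpts([]).shape == (21, 2)
    assert normalize_hand_kpts([(0.0, 0.0)] * 21).shape == (21, 2)
    assert normalize_hand_kpts([(0.0, 0.0)] * 5) is None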
@@ -868,13 +856,7 @@ def flip_hand(
         return
     img["composite"] = img["composite"][:, ::-1, :]
     img["background"] = img["background"][:, ::-1, :]
-    img["layers"] = [layer[:, ::-1, :] for layer in img["layers"]]
-    # for comp in [pose_img, pose_manual_img, manual_kp_right, manual_kp_left, cond, auto_cond, manual_cond]:
-    #     if comp is not None:
-    #         if isinstance(comp, torch.Tensor):
-    #             comp = comp.flip(-1)
-    #         else:
-    #             comp = comp[:, ::-1, :]
+    img["layers"] = [layer[:, ::-1, :] for layer in img["layers"]]
     if img_raw is not None:
         img_raw = img_raw[:, ::-1, :]
     pose_img = pose_img[:, ::-1, :]
@@ -889,12 +871,6 @@ def flip_hand(
         auto_cond = auto_cond.flip(-1)
     if manual_cond is not None:
         manual_cond = manual_cond.flip(-1)
-    # for comp in [keypts, auto_keypts, manual_keypts]:
-    #     if comp is not None:
-    #         if comp[:21, :].sum() != 0:
-    #             comp[:21, 0] = opts.image_size[1] - comp[:21, 0]
-    #         if comp[21:, :].sum() != 0:
-    #             comp[21:, 0] = opts.image_size[1] - comp[21:, 0]
     if keypts is not None:
         if keypts[:21, :].sum() != 0:
             keypts[:21, 0] = opts.image_size[1] - keypts[:21, 0]
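Note: the first deleted loop could never have flipped anything, because rebinding the loop variable (comp = comp.flip(-1)) does not write back to pose_img, cond, or the other conditions; the explicit per-variable statements that remain are the working form. The three mirror operations involved are equivalent per representation; a small self-check with toy shapes assumed:

    import numpy as np
    import torch

    W = 256
    img = np.zeros((256, W, 3), dtype=np.uint8)      # HWC image
    cond = torch.randn(3, 256, W)                    # CHW tensor
    kpts = np.full((42, 2), 10.0, dtype=np.float32)  # 21 right + 21 left points

    img_flipped = img[:, ::-1, :]  # reverse the width axis
    cond_flipped = cond.flip(-1)   # flip the last (width) dimension
    kpts[:, 0] = W - kpts[:, 0]    # mirror x coordinates about the width

    assert (img_flipped[:, 0] == img[:, -1]).all()
    assert torch.equal(cond_flipped[..., 0], cond[..., -1])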
@@ -980,6 +956,10 @@ def fix_clear_all():
 def enable_component(image1, image2):
     if image1 is None or image2 is None:
         return gr.update(interactive=False)
+    if isinstance(image1, np.ndarray) and image1.sum() == 0:
+        return gr.update(interactive=False)
+    if isinstance(image2, np.ndarray) and image2.sum() == 0:
+        return gr.update(interactive=False)
     if isinstance(image1, dict) and "background" in image1 and "layers" in image1 and "composite" in image1:
         if image1["background"] is None or (
             image1["background"].sum() == 0
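Note: with the two new ndarray branches, an all-zero (i.e. cleared) mask or keypoint array counts as "not ready", so the Run button is disabled again when either input is wiped. A minimal sketch of how this gate is typically wired, trimmed to the ndarray cases and using hypothetical component names (the real listeners are added near the end of this diff):

    import gradio as gr
    import numpy as np

    def enable_component(image1, image2):
        # enable the target only when both inputs exist and are non-empty
        if image1 is None or image2 is None:
            return gr.update(interactive=False)
        if isinstance(image1, np.ndarray) and image1.sum() == 0:
            return gr.update(interactive=False)
        if isinstance(image2, np.ndarray) and image2.sum() == 0:
            return gr.update(interactive=False)
        return gr.update(interactive=True)

    with gr.Blocks() as demo:
        a = gr.Image(type="numpy")
        b = gr.Image(type="numpy")
        run = gr.Button("Run", interactive=False)
        a.change(enable_component, [a, b], run)
        b.change(enable_component, [a, b], run)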
@@ -1109,61 +1089,14 @@ example_ref_imgs = [
     [
         "sample_images/sample4.jpg",
     ],
-    # [
-    #     "sample_images/sample5.jpg",
-    # ],
     [
         "sample_images/sample6.jpg",
     ],
-    # [
-    #     "sample_images/sample7.jpg",
-    # ],
-    # [
-    #     "sample_images/sample8.jpg",
-    # ],
-    # [
-    #     "sample_images/sample9.jpg",
-    # ],
-    # [
-    #     "sample_images/sample10.jpg",
-    # ],
-    # [
-    #     "sample_images/sample11.jpg",
-    # ],
-    # ["pose_images/pose1.jpg"],
-    # ["pose_images/pose2.jpg"],
-    # ["pose_images/pose3.jpg"],
-    # ["pose_images/pose4.jpg"],
-    # ["pose_images/pose5.jpg"],
-    # ["pose_images/pose6.jpg"],
-    # ["pose_images/pose7.jpg"],
-    # ["pose_images/pose8.jpg"],
 ]
 example_target_imgs = [
-    # [
-    #     "sample_images/sample1.jpg",
-    # ],
-    # [
-    #     "sample_images/sample2.jpg",
-    # ],
-    # [
-    #     "sample_images/sample3.jpg",
-    # ],
-    # [
-    #     "sample_images/sample4.jpg",
-    # ],
     [
         "sample_images/sample5.jpg",
     ],
-    # [
-    #     "sample_images/sample6.jpg",
-    # ],
-    # [
-    #     "sample_images/sample7.jpg",
-    # ],
-    # [
-    #     "sample_images/sample8.jpg",
-    # ],
     [
         "sample_images/sample9.jpg",
     ],
@@ -1174,40 +1107,22 @@ example_target_imgs = [
         "sample_images/sample11.jpg",
     ],
     ["pose_images/pose1.jpg"],
-    # ["pose_images/pose2.jpg"],
-    # ["pose_images/pose3.jpg"],
-    # ["pose_images/pose4.jpg"],
-    # ["pose_images/pose5.jpg"],
-    # ["pose_images/pose6.jpg"],
-    # ["pose_images/pose7.jpg"],
-    # ["pose_images/pose8.jpg"],
 ]
 fix_example_imgs = [
-    ["bad_hands/1.jpg"],  # "bad_hands/1_mask.jpg"],
-    # ["bad_hands/2.jpg"],  # "bad_hands/2_mask.jpg"],
-    ["bad_hands/3.jpg"],  # "bad_hands/3_mask.jpg"],
-    ["bad_hands/4.jpg"],  # "bad_hands/4_mask.jpg"],
-    ["bad_hands/5.jpg"],  # "bad_hands/5_mask.jpg"],
-    ["bad_hands/6.jpg"],  # "bad_hands/6_mask.jpg"],
-    ["bad_hands/7.jpg"],  # "bad_hands/7_mask.jpg"],
-    # ["bad_hands/8.jpg"],  # "bad_hands/8_mask.jpg"],
-    # ["bad_hands/9.jpg"],  # "bad_hands/9_mask.jpg"],
-    # ["bad_hands/10.jpg"],  # "bad_hands/10_mask.jpg"],
-    # ["bad_hands/11.jpg"],  # "bad_hands/11_mask.jpg"],
-    # ["bad_hands/12.jpg"],  # "bad_hands/12_mask.jpg"],
-    # ["bad_hands/13.jpg"],  # "bad_hands/13_mask.jpg"],
-    # ["bad_hands/14.jpg"],
-    # ["bad_hands/15.jpg"],
+    ["bad_hands/1.jpg"],
+    ["bad_hands/3.jpg"],
+    ["bad_hands/4.jpg"],
+    ["bad_hands/5.jpg"],
+    ["bad_hands/6.jpg"],
+    ["bad_hands/7.jpg"],
 ]
 fix_example_brush = [
-    ["bad_hands/1_composite.png"]
-    ["bad_hands/3_composite.png"]
-    ["bad_hands/4_composite.png"]
-    ["bad_hands/5_composite.png"]
-    ["bad_hands/6_composite.png"]
-    ["bad_hands/7_composite.png"]
-    # ["bad_hands/14_mask.jpg"],
-    # ["bad_hands/15_mask.jpg"],
+    ["bad_hands/1_composite.png"],
+    ["bad_hands/3_composite.png"],
+    ["bad_hands/4_composite.png"],
+    ["bad_hands/5_composite.png"],
+    ["bad_hands/6_composite.png"],
+    ["bad_hands/7_composite.png"],
 ]
 fix_example_kpts = [
     ["bad_hands/1_kpts.png", 3.0, 1224],
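Note: the trailing commas added to fix_example_brush are the substantive fix in this hunk. In Python, adjacent bracketed literals with no comma between them parse as indexing, so the old list body evaluated ["..."]["..."] and raised a TypeError at import time, which plausibly explains the "Runtime error" banner on this Space:

    # without the comma, this is list indexing, not two list items
    try:
        broken = [
            ["bad_hands/1_composite.png"]
            ["bad_hands/3_composite.png"]
        ]
    except TypeError as e:
        print(e)  # list indices must be integers or slices, not str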
@@ -1217,9 +1132,20 @@ fix_example_kpts = [
     ["bad_hands/6_kpts.png", 3.0, 1348],
     ["bad_hands/7_kpts.png", 3.0, 42],
 ]
+fix_example_all = [
+    ["bad_hands/1.jpg", "bad_hands/1_composite.png", "bad_hands/1_kpts.png", 3.0, 1224],
+    ["bad_hands/3.jpg", "bad_hands/3_composite.png", "bad_hands/3_kpts.png", 1.0, 42],
+    ["bad_hands/4.jpg", "bad_hands/4_composite.png", "bad_hands/4_kpts.png", 2.0, 42],
+    ["bad_hands/5.jpg", "bad_hands/5_composite.png", "bad_hands/5_kpts.png", 3.0, 42],
+    ["bad_hands/6.jpg", "bad_hands/6_composite.png", "bad_hands/6_kpts.png", 3.0, 1348],
+    ["bad_hands/7.jpg", "bad_hands/7_composite.png", "bad_hands/7_kpts.png", 3.0, 42],
+]
 for i in range(len(fix_example_kpts)):
     npy_path = fix_example_kpts[i][0].replace("_kpts.png", ".npy")
     fix_example_kpts[i].append(npy_path)
+for i in range(len(fix_example_all)):
+    npy_path = fix_example_all[i][2].replace("_kpts.png", ".npy")
+    fix_example_all[i].append(npy_path)
 
 custom_css = """
 .gradio-container .examples img {
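Note: each fix_example_all row lines up positionally with the inputs of the gr.Examples block added near the end of this diff (crop image, brushed composite, keypoint image, cfg scale, seed), and the new loop derives a sixth column, the .npy keypoint path, from the third. The derivation in isolation:

    row = ["bad_hands/1.jpg", "bad_hands/1_composite.png", "bad_hands/1_kpts.png", 3.0, 1224]
    npy_path = row[2].replace("_kpts.png", ".npy")
    assert npy_path == "bad_hands/1.npy"
    row.append(npy_path)  # becomes the value fed to fix_kpts_path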
@@ -1248,6 +1174,18 @@ custom_css = """
 #kpts_examples table tr td:nth-child(4) {
     display: none !important;
 }
+#fix_examples_all table tr th:nth-child(4),
+#fix_examples_all table tr td:nth-child(4) {
+    display: none !important;
+}
+#fix_examples_all table tr th:nth-child(5),
+#fix_examples_all table tr td:nth-child(5) {
+    display: none !important;
+}
+#fix_examples_all table tr th:nth-child(6),
+#fix_examples_all table tr td:nth-child(6) {
+    display: none !important;
+}
 #repose_tutorial video {
     width: 70% !important;
     display: block;
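Note: fix_example_all rows carry six values, but only the first three are images worth previewing, so these rules blank out table columns 4-6 (cfg scale, seed, keypoint path) in the examples gallery. A self-contained sketch of the pattern with hypothetical components, hiding one column:

    import gradio as gr

    css = """
    #ex table tr th:nth-child(2),
    #ex table tr td:nth-child(2) { display: none !important; }
    """

    with gr.Blocks(css=css) as demo:
        name = gr.Textbox()
        seed = gr.Number()
        gr.Examples([["hello", 42]], inputs=[name, seed], elem_id="ex")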
@@ -1256,10 +1194,35 @@ custom_css = """
 }
 """
 
+tut1_custom = f"""
+<iframe style="width:100%; aspect-ratio: 12/9;"
+    src="https://www.youtube.com/embed/fQk7cOjSCVc"
+    title="Using your own image" frameborder="0"
+    allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+    allowfullscreen>
+</iframe>
+"""
+tut1_example = f"""
+<iframe style="width:100%; aspect-ratio: 12/9;"
+    src="https://www.youtube.com/embed/-Dq0XTYwTHA"
+    title="Using your own image" frameborder="0"
+    allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+    allowfullscreen>
+</iframe>
+"""
+tut2_example = f"""
+<iframe style="width:50%; aspect-ratio: 12/9; display:block; margin-left:auto; margin-right:auto;"
+    src="https://www.youtube.com/embed/y2CbzUG2uM0"
+    title="Using your own image" frameborder="0"
+    allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+    allowfullscreen>
+</iframe>
+"""
+
 _HEADER_ = '''
 <div style="text-align: center;">
     <h1><b>FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation</b></h1>
-    <h2 style="color: #777777;">CVPR 2025 <span style="color: #990000; font-style: italic;">
+    <h2 style="color: #777777;">CVPR 2025 <span style="color: #990000; font-style: italic;">Highlight</span></h2>
     <style>
     .link-spacing {
         margin-right: 20px;
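Note: this is the "change to youtube" from the commit message: the tutorial MP4s bundled with the Space are replaced by YouTube embeds, which keeps the repository small and offloads video bandwidth. The f-string prefixes are harmless but unnecessary, since none of the strings contain placeholders. gr.HTML renders the iframe markup as-is; a minimal sketch:

    import gradio as gr

    embed = """
    <iframe style="width:100%; aspect-ratio: 12/9;"
        src="https://www.youtube.com/embed/fQk7cOjSCVc"
        title="Tutorial" frameborder="0" allowfullscreen></iframe>
    """

    with gr.Blocks() as demo:
        gr.HTML(embed)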
@@ -1280,8 +1243,7 @@ _HEADER_ = '''
     <h3>
         <a href='https://arxiv.org/abs/2412.02690' target='_blank' class="link-spacing">Paper</a>
         <a href='https://ivl.cs.brown.edu/research/foundhand.html' target='_blank' class="link-spacing">Project Page</a>
-        <a href='' target='_blank' class="link-spacing">Code</a>
-        <a href='' target='_blank'>Model Weights</a>
+        <a href='' target='_blank' class="link-spacing">Code (Coming in June)</a>
     </h3>
     <p>Below are two important abilities of our model. First, we can automatically <b>fix malformed hand images</b>, following the user-provided target hand pose and area to fix. Second, we can <b>repose hand</b> given two hand images - one is the image to edit, and the other one provides target hand pose.</p>
 </div>
@@ -1323,21 +1285,23 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
         gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 1</p>""")
         with gr.Row(variant="panel"):
             with gr.Column():
-                gr.Video(
-                    "how_to_videos/subtitled_fix_hands_custom.mp4",
-                    label="Using your own image",
-                    autoplay=True,
-                    loop=True,
-                    show_label=True,
-                )
+                # gr.Video(
+                #     "how_to_videos/subtitled_fix_hands_custom.mp4",
+                #     label="Using your own image",
+                #     autoplay=True,
+                #     loop=True,
+                #     show_label=True,
+                # )
+                gr.HTML(tut1_custom)
             with gr.Column():
-                gr.Video(
-                    "how_to_videos/subtitled_fix_hands_example.mp4",
-                    label="Using our example image",
-                    autoplay=True,
-                    loop=True,
-                    show_label=True,
-                )
+                # gr.Video(
+                #     "how_to_videos/subtitled_fix_hands_example.mp4",
+                #     label="Using our example image",
+                #     autoplay=True,
+                #     loop=True,
+                #     show_label=True,
+                # )
+                gr.HTML(tut1_example)
 
     # more options
     with gr.Accordion(label="More options", open=False):
@@ -1392,20 +1356,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 gr.Markdown(
                     """<p style="text-align: center;">Optionally crop the image.<br>(Click <b>top left</b> and <b>bottom right</b> of your desired bounding box around the hand)</p>"""
                 )
-                # fix_crop = gr.ImageEditor(
-                #     type="numpy",
-                #     sources=["upload", "webcam", "clipboard"],
-                #     label="Image crop",
-                #     show_label=True,
-                #     height=LENGTH,
-                #     width=LENGTH,
-                #     layers=False,
-                #     # crop_size="1:1",
-                #     transforms=(),
-                #     brush=False,
-                #     image_mode="RGBA",
-                #     container=False,
-                # )
                 fix_crop = gr.Image(
                     type="numpy",
                     sources=["upload", "webcam", "clipboard"],
@@ -1420,23 +1370,11 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 gr.Markdown(
                     """<p style="text-align: center;">💡 If you crop, the model can focus on more details of the cropped area. Square crops might work better than rectangle crops.</p>"""
                 )
-                #
-                #
-                #
-                #
-                #     height=LENGTH,
-                #     width=LENGTH,
-                #     interactive=True,
-                #     visible=True,
-                #     sources=[],
+                # fix_example = gr.Examples(
+                #     fix_example_imgs,
+                #     inputs=[fix_crop],
+                #     examples_per_page=20,
                 # )
-                fix_example = gr.Examples(
-                    fix_example_imgs,
-                    inputs=[fix_crop],
-                    examples_per_page=20,
-                    # run_on_click=True,
-                    # fn=load_brush,
-                )
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Brush wrong finger and its surrounding area</p>"""
@@ -1460,19 +1398,10 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     container=False,
                     interactive=True,
                 )
-                fix_ex_brush = gr.Examples(
-                    fix_example_brush,
-                    inputs=[fix_ref],
-                    examples_per_page=20,
-                    # run_on_click=True,
-                    # fn=inpaint_from_example,
-                )
-                # gr.Markdown(
-                #     """<p style="text-align: center;">③ Hit the \"Finish Cropping & Brushing\" button</p>"""
-                # )
-                # fix_finish_crop = gr.Button(
-                #     value="Finish Croping & Brushing", interactive=False
+                # fix_ex_brush = gr.Examples(
+                #     fix_example_brush,
+                #     inputs=[fix_ref],
+                #     examples_per_page=20,
                 # )
 
             # keypoint selection
@@ -1485,8 +1414,8 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 )
                 fix_kp_all = gr.Image(
                     type="numpy",
-                    label=…
-                    show_label=…
+                    label="Target Hand Pose",
+                    show_label=True,
                     height=LENGTH,
                     width=LENGTH,
                     interactive=False,
@@ -1494,14 +1423,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     sources=(),
                     image_mode="RGBA"
                 )
-                with gr.Accordion(open=True):
-                    fix_ex_kpts = gr.Examples(
-                        fix_example_kpts,
-                        inputs=[fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
-                        examples_per_page=20,
-                        postprocess=False,
-                        elem_id="kpts_examples"
-                    )
+                # with gr.Accordion(open=True):
+                #     fix_ex_kpts = gr.Examples(
+                #         fix_example_kpts,
+                #         inputs=[fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
+                #         examples_per_page=20,
+                #         postprocess=False,
+                #         elem_id="kpts_examples"
+                #     )
                 with gr.Accordion("[Custom data] Manually give hand pose", open=False):
                     gr.Markdown(
                         """<p style="text-align: center;">① Tell us if this is right, left, or both hands</p>"""
@@ -1515,10 +1444,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                         """<p style="text-align: center;">② Click 21 keypoints on the image to provide the target hand pose of <b>right hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
                         visible=False
                     )
-                    # fix_kp_r_info = gr.Markdown(
-                    #     """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select right only</p>""",
-                    #     visible=False,
-                    # )
                     fix_kp_right = gr.Image(
                         type="numpy",
                         label="Keypoint Selection (right hand)",
@@ -1569,21 +1494,11 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     interactive=False,
                 )
 
-            # get latent
-            # with gr.Column():
-
             # result column
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 18px; font-weight: bold;">4. Press "Run" to get the corrected hand image 🎯</p>"""
                 )
-                # gr.Markdown(
-                #     """<p style="text-align: center; font-size: 18px; font-weight: bold;">3. Press "Ready" to start pre-processing</p>"""
-                # )
-                # fix_ready = gr.Button(value="Ready", interactive=False)
-                # gr.Markdown(
-                #     """<p style="text-align: center; font-weight: bold; ">Visualized (256, 256)-resized, brushed image</p>"""
-                # )
                 fix_vis_mask32 = gr.Image(
                     type="numpy",
                     label=f"Visualized {opts.latent_size} Inpaint Mask",
@@ -1603,9 +1518,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     interactive=False,
                     visible=True,
                 )
-                # gr.Markdown(
-                #     """<p style="text-align: center;">[NOTE] Above should be inpaint mask that you brushed, NOT the segmentation mask of the entire hand. </p>"""
-                # )
                 gr.Markdown(
                     """<p style="text-align: center;">⚠️ >3min and ~24GB per generation</p>"""
                 )
@@ -1645,7 +1557,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 )
                 fix_clear = gr.ClearButton()
 
-
+        gr.Examples(
+            fix_example_all,
+            inputs=[fix_crop, fix_ref, fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
+            examples_per_page=20,
+            postprocess=False,
+            elem_id="fix_examples_all",
+        )
+
         # listeners
         fix_crop.change(stash_original, fix_crop, fix_original)  # fix_original: (real_H, real_W, 3)
         fix_crop.change(stay_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref])
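Note: this single gr.Examples block is the "fixhand examples autoload" from the commit message: one click on a gallery row now populates the crop image, brushed reference, keypoint visualization, cfg scale, seed, and keypoint path together, replacing the three per-component galleries commented out in the earlier hunks (postprocess=False presumably passes the stored paths and numbers straight through to the components). A minimal sketch of multi-input examples, with hypothetical components:

    import gradio as gr

    with gr.Blocks() as demo:
        path = gr.Textbox(label="image path")
        seed = gr.Number(label="seed")
        # clicking a row fills every listed input at once
        gr.Examples([["bad_hands/1.jpg", 1224]], inputs=[path, seed])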
@@ -1713,6 +1632,8 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
             reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
         )
         fix_kpts_path.change(read_kpts, fix_kpts_path, fix_kpts_np)
+        fix_inpaint_mask.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
+        fix_kpts_np.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
         fix_run.click(
             ready_sample,
             [fix_ref, fix_inpaint_mask, fix_kpts, fix_kpts_np],
@@ -1820,13 +1741,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
         gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 2</p>""")
         with gr.Row(variant="panel", elem_id="repose_tutorial"):
             with gr.Column():
-                gr.Video(
-                    "how_to_videos/subtitled_repose_hands.mp4",
-                    label="Tutorial",
-                    autoplay=True,
-                    loop=True,
-                    show_label=True,
-                )
+                # gr.Video(
+                #     "how_to_videos/subtitled_repose_hands.mp4",
+                #     label="Tutorial",
+                #     autoplay=True,
+                #     loop=True,
+                #     show_label=True,
+                # )
+                gr.HTML(tut2_example)
 
     # main tabs
     with gr.Row():