Spaces:

hujiecpp
/

PE3R

Running on Zero

App Files Files Community

hujiecpp committed on Feb 21, 2025

Commit

399850e

1 Parent(s): ba148f1

init project

Browse files

Files changed (5) hide show

app.py +42 -71
modules/dust3r/__pycache__/inference.cpython-312.pyc +0 -0
modules/dust3r/inference.py +4 -4
modules/dust3r/utils/image.py.bak +0 -163
modules/dust3r/utils/image.py.ori +0 -143

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import sys
 sys.path.append(os.path.abspath('./modules'))
-import math
 import tempfile
 import gradio
 import torch
@@ -11,23 +11,23 @@ import numpy as np
 import functools
 import trimesh
 import copy
-from PIL import Image
 from scipy.spatial.transform import Rotation
 from modules.pe3r.images import Images
 from modules.dust3r.inference import inference
 from modules.dust3r.image_pairs import make_pairs
-from modules.dust3r.utils.image import load_images, rgb
 from modules.dust3r.utils.device import to_numpy
 from modules.dust3r.viz import add_scene_cam, CAM_COLORS, OPENGL, pts3d_to_trimesh, cat_meshes
 from modules.dust3r.cloud_opt import global_aligner, GlobalAlignerMode
-from copy import deepcopy
-import cv2
-from typing import Any, Dict, Generator,List
-import matplotlib.pyplot as pl
-from modules.mobilesamv2.utils.transforms import ResizeLongestSide
 # from modules.pe3r.models import Models
 import torchvision.transforms as tvf
@@ -447,7 +447,7 @@ def get_3D_model_from_scene(outdir, scene, min_conf_thr=3, as_pointcloud=False,
 #     return cog_seg_maps, rev_cog_seg_maps, multi_view_clip_feats
-@spaces.GPU(duration=60)
 def get_reconstructed_scene(outdir, filelist, schedule='linear', niter=300, min_conf_thr=3.0,
                             as_pointcloud=True, mask_sky=False, clean_depth=True, transparent_cams=True, cam_size=0.05,
                             scenegraph_type='complete', winsize=1, refid=0):
@@ -541,7 +541,7 @@ def get_reconstructed_scene(outdir, filelist, schedule='linear', niter=300, min_
     torch.cuda.empty_cache()
     return scene, outfile
-# @spaces.GPU(duration=60)
 # def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
 #                  mask_sky, clean_depth, transparent_cams, cam_size):
@@ -561,65 +561,36 @@ def get_reconstructed_scene(outdir, filelist, schedule='linear', niter=300, min_
 #                                       clean_depth, transparent_cams, cam_size)
 #     return outfile
-with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
-    recon_fun = functools.partial(get_reconstructed_scene, tmpdirname)
-    # model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname)
-    # get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname)
-    with gradio.Blocks(css=""".gradio-container {margin: 0 !important; min-width: 100%};""", title="PE3R Demo") as demo:
-        # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
-        scene = gradio.State(None)
-        gradio.HTML('<h2 style="text-align: center;">PE3R Demo</h2>')
-        with gradio.Column():
-            inputfiles = gradio.File(file_count="multiple")
-            # with gradio.Row():
-                # schedule = gradio.Dropdown(["linear", "cosine"],
-                #                             value='linear', label="schedule", info="For global alignment!",
-                #                             visible=False)
-                # niter = gradio.Number(value=300, precision=0, minimum=0, maximum=5000,
-                #                         label="num_iterations", info="For global alignment!",
-                #                         visible=False)
-                # scenegraph_type = gradio.Dropdown([("complete: all possible image pairs", "complete"),
-                #                                     ("swin: sliding window", "swin"),
-                #                                     ("oneref: match one image with all", "oneref")],
-                #                                     value='complete', label="Scenegraph",
-                #                                     info="Define how to make pairs",
-                #                                     interactive=True,
-                #                                     visible=False)
-                # winsize = gradio.Slider(label="Scene Graph: Window Size", value=1,
-                #                         minimum=1, maximum=1, step=1, visible=False)
-                # refid = gradio.Slider(label="Scene Graph: Id", value=0, minimum=0, maximum=0, step=1, visible=False)
-            run_btn = gradio.Button("Reconstruct")
-            # with gradio.Row():
-                # adjust the confidence threshold
-                # min_conf_thr = gradio.Slider(label="min_conf_thr", value=3.0, minimum=1.0, maximum=20, step=0.1, visible=False)
-                # adjust the camera size in the output pointcloud
-                # cam_size = gradio.Slider(label="cam_size", value=0.05, minimum=0.001, maximum=0.1, step=0.001, visible=False)
-            # with gradio.Row():
-                # as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud", visible=False)
-                # two post process implemented
-                # mask_sky = gradio.Checkbox(value=False, label="Mask sky", visible=False)
-                # clean_depth = gradio.Checkbox(value=True, label="Clean-up depthmaps", visible=False)
-                # transparent_cams = gradio.Checkbox(value=True, label="Transparent cameras", visible=False)
-            with gradio.Row():
-                text_input = gradio.Textbox(label="Query Text")
-                threshold = gradio.Slider(label="Threshold", value=0.85, minimum=0.0, maximum=1.0, step=0.01)
-            find_btn = gradio.Button("Find")
-            outmodel = gradio.Model3D()
-            # events
-            run_btn.click(fn=recon_fun,
-                            inputs=[inputfiles],
-                            outputs=[scene, outmodel]) # , outgallery
-            # find_btn.click(fn=get_3D_object_from_scene_fun,
-            #                     inputs=[text_input, threshold, scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                             clean_depth, transparent_cams, cam_size],
-            #                 outputs=outmodel)
-    demo.launch(show_error=True, share=None, server_name=None, server_port=None)

 import sys
 sys.path.append(os.path.abspath('./modules'))
+# import math
 import tempfile
 import gradio
 import torch
 import functools
 import trimesh
 import copy
+# from PIL import Image
 from scipy.spatial.transform import Rotation
 from modules.pe3r.images import Images
 from modules.dust3r.inference import inference
 from modules.dust3r.image_pairs import make_pairs
+from modules.dust3r.utils.image import load_images #, rgb
 from modules.dust3r.utils.device import to_numpy
 from modules.dust3r.viz import add_scene_cam, CAM_COLORS, OPENGL, pts3d_to_trimesh, cat_meshes
 from modules.dust3r.cloud_opt import global_aligner, GlobalAlignerMode
+# from copy import deepcopy
+# import cv2
+# from typing import Any, Dict, Generator,List
+# import matplotlib.pyplot as pl
+# from modules.mobilesamv2.utils.transforms import ResizeLongestSide
 # from modules.pe3r.models import Models
 import torchvision.transforms as tvf
 #     return cog_seg_maps, rev_cog_seg_maps, multi_view_clip_feats
+@spaces.GPU(duration=30)
 def get_reconstructed_scene(outdir, filelist, schedule='linear', niter=300, min_conf_thr=3.0,
                             as_pointcloud=True, mask_sky=False, clean_depth=True, transparent_cams=True, cam_size=0.05,
                             scenegraph_type='complete', winsize=1, refid=0):
     torch.cuda.empty_cache()
     return scene, outfile
+# @spaces.GPU(duration=30)
 # def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
 #                  mask_sky, clean_depth, transparent_cams, cam_size):
 #                                       clean_depth, transparent_cams, cam_size)
 #     return outfile
+tmpdirname = tempfile.mkdtemp(suffix='pe3r_gradio_demo')
+recon_fun = functools.partial(get_reconstructed_scene, tmpdirname)
+# model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname)
+# get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname)
+with gradio.Blocks(css=""".gradio-container {margin: 0 !important; min-width: 100%};""", title="PE3R Demo") as demo:
+    # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
+    scene = gradio.State(None)
+    gradio.HTML('<h2 style="text-align: center;">PE3R Demo</h2>')
+    with gradio.Column():
+        inputfiles = gradio.File(file_count="multiple")
+        run_btn = gradio.Button("Reconstruct")
+        with gradio.Row():
+            text_input = gradio.Textbox(label="Query Text")
+            threshold = gradio.Slider(label="Threshold", value=0.85, minimum=0.0, maximum=1.0, step=0.01)
+        find_btn = gradio.Button("Find")
+        outmodel = gradio.Model3D()
+        # events
+        run_btn.click(fn=recon_fun,
+                        inputs=[inputfiles],
+                        outputs=[scene, outmodel]) # , outgallery
+        # find_btn.click(fn=get_3D_object_from_scene_fun,
+        #                     inputs=[text_input, threshold, scene, min_conf_thr, as_pointcloud, mask_sky,
+        #                             clean_depth, transparent_cams, cam_size],
+        #                 outputs=outmodel)
+demo.launch(show_error=True, share=None, server_name=None, server_port=None)

modules/dust3r/__pycache__/inference.cpython-312.pyc CHANGED Viewed

Binary files a/modules/dust3r/__pycache__/inference.cpython-312.pyc and b/modules/dust3r/__pycache__/inference.cpython-312.pyc differ

modules/dust3r/inference.py CHANGED Viewed

@@ -41,12 +41,12 @@ def loss_of_one_batch(batch, model, criterion, device, symmetrize_batch=False, u
     if symmetrize_batch:
         view1, view2 = make_batch_symmetric(batch)
-    with torch.cuda.amp.autocast(enabled=bool(use_amp)):
-        pred1, pred2 = model(view1, view2)
         # loss is supposed to be symmetric
-        with torch.cuda.amp.autocast(enabled=False):
-            loss = criterion(view1, view2, pred1, pred2) if criterion is not None else None
     result = dict(view1=view1, view2=view2, pred1=pred1, pred2=pred2, loss=loss)
     return result[ret] if ret else result

     if symmetrize_batch:
         view1, view2 = make_batch_symmetric(batch)
+    # with torch.cuda.amp.autocast(enabled=bool(use_amp)):
+    pred1, pred2 = model(view1, view2)
         # loss is supposed to be symmetric
+        # with torch.cuda.amp.autocast(enabled=False):
+    loss = criterion(view1, view2, pred1, pred2) if criterion is not None else None
     result = dict(view1=view1, view2=view2, pred1=pred1, pred2=pred2, loss=loss)
     return result[ret] if ret else result

modules/dust3r/utils/image.py.bak DELETED Viewed

@@ -1,163 +0,0 @@
-# Copyright (C) 2024-present Naver Corporation. All rights reserved.
-# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
-#
-# --------------------------------------------------------
-# utilitary functions about images (loading/converting...)
-# --------------------------------------------------------
-import os
-import torch
-import numpy as np
-import PIL.Image
-from PIL.ImageOps import exif_transpose
-import torchvision.transforms as tvf
-os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
-import cv2  # noqa
-try:
-    from pillow_heif import register_heif_opener  # noqa
-    register_heif_opener()
-    heif_support_enabled = True
-except ImportError:
-    heif_support_enabled = False
-ImgNorm = tvf.Compose([tvf.ToTensor(), tvf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
-def img_to_arr( img ):
-    if isinstance(img, str):
-        img = imread_cv2(img)
-    return img
-def imread_cv2(path, options=cv2.IMREAD_COLOR):
-    """ Open an image or a depthmap with opencv-python.
-    """
-    if path.endswith(('.exr', 'EXR')):
-        options = cv2.IMREAD_ANYDEPTH
-    img = cv2.imread(path, options)
-    if img is None:
-        raise IOError(f'Could not load image={path} with {options=}')
-    if img.ndim == 3:
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    return img
-def rgb(ftensor, true_shape=None):
-    if isinstance(ftensor, list):
-        return [rgb(x, true_shape=true_shape) for x in ftensor]
-    if isinstance(ftensor, torch.Tensor):
-        ftensor = ftensor.detach().cpu().numpy()  # H,W,3
-    if ftensor.ndim == 3 and ftensor.shape[0] == 3:
-        ftensor = ftensor.transpose(1, 2, 0)
-    elif ftensor.ndim == 4 and ftensor.shape[1] == 3:
-        ftensor = ftensor.transpose(0, 2, 3, 1)
-    if true_shape is not None:
-        H, W = true_shape
-        ftensor = ftensor[:H, :W]
-    if ftensor.dtype == np.uint8:
-        img = np.float32(ftensor) / 255
-    else:
-        img = (ftensor * 0.5) + 0.5
-    return img.clip(min=0, max=1)
-def _resize_pil_image(img, long_edge_size):
-    S = max(img.size)
-    if S > long_edge_size:
-        interp = PIL.Image.LANCZOS
-    elif S <= long_edge_size:
-        interp = PIL.Image.BICUBIC
-    new_size = tuple(int(round(x*long_edge_size/S)) for x in img.size)
-    return img.resize(new_size, interp)
-def load_images(folder_or_list, cog_seg_maps, size, square_ok=False, verbose=True):
-    """ open and convert all images in a list or folder to proper input format for DUSt3R
-    """
-    if isinstance(folder_or_list, str):
-        if verbose:
-            print(f'>> Loading images from {folder_or_list}')
-        root, folder_content = folder_or_list, sorted(os.listdir(folder_or_list))
-    elif isinstance(folder_or_list, list):
-        if verbose:
-            print(f'>> Loading a list of {len(folder_or_list)} images')
-        root, folder_content = '', folder_or_list
-    else:
-        raise ValueError(f'bad {folder_or_list=} ({type(folder_or_list)})')
-    supported_images_extensions = ['.jpg', '.jpeg', '.png']
-    if heif_support_enabled:
-        supported_images_extensions += ['.heic', '.heif']
-    supported_images_extensions = tuple(supported_images_extensions)
-    imgs = []
-    for path in enumerate(folder_content):
-        if not path.lower().endswith(supported_images_extensions):
-            continue
-        img = exif_transpose(PIL.Image.open(os.path.join(root, path))).convert('RGB')
-        W1, H1 = img.size
-        if size == 224:
-            # resize short side to 224 (then crop)
-            img = _resize_pil_image(img, round(size * max(W1/H1, H1/W1)))
-        else:
-            # resize long side to 512
-            img = _resize_pil_image(img, size)
-        W, H = img.size
-        cx, cy = W//2, H//2
-        if size == 224:
-            half = min(cx, cy)
-            img = img.crop((cx-half, cy-half, cx+half, cy+half))
-        else:
-            halfw, halfh = ((2*cx)//16)*8, ((2*cy)//16)*8
-            if not (square_ok) and W == H:
-                halfh = 3*halfw/4
-            img = img.crop((cx-halfw, cy-halfh, cx+halfw, cy+halfh))
-        W2, H2 = img.size
-        if verbose:
-            print(f' - adding {path} with resolution {W1}x{H1} --> {W2}x{H2}')
-        imgs.append(dict(img=img, ori_img=ImgNorm(img)[None], true_shape=np.int32(
-            [img.size[::-1]]), idx=len(imgs), instance=str(len(imgs))))
-    mean_colors = {}
-    mean_colors_cnt = {}
-    for i in range(len(imgs)):
-        img_np = imgs[i]['img']
-        seg_map = cog_seg_maps[i]
-        unique_labels = np.unique(seg_map)
-        for label in unique_labels:
-            if label == -1:
-                continue
-            mask = (seg_map == label)
-            mean_color = img_np[mask].mean(axis=0)
-            if label in mean_colors.keys():
-                mean_colors[label] += mean_color
-                mean_colors_cnt[label] += 1
-            else:
-                mean_colors[label] = mean_color
-                mean_colors_cnt[label] = 1
-    for key in mean_colors.keys():
-        mean_colors[key] /= mean_colors_cnt[key]
-    for i in range(len(imgs)):
-        img_np = np.array(imgs[i]['img'])
-        smoothed_image = np.zeros_like(img_np)
-        seg_map = cog_seg_maps[i]
-        unique_labels = np.unique(seg_map)
-        for label in unique_labels:
-            if label == -1:
-                continue
-            mask = (seg_map == label)
-            mean_color = mean_colors[label]
-            smoothed_image[mask] = mean_color
-        smoothed_image = cv2.addWeighted(img_np, 0.1, smoothed_image, 0.9, 0)
-        smoothed_image = PIL.Image.fromarray(smoothed_image)
-        imgs[i]['img'] = ImgNorm(smoothed_image)[None]
-    assert imgs, 'no images foud at '+root
-    if verbose:
-        print(f' (Found {len(imgs)} images)')
-    return imgs

modules/dust3r/utils/image.py.ori DELETED Viewed

@@ -1,143 +0,0 @@
-# Copyright (C) 2024-present Naver Corporation. All rights reserved.
-# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
-#
-# --------------------------------------------------------
-# utilitary functions about images (loading/converting...)
-# --------------------------------------------------------
-import os
-import torch
-import numpy as np
-import PIL.Image
-from PIL.ImageOps import exif_transpose
-import torchvision.transforms as tvf
-os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
-import cv2  # noqa
-try:
-    from pillow_heif import register_heif_opener  # noqa
-    register_heif_opener()
-    heif_support_enabled = True
-except ImportError:
-    heif_support_enabled = False
-ImgNorm = tvf.Compose([tvf.ToTensor(), tvf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
-def img_to_arr( img ):
-    if isinstance(img, str):
-        img = imread_cv2(img)
-    return img
-def imread_cv2(path, options=cv2.IMREAD_COLOR):
-    """ Open an image or a depthmap with opencv-python.
-    """
-    if path.endswith(('.exr', 'EXR')):
-        options = cv2.IMREAD_ANYDEPTH
-    img = cv2.imread(path, options)
-    if img is None:
-        raise IOError(f'Could not load image={path} with {options=}')
-    if img.ndim == 3:
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    return img
-def rgb(ftensor, true_shape=None):
-    if isinstance(ftensor, list):
-        return [rgb(x, true_shape=true_shape) for x in ftensor]
-    if isinstance(ftensor, torch.Tensor):
-        ftensor = ftensor.detach().cpu().numpy()  # H,W,3
-    if ftensor.ndim == 3 and ftensor.shape[0] == 3:
-        ftensor = ftensor.transpose(1, 2, 0)
-    elif ftensor.ndim == 4 and ftensor.shape[1] == 3:
-        ftensor = ftensor.transpose(0, 2, 3, 1)
-    if true_shape is not None:
-        H, W = true_shape
-        ftensor = ftensor[:H, :W]
-    if ftensor.dtype == np.uint8:
-        img = np.float32(ftensor) / 255
-    else:
-        img = (ftensor * 0.5) + 0.5
-    return img.clip(min=0, max=1)
-def _resize_pil_image(img, long_edge_size):
-    S = max(img.size)
-    if S > long_edge_size:
-        interp = PIL.Image.LANCZOS
-    elif S <= long_edge_size:
-        interp = PIL.Image.BICUBIC
-    new_size = tuple(int(round(x*long_edge_size/S)) for x in img.size)
-    return img.resize(new_size, interp)
-def load_images(folder_or_list, cog_seg_maps, size, square_ok=False, verbose=True):
-    """ open and convert all images in a list or folder to proper input format for DUSt3R
-    """
-    if isinstance(folder_or_list, str):
-        if verbose:
-            print(f'>> Loading images from {folder_or_list}')
-        root, folder_content = folder_or_list, sorted(os.listdir(folder_or_list))
-    elif isinstance(folder_or_list, list):
-        if verbose:
-            print(f'>> Loading a list of {len(folder_or_list)} images')
-        root, folder_content = '', folder_or_list
-    else:
-        raise ValueError(f'bad {folder_or_list=} ({type(folder_or_list)})')
-    supported_images_extensions = ['.jpg', '.jpeg', '.png']
-    if heif_support_enabled:
-        supported_images_extensions += ['.heic', '.heif']
-    supported_images_extensions = tuple(supported_images_extensions)
-    imgs = []
-    for i, path in enumerate(folder_content):
-        if not path.lower().endswith(supported_images_extensions):
-            continue
-        img = exif_transpose(PIL.Image.open(os.path.join(root, path))).convert('RGB')
-        img_np = np.array(img)
-        smoothed_image = np.zeros_like(img_np)
-        seg_map = cog_seg_maps[i]
-        unique_labels = np.unique(seg_map)
-        for label in unique_labels:
-            mask = (seg_map == label)
-            mean_color = img_np[mask].mean(axis=0)
-            smoothed_image[mask] = mean_color
-        smoothed_image = cv2.addWeighted(img_np, 0.05, smoothed_image, 0.95, 0)
-        smoothed_image = PIL.Image.fromarray(smoothed_image)
-        W1, H1 = img.size
-        if size == 224:
-            # resize short side to 224 (then crop)
-            img = _resize_pil_image(img, round(size * max(W1/H1, H1/W1)))
-            smoothed_image = _resize_pil_image(smoothed_image, round(size * max(W1/H1, H1/W1)))
-        else:
-            # resize long side to 512
-            img = _resize_pil_image(img, size)
-            smoothed_image = _resize_pil_image(smoothed_image, size)
-        W, H = img.size
-        cx, cy = W//2, H//2
-        if size == 224:
-            half = min(cx, cy)
-            img = img.crop((cx-half, cy-half, cx+half, cy+half))
-            smoothed_image = smoothed_image.crop((cx-half, cy-half, cx+half, cy+half))
-        else:
-            halfw, halfh = ((2*cx)//16)*8, ((2*cy)//16)*8
-            if not (square_ok) and W == H:
-                halfh = 3*halfw/4
-            img = img.crop((cx-halfw, cy-halfh, cx+halfw, cy+halfh))
-            smoothed_image = smoothed_image.crop((cx-halfw, cy-halfh, cx+halfw, cy+halfh))
-        W2, H2 = img.size
-        if verbose:
-            print(f' - adding {path} with resolution {W1}x{H1} --> {W2}x{H2}')
-        imgs.append(dict(img=ImgNorm(smoothed_image)[None], ori_img=ImgNorm(img)[None], true_shape=np.int32(
-            [img.size[::-1]]), idx=len(imgs), instance=str(len(imgs))))
-    assert imgs, 'no images foud at '+root
-    if verbose:
-        print(f' (Found {len(imgs)} images)')
-    return imgs