Update hugging_face/app.py #4
opened by assile

hugging_face/app.py CHANGED (+33 -37)
@@ -25,6 +25,9 @@ from matanyone_wrapper import matanyone
 from matanyone.utils.get_default_model import get_matanyone_model
 from matanyone.inference.inference_core import InferenceCore
 
+import warnings
+warnings.filterwarnings("ignore")
+
 def parse_augment():
     parser = argparse.ArgumentParser()
     parser.add_argument('--device', type=str, default=None)
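Note on the added filter: `warnings.filterwarnings("ignore")` at import time silences every warning in the process, including deprecation notices from PyTorch and Gradio that can flag real problems. A category-scoped filter is a narrower option; the sketch below is illustrative and not part of this PR (the chosen categories are assumptions):

    import warnings

    # Hypothetical narrower filter: silence only known-noisy categories
    # rather than every warning in the process.
    warnings.filterwarnings("ignore", category=UserWarning)
    warnings.filterwarnings("ignore", category=FutureWarning)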
@@ -121,7 +124,6 @@ def get_frames_from_video(video_input, video_state):
     except Exception as e:
         print(f"Audio extraction error: {str(e)}")
         audio_path = ""  # Set to "" if extraction fails
-    # print(f'audio_path: {audio_path}')
 
     # extract frames
     try:
@@ -140,15 +142,15 @@ def get_frames_from_video(video_input, video_state):
         print("read_frame_source:{} error. {}\n".format(video_path, str(e)))
     image_size = (frames[0].shape[0],frames[0].shape[1])
 
-    # resize if resolution too big
-    if image_size[0]>=1280 and image_size[0]>=1280:
-        scale = 1080 / min(image_size)
-        new_w = int(image_size[1] * scale)
-        new_h = int(image_size[0] * scale)
-        # update frames
-        frames = [cv2.resize(f, (new_w, new_h), interpolation=cv2.INTER_AREA) for f in frames]
-        # update image_size
-        image_size = (frames[0].shape[0],frames[0].shape[1])
+    # [remove for local demo] resize if resolution too big
+    # if image_size[0]>=1280 and image_size[0]>=1280:
+    #     scale = 1080 / min(image_size)
+    #     new_w = int(image_size[1] * scale)
+    #     new_h = int(image_size[0] * scale)
+    #     # update frames
+    #     frames = [cv2.resize(f, (new_w, new_h), interpolation=cv2.INTER_AREA) for f in frames]
+    #     # update image_size
+    #     image_size = (frames[0].shape[0],frames[0].shape[1])
 
     # initialize video_state
     video_state = {
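The disabled block downscaled oversized inputs so the short side lands near 1080 px before matting. Its guard, `if image_size[0]>=1280 and image_size[0]>=1280:`, tests the height twice; the second operand was presumably meant to be `image_size[1]` (the width). Should the resize ever be re-enabled, a corrected sketch (assuming `image_size` is `(height, width)` as assigned just above):

    import cv2

    # Hypothetical corrected guard: require both height AND width to be large.
    if image_size[0] >= 1280 and image_size[1] >= 1280:
        scale = 1080 / min(image_size)  # shrink so the short side is ~1080 px
        new_w = int(image_size[1] * scale)
        new_h = int(image_size[0] * scale)
        # cv2.resize expects (width, height); INTER_AREA suits downscaling
        frames = [cv2.resize(f, (new_w, new_h), interpolation=cv2.INTER_AREA) for f in frames]
        image_size = (frames[0].shape[0], frames[0].shape[1])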
@@ -165,8 +167,7 @@ def get_frames_from_video(video_input, video_state):
     video_info = "Video Name: {},\nFPS: {},\nTotal Frames: {},\nImage Size:{}".format(video_state["video_name"], round(video_state["fps"], 0), len(frames), image_size)
     model.samcontroler.sam_controler.reset_image()
     model.samcontroler.sam_controler.set_image(video_state["origin_images"][0])
-    return video_state, video_info, video_state["origin_images"][0], \
-        gr.update(visible=True, maximum=len(frames), value=1), gr.update(visible=False, maximum=len(frames), value=len(frames)), \
+    return video_state, video_info, video_state["origin_images"][0], gr.update(visible=True, maximum=len(frames), value=1), gr.update(visible=False, maximum=len(frames), value=len(frames)), \
         gr.update(visible=True), gr.update(visible=True), \
         gr.update(visible=True), gr.update(visible=True),\
         gr.update(visible=True), gr.update(visible=True), \
@@ -292,6 +293,7 @@ def image_matting(video_state, interactive_state, mask_dropdown, erode_kernel_si
     foreground, alpha = matanyone(matanyone_processor, following_frames, template_mask*255, r_erode=erode_kernel_size, r_dilate=dilate_kernel_size, n_warmup=refine_iter)
     foreground_output = Image.fromarray(foreground[-1])
     alpha_output = Image.fromarray(alpha[-1][:,:,0])
+
     return foreground_output, alpha_output
 
 # video matting
@@ -324,7 +326,7 @@ def video_matting(video_state, interactive_state, mask_dropdown, erode_kernel_si
 
     foreground_output = generate_video_from_frames(foreground, output_path="./results/{}_fg.mp4".format(video_state["video_name"]), fps=fps, audio_path=audio_path) # import video_input to name the output video
     alpha_output = generate_video_from_frames(alpha, output_path="./results/{}_alpha.mp4".format(video_state["video_name"]), fps=fps, gray2rgb=True, audio_path=audio_path) # import video_input to name the output video
-
+
     return foreground_output, alpha_output
 
 
@@ -409,38 +411,32 @@ sam_checkpoint_url_dict = {
     'vit_l': "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth",
     'vit_b': "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
 }
-checkpoint_folder = os.path.join('
+checkpoint_folder = os.path.join('..', 'pretrained_models')
 
 sam_checkpoint = load_file_from_url(sam_checkpoint_url_dict[args.sam_model_type], checkpoint_folder)
 # initialize sams
 model = MaskGenerator(sam_checkpoint, args)
 
 # initialize matanyone
-
-
-
-# matanyone_model = get_matanyone_model(ckpt_path, args.device)
-# load from Hugging Face
-from matanyone.model.matanyone import MatAnyone
-matanyone_model = MatAnyone.from_pretrained("PeiqingYang/MatAnyone")
-
+pretrain_model_url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0/matanyone.pth"
+ckpt_path = load_file_from_url(pretrain_model_url, checkpoint_folder)
+matanyone_model = get_matanyone_model(ckpt_path, args.device)
 matanyone_model = matanyone_model.to(args.device).eval()
-matanyone_processor = InferenceCore(matanyone_model, cfg=matanyone_model.cfg)
+# matanyone_processor = InferenceCore(matanyone_model, cfg=matanyone_model.cfg)
 
 # download test samples
-
-
-load_file_from_url(
-load_file_from_url(
-load_file_from_url(
-load_file_from_url(
-load_file_from_url(
-load_file_from_url(os.path.join(media_url, 'test-sample1.jpg'), test_sample_path)
+test_sample_path = os.path.join('.', "test_sample/")
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample0-720p.mp4', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample1-720p.mp4', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample2-720p.mp4', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample3-720p.mp4', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample0.jpg', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample1.jpg', test_sample_path)
 
 # download assets
-assets_path = os.path.join('
-load_file_from_url(
-load_file_from_url(
+assets_path = os.path.join('.', "assets/")
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/tutorial_single_target.mp4', assets_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/tutorial_multi_targets.mp4', assets_path)
 
 # documents
 title = r"""<div class="multi-layer" align="center"><span>MatAnyone</span></div>
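This hunk moves model loading off the Hugging Face Hub (`MatAnyone.from_pretrained("PeiqingYang/MatAnyone")`) and back to the GitHub v1.0.0 release checkpoint via `get_matanyone_model`, matching the local demo. One caveat: `image_matting` and `video_matting` above still pass `matanyone_processor` into `matanyone(...)`, so commenting out the `InferenceCore` assignment will raise a NameError at matting time unless the wrapper constructs a processor elsewhere. For reference, a minimal sketch of the download-and-cache behavior the revert relies on, assuming `load_file_from_url` acts like the usual torch-hub helper (download once into the folder, return the local path):

    import os
    import torch

    def load_file_from_url_sketch(url: str, model_dir: str) -> str:
        # Hypothetical stand-in for the repo's load_file_from_url helper.
        os.makedirs(model_dir, exist_ok=True)
        local_path = os.path.join(model_dir, os.path.basename(url))
        if not os.path.exists(local_path):
            torch.hub.download_url_to_file(url, local_path)  # cached on later runs
        return local_path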
@@ -574,11 +570,11 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=my_custom_css) as demo:
     with gr.Row():
         with gr.Column():
             gr.Markdown("### Case 1: Single Target")
-            gr.Video(value="
+            gr.Video(value="./assets/tutorial_single_target.mp4", elem_classes="video")
 
         with gr.Column():
             gr.Markdown("### Case 2: Multiple Targets")
-            gr.Video(value="
+            gr.Video(value="./assets/tutorial_multi_targets.mp4", elem_classes="video")
 
     with gr.Tabs():
         with gr.TabItem("Video"):
@@ -978,4 +974,4 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=my_custom_css) as demo:
     gr.Markdown(article)
 
 demo.queue()
-demo.launch(debug=True)
+demo.launch(share=True, debug=True)
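`share=True` asks Gradio to open a temporary public gradio.live tunnel alongside the local server, which is useful for a local run; on a hosted Space the app is already publicly reachable, so the flag mainly matters off-platform. A sketch that requests a tunnel only outside a Space (the `SPACE_ID` environment check is an assumption about the hosting environment):

    import os

    # Hypothetical guard: request a share link only when not running on a Space.
    on_space = os.environ.get("SPACE_ID") is not None
    demo.queue()
    demo.launch(share=not on_space, debug=True)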