Spaces:

ricklon
/

test_pyan

Sleeping

App Files Community

ricklon committed on Oct 8, 2024

Commit

1bc4ac3

verified ·

1 Parent(s): 2b446d2

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -108

app.py CHANGED Viewed

@@ -14,8 +14,8 @@ import pyannote.audio
 import sys
 import traceback
-# Set page title
-st.set_page_config(page_title="Optimized Speaker Diarization App")
 st.title("Optimized Speaker Diarization App")
@@ -62,7 +62,8 @@ def check_hf_api():
         response = requests.get(api_url, headers=headers)
         response.raise_for_status()
         st.success("Successfully connected to Hugging Face API")
-        st.json(response.json())
     except requests.exceptions.RequestException as e:
         st.error(f"Error connecting to Hugging Face API: {str(e)}")
         if response.status_code == 403:
@@ -129,67 +130,76 @@ def load_pipeline():
         raise e
-# File uploader
-uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
-# Advanced options
-st.sidebar.header("Advanced Options")
-num_speakers = st.sidebar.number_input("Number of speakers (0 for auto)", min_value=0, value=0)
-min_speakers = st.sidebar.number_input("Minimum number of speakers", min_value=1, value=1)
-max_speakers = st.sidebar.number_input("Maximum number of speakers", min_value=1, value=5)
-if verify_token(HF_TOKEN):
-    check_hf_api()
-    verify_model_files()
-    try:
-        pipeline = load_pipeline()
-    except Exception as e:
-        st.error("Failed to load pipeline. Please check the error messages above.")
-        st.stop()
-else:
-    st.stop()
-if uploaded_file is not None:
-    # Save uploaded file temporarily
-    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
-        tmp_file.write(uploaded_file.getvalue())
-        tmp_path = tmp_file.name
-    try:
-        # Set up progress hook
-        progress_text = st.empty()
-        progress_bar = st.progress(0)
-        def progress_hook(step: int, total: int, stage: str):
-            progress_text.text(f"Processing: {stage}")
-            progress_bar.progress(step / total)
-        # Run the pipeline on the audio file
-        with st.spinner('Processing audio...'):
-            diarization_args = {
-                "file": tmp_path,
-                "min_speakers": min_speakers,
-                "max_speakers": max_speakers,
-                "hook": ProgressHook(progress_hook)
-            }
-            if num_speakers > 0:
-                diarization_args["num_speakers"] = num_speakers
-            diarization = pipeline(**diarization_args)
-        # Rest of the code remains the same...
-        # Generate RTTM content
-        rttm_content = ""
-        for turn, _, speaker in diarization.itertracks(yield_label=True):
-            rttm_line = f"SPEAKER {os.path.basename(tmp_path)} 1 {turn.start:.3f} {turn.duration:.3f} <NA> <NA> {speaker} <NA> <NA>\n"
-            rttm_content += rttm_line
-        # Display RTTM content
-        st.subheader("Diarization Results (RTTM format)")
-        st.text_area("RTTM Output", rttm_content, height=300)
-        # Provide download button for RTTM file
         st.download_button(
             label="Download RTTM file",
             data=rttm_content,
@@ -197,58 +207,34 @@ if uploaded_file is not None:
             mime="text/plain"
         )
-        # Display additional information
-        st.subheader("Diarization Information")
-        st.write(f"Number of speakers detected: {len(diarization.labels())}")
-        # Visualize diarization
         if st.button("Visualize Diarization"):
             fig, ax = plt.subplots(figsize=(10, 2))
             notebook.plot_diarization(diarization, ax=ax)
             plt.tight_layout()
             st.pyplot(fig)
-    except Exception as e:
-        st.error(f"An error occurred: {str(e)}")
-        st.error("Error details:")
-        st.code(traceback.format_exc())
-    # Clean up the temporary file
-    os.unlink(tmp_path)
-else:
-    st.info("Please upload an audio file to start.")
-# Display usage instructions
-st.sidebar.markdown("""
-## Usage Instructions
-1. Upload an audio file (WAV, MP3, or FLAC).
-2. Adjust advanced options if needed.
-3. Wait for the diarization process to complete.
-4. View and download the RTTM results.
-5. Optionally, visualize the diarization.
-""")
-# Display system information
-st.sidebar.markdown(f"""
-## System Information
-- Python version: {sys.version.split()[0]}
-- PyTorch version: {torch.__version__}
-- Pyannote Audio version: {pyannote.audio.__version__}
-- CUDA available: {torch.cuda.is_available()}
-- Device: {"CUDA" if torch.cuda.is_available() else "CPU"}
-""")
 # Token Permissions Instructions
-st.markdown("""
-## Token Permissions
-If you're encountering access issues, please ensure your Hugging Face token has the following permissions:
-1. Go to [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
-2. Find your token or create a new one
-3. Ensure "Read" access is granted
-4. Check the box for "Access to gated repositories"
-5. Save the changes and try again
-""")
 # Clear Cache Button
 if st.button("Clear Cache"):
@@ -258,9 +244,4 @@ if st.button("Clear Cache"):
         shutil.rmtree(cache_dir)
         st.success("Cache cleared successfully.")
     else:
-        st.info("No cache directory found.")
-# Debug Information
-st.subheader("Debug Information")
-st.write(f"Working directory: {os.getcwd()}")
-st.write(f"Files in working directory: {os.listdir()}")

 import sys
 import traceback
+# Set page configuration
+st.set_page_config(page_title="Optimized Speaker Diarization App", layout="wide")
 st.title("Optimized Speaker Diarization App")
         response = requests.get(api_url, headers=headers)
         response.raise_for_status()
         st.success("Successfully connected to Hugging Face API")
+        with st.expander("API Response"):
+            st.json(response.json())
     except requests.exceptions.RequestException as e:
         st.error(f"Error connecting to Hugging Face API: {str(e)}")
         if response.status_code == 403:
         raise e
+# Sidebar
+with st.sidebar:
+    st.header("Settings")
+    show_advanced = st.toggle("Show Advanced Options")
+    if show_advanced:
+        num_speakers = st.number_input("Number of speakers (0 for auto)", min_value=0, value=0)
+        min_speakers = st.number_input("Minimum number of speakers", min_value=1, value=1)
+        max_speakers = st.number_input("Maximum number of speakers", min_value=1, value=5)
+# Main content
+tab1, tab2, tab3 = st.tabs(["Upload & Process", "Results", "Visualization"])
+with tab1:
+    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
+    if uploaded_file is not None:
+        # Save uploaded file temporarily
+        with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
+            tmp_file.write(uploaded_file.getvalue())
+            tmp_path = tmp_file.name
+        try:
+            if verify_token(HF_TOKEN):
+                check_hf_api()
+                verify_model_files()
+                pipeline = load_pipeline()
+            else:
+                st.stop()
+            with st.status("Processing audio...", expanded=True) as status:
+                # Set up progress hook
+                def progress_hook(step: int, total: int, stage: str):
+                    status.update(label=f"Processing: {stage}", state="running")
+                    st.progress(step / total)
+                # Run the pipeline on the audio file
+                diarization_args = {
+                    "file": tmp_path,
+                    "min_speakers": min_speakers if show_advanced else 1,
+                    "max_speakers": max_speakers if show_advanced else 5,
+                    "hook": ProgressHook(progress_hook)
+                }
+                if show_advanced and num_speakers > 0:
+                    diarization_args["num_speakers"] = num_speakers
+                diarization = pipeline(**diarization_args)
+                status.update(label="Diarization complete!", state="complete")
+            # Generate RTTM content
+            rttm_content = ""
+            for turn, _, speaker in diarization.itertracks(yield_label=True):
+                rttm_line = f"SPEAKER {os.path.basename(tmp_path)} 1 {turn.start:.3f} {turn.duration:.3f} <NA> <NA> {speaker} <NA> <NA>\n"
+                rttm_content += rttm_line
+        except Exception as e:
+            st.error(f"An error occurred: {str(e)}")
+            st.error("Error details:")
+            st.code(traceback.format_exc())
+        # Clean up the temporary file
+        os.unlink(tmp_path)
+with tab2:
+    if 'diarization' in locals():
+        st.subheader("Diarization Results")
+        st.metric("Number of speakers detected", len(diarization.labels()))
+        with st.expander("RTTM Output"):
+            st.text_area("RTTM Content", rttm_content, height=300)
         st.download_button(
             label="Download RTTM file",
             data=rttm_content,
             mime="text/plain"
         )
+with tab3:
+    if 'diarization' in locals():
         if st.button("Visualize Diarization"):
             fig, ax = plt.subplots(figsize=(10, 2))
             notebook.plot_diarization(diarization, ax=ax)
             plt.tight_layout()
             st.pyplot(fig)
+# Debug Information
+with st.expander("Debug Information"):
+    st.write(f"Working directory: {os.getcwd()}")
+    st.write(f"Files in working directory: {os.listdir()}")
+    st.write(f"Python version: {sys.version.split()[0]}")
+    st.write(f"PyTorch version: {torch.__version__}")
+    st.write(f"Pyannote Audio version: {pyannote.audio.__version__}")
+    st.write(f"CUDA available: {torch.cuda.is_available()}")
+    st.write(f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}")
 # Token Permissions Instructions
+with st.expander("Token Permissions"):
+    st.markdown("""
+    If you're encountering access issues, please ensure your Hugging Face token has the following permissions:
+    1. Go to [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
+    2. Find your token or create a new one
+    3. Ensure "Read" access is granted
+    4. Check the box for "Access to gated repositories"
+    5. Save the changes and try again
+    """)
 # Clear Cache Button
 if st.button("Clear Cache"):
         shutil.rmtree(cache_dir)
         st.success("Cache cleared successfully.")
     else:
+        st.info("No cache directory found.")