Spaces:

pasxalisag
/

Codey-Bryant

Sleeping

App Files Files Community

pasxalisag committed on Dec 4, 2025

Commit

d75ea89

verified ·

1 Parent(s): 9f0aafb

Upload 2 files

Browse files

Files changed (1) hide show

app.py +117 -60

app.py CHANGED Viewed

@@ -410,7 +410,7 @@ Improved:"""
         return final[:k]
     def answer_stream(self, text: str) -> Iterator[str]:
-        """Stream answer - same implementation"""
         retrieved = self.retrieve_enhanced(text, k=3)
         context = ""
@@ -419,22 +419,60 @@ Improved:"""
             ans = meta["answer"][:200]
             context = f"Reference example:\nQ: {q}\nA: {ans}\n\n"
         messages = [
-            {"role": "system", "content": "You are a concise, accurate Python coding assistant. Use the reference if helpful." + context},
             {"role": "user", "content": text}
         ]
-        prompt = self.generator.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = self.generator.tokenizer(prompt, return_tensors="pt").to(DEVICE)
-        streamer = TextIteratorStreamer(self.generator.tokenizer, skip_prompt=True, skip_special_tokens=True)
-        thread = Thread(target=self.generator.model.generate, kwargs=dict(
-            **inputs, streamer=streamer, generation_config=self.generator.generation_config
-        ))
         thread.start()
         for token in streamer:
             yield token
         thread.join()
 # 4) Gradio UI (Optimized for Hugging Face)
@@ -442,38 +480,36 @@ Improved:"""
 ASSISTANT: Optional[HybridCodeAssistant] = None
 def initialize_assistant():
-    """Initialize assistant with progress tracking"""
     global ASSISTANT
     if ASSISTANT is None:
-        yield "Initializing Codey Bryant 3.0..."
-        yield "Loading retrieval system..."
         ASSISTANT = HybridCodeAssistant()
-        yield "Codey Bryant 3.0 Ready!"
-        yield "SOTA RAG Features: HyDE + Query Rewriting + Multi-Query + Answer-Space Retrieval"
-        yield "Ask coding questions like: 'it's not working', 'help with error', 'make it faster'"
     else:
-        yield "Assistant already initialized!"
-def chat(message: str, history: list):
-    """Chat function with error handling"""
     if ASSISTANT is None:
-        yield "Please click 'Initialize Assistant' first!"
-        return
-    # Append user message
     history.append([message, ""])
-    yield history
-    # Stream response
     try:
-        response = ""
         for token in ASSISTANT.answer_stream(message):
-            response += token
-            history[-1][1] = response
             yield history
     except Exception as e:
-        logger.error(f"Chat error: {e}")
-        history[-1][1] = f"Error: {str(e)}"
         yield history
 # 4) Gradio UI
@@ -522,60 +558,81 @@ if __name__ == "__main__":
     server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
     server_port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))
-    # SIMPLE, WORKING UI
-    with gr.Blocks() as demo:
-        gr.Markdown("# 🤖 Codey Bryant 3.0")
-        gr.Markdown("Python Coding Assistant with SOTA RAG")
-        # Status and initialization
-        status = gr.Textbox("Click 'Initialize' to start", label="Status", interactive=False)
-        init_btn = gr.Button("🚀 Initialize", variant="primary")
         # Chat interface
-        chatbot = gr.Chatbot(label="Conversation")
-        msg = gr.Textbox(placeholder="Type your Python question here...", label="Your Question")
-        submit = gr.Button("Send")
-        clear = gr.Button("Clear")
         # Event handlers
-        def init_and_enable():
-            yield from initialize_assistant()
-            return gr.update(interactive=True)
-        init_btn.click(
-            init_and_enable,
-            outputs=[status]
-        )
-        def add_message(message, history):
-            return "", history + [[message, None]]
-        def respond(message, history):
-            for response in chat(message, history):
-                yield response
-        # Connect the send button
-        submit.click(
-            add_message,
             [msg, chatbot],
             [msg, chatbot]
         ).then(
-            respond,
             [msg, chatbot],
             chatbot
         )
-        # Also connect Enter key
         msg.submit(
-            add_message,
             [msg, chatbot],
             [msg, chatbot]
         ).then(
-            respond,
             [msg, chatbot],
             chatbot
         )
-        clear.click(lambda: [], None, chatbot)
-    demo.launch(server_name=server_name, server_port=server_port, share=False)

         return final[:k]
     def answer_stream(self, text: str) -> Iterator[str]:
+        """Stream answer with proper message formatting"""
         retrieved = self.retrieve_enhanced(text, k=3)
         context = ""
             ans = meta["answer"][:200]
             context = f"Reference example:\nQ: {q}\nA: {ans}\n\n"
+        # Create properly formatted messages
+        system_content = "You are a concise, accurate Python coding assistant. " + context.strip()
+        # Format messages for TinyLlama chat template
         messages = [
             {"role": "user", "content": text}
         ]
+        # Add system message if context exists
+        if context:
+            messages.insert(0, {"role": "system", "content": system_content})
+        # Debug: Print messages format
+        logger.debug(f"Messages format: {messages}")
+        try:
+            # Apply chat template
+            prompt = self.generator.tokenizer.apply_chat_template(
+                messages,
+                tokenize=False,
+                add_generation_prompt=True
+            )
+            logger.debug(f"Generated prompt length: {len(prompt)}")
+        except Exception as e:
+            logger.error(f"Error applying chat template: {e}")
+            # Fallback: Use simple formatting
+            if context:
+                prompt = f"<|system|>\n{system_content}</s>\n<|user|>\n{text}</s>\n<|assistant|>\n"
+            else:
+                prompt = f"<|user|>\n{text}</s>\n<|assistant|>\n"
         inputs = self.generator.tokenizer(prompt, return_tensors="pt").to(DEVICE)
+        streamer = TextIteratorStreamer(
+            self.generator.tokenizer,
+            skip_prompt=True,
+            skip_special_tokens=True
+        )
+        generation_kwargs = dict(
+            **inputs,
+            streamer=streamer,
+            generation_config=self.generator.generation_config,
+            max_new_tokens=300
+        )
+        thread = Thread(target=self.generator.model.generate, kwargs=generation_kwargs)
         thread.start()
         for token in streamer:
             yield token
         thread.join()
 # 4) Gradio UI (Optimized for Hugging Face)
 ASSISTANT: Optional[HybridCodeAssistant] = None
 def initialize_assistant():
+    """Initialize assistant"""
     global ASSISTANT
     if ASSISTANT is None:
         ASSISTANT = HybridCodeAssistant()
+        return "Codey Bryant 3.0 Ready! Ask your Python questions below."
     else:
+        return "Assistant already initialized!"
+def chat(message: str, history):
+    """Chat function with proper history handling"""
     if ASSISTANT is None:
+        # If not initialized, return error message
+        history.append([message, "Please click 'Initialize Assistant' first!"])
+        return history
+    # Append user message to history
     history.append([message, ""])
     try:
+        # Stream the response
+        response_text = ""
         for token in ASSISTANT.answer_stream(message):
+            response_text += token
+            # Update the last message in history
+            history[-1] = [message, response_text]
             yield history
     except Exception as e:
+        logger.error(f"Error generating response: {e}")
+        history[-1] = [message, f"Error: {str(e)}"]
         yield history
 # 4) Gradio UI
     server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
     server_port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))
+    # Create a simple, robust UI
+    with gr.Blocks(title="Codey Bryant 3.0") as demo:
+        gr.Markdown("""
+        # 🤖 Codey Bryant 3.0
+        ## SOTA RAG Python Coding Assistant
+        **Features:** HyDE + Query Rewriting + Multi-Query + Answer-Space Retrieval
+        """)
+        # Status display
+        status = gr.Markdown("### Status: Click 'Initialize Assistant' to start")
+        # Initialize button
+        init_btn = gr.Button("🚀 Initialize Assistant", variant="primary")
         # Chat interface
+        chatbot = gr.Chatbot(height=500)
+        # Input and send
+        with gr.Row():
+            msg = gr.Textbox(
+                placeholder="Ask Python coding questions...",
+                label="Your Question",
+                scale=4
+            )
+            submit_btn = gr.Button("Send", variant="secondary", scale=1)
+        # Clear button
+        clear_btn = gr.Button("Clear Chat")
         # Event handlers
+        def on_init():
+            return initialize_assistant()
+        init_btn.click(on_init, outputs=status)
+        def process_message(message, chat_history):
+            # Add user message
+            chat_history.append([message, ""])
+            return "", chat_history
+        def generate_response(message, chat_history):
+            # Generate assistant response
+            for updated_history in chat(message, chat_history):
+                yield updated_history
+        # Connect submit button
+        submit_btn.click(
+            process_message,
             [msg, chatbot],
             [msg, chatbot]
         ).then(
+            generate_response,
             [msg, chatbot],
             chatbot
         )
+        # Connect Enter key
         msg.submit(
+            process_message,
             [msg, chatbot],
             [msg, chatbot]
         ).then(
+            generate_response,
             [msg, chatbot],
             chatbot
         )
+        # Clear chat
+        clear_btn.click(lambda: [], None, chatbot)
+    # Launch the app
+    logger.info(f"Starting Codey Bryant 3.0 on {server_name}:{server_port}")
+    demo.launch(
+        server_name=server_name,
+        server_port=server_port,
+        share=False
+    )