Spaces:

MCP-1st-Birthday
/

Hivenet_ComputeAgent

Sleeping

App Files Files Community

carraraig committed on Nov 28, 2025

Commit

8816dfd

1 Parent(s): a8d3d79

Hello

Browse files

Files changed (43) hide show

ComputeAgent/ComputeAgent.png +0 -0
ComputeAgent/basic_agent_graph.png +0 -0
ComputeAgent/chains/tool_result_chain.py +240 -0
ComputeAgent/compute_agent_graph.png +0 -0
ComputeAgent/graph/__init__.py +0 -0
ComputeAgent/graph/basic_agent_graph.png +0 -0
ComputeAgent/graph/graph.py +411 -0
ComputeAgent/graph/graph_ReAct.py +331 -0
ComputeAgent/graph/graph_deploy.py +363 -0
ComputeAgent/graph/state.py +84 -0
ComputeAgent/hivenet.jpg +0 -0
ComputeAgent/main.py +284 -0
ComputeAgent/models/__init__.py +0 -0
ComputeAgent/models/doc.py +55 -0
ComputeAgent/models/model_manager.py +100 -0
ComputeAgent/models/model_router.py +146 -0
ComputeAgent/nodes/ReAct/__init__.py +58 -0
ComputeAgent/nodes/ReAct/agent_reasoning_node.py +399 -0
ComputeAgent/nodes/ReAct/auto_approval_node.py +81 -0
ComputeAgent/nodes/ReAct/decision_functions.py +135 -0
ComputeAgent/nodes/ReAct/generate_node.py +510 -0
ComputeAgent/nodes/ReAct/human_approval_node.py +284 -0
ComputeAgent/nodes/ReAct/tool_execution_node.py +190 -0
ComputeAgent/nodes/ReAct/tool_rejection_exit_node.py +93 -0
ComputeAgent/nodes/ReAct_DeployModel/__init__.py +13 -0
ComputeAgent/nodes/ReAct_DeployModel/capacity_approval.py +183 -0
ComputeAgent/nodes/ReAct_DeployModel/capacity_estimation.py +387 -0
ComputeAgent/nodes/ReAct_DeployModel/extract_model_info.py +291 -0
ComputeAgent/nodes/ReAct_DeployModel/generate_additional_info.py +83 -0
ComputeAgent/nodes/__init__.py +0 -0
ComputeAgent/routers/compute_agent_HITL.py +590 -0
ComputeAgent/vllm_engine_args.py +325 -0
Compute_MCP/api_data_structure.py +398 -0
Compute_MCP/main.py +16 -0
Compute_MCP/tools.py +96 -0
Compute_MCP/utils.py +26 -0
Dockerfile +29 -0
Gradio_interface.py +1374 -0
README.md +12 -4
constant.py +195 -0
logging_setup.py +73 -0
requirements.txt +21 -0
run.sh +21 -0

ComputeAgent/ComputeAgent.png ADDED Viewed

ComputeAgent/basic_agent_graph.png ADDED Viewed

ComputeAgent/chains/tool_result_chain.py ADDED Viewed

	@@ -0,0 +1,240 @@

+"""
+Tool Result Chain Module for ReAct Workflow
+This module implements the ToolResultChain class, which serves as a specialized
+response generation component within the ReAct (Reasoning and Acting) workflow
+for synthesizing responses from non-researcher tool execution results.
+The ToolResultChain provides professional formatting and comprehensive response
+generation for various tool outputs (math calculations, file operations, API calls,
+etc.) while maintaining the same quality standards as ResearcherChain and DirectAnswerChain.
+The ToolResultChain is used when the ReAct workflow has executed tools other than
+the researcher tool and needs to create a well-formatted, contextual response
+that integrates tool results with conversation memory and user intent.
+Key Features:
+- Tool result synthesis with professional formatting
+- Memory context integration for personalized responses
+- Comprehensive formatting consistent with other chains
+- Support for multiple tool result types (JSON, text, structured data)
+- Timezone-aware timestamp integration
+- Professional markdown structure for consistency
+- Integration with HiveGPT system prompts for unified behavior
+Author: HiveNetCode
+License: Private
+"""
+from datetime import datetime
+from typing import Optional, List, Dict, Any
+from zoneinfo import ZoneInfo
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_openai import ChatOpenAI
+from constant import Constants
+class ToolResultChain:
+    """
+    Specialized chain optimized for tool result synthesis within ReAct workflow.
+    This class implements a tool result-based response generation system specifically
+    designed for the ReAct (Reasoning and Acting) workflow pattern. It provides responses
+    that synthesize and contextualize tool execution results into comprehensive,
+    user-friendly answers that maintain professional presentation standards.
+    The ToolResultChain handles various tool output types and ensures:
+    - Professional formatting consistent with other chains
+    - Integration of tool results with user context and intent
+    - Memory-aware response generation
+    - Comprehensive explanations that go beyond raw tool output
+    This chain is typically used when the ReAct workflow has executed tools like:
+    - Mathematical calculations
+    - File operations
+    - API calls
+    - Data processing tools
+    - System utilities
+    And needs to present the results in a user-friendly, contextual manner.
+    """
+    def __init__(self, llm: ChatOpenAI):
+        """
+        Initialize the ToolResultChain with language model and prompt configuration.
+        Args:
+            llm: ChatOpenAI instance configured for response generation.
+                Should be the same model type used in other chains for consistency.
+        """
+        self.llm = llm
+        # Build the system prompt for tool result synthesis
+        tool_result_system_prompt = self._build_tool_result_system_prompt()
+        # Create the prompt template for tool result responses
+        self.prompt = ChatPromptTemplate.from_messages([
+            ("system", tool_result_system_prompt),
+            ("human", self._get_human_message_template())
+        ])
+        # Build the complete processing chain
+        self.chain = self.prompt | self.llm | StrOutputParser()
+    def _build_tool_result_system_prompt(self) -> str:
+        """
+        Construct the complete system prompt for tool result synthesis.
+        Returns:
+            Complete system prompt combining HiveGPT base behavior with tool result instructions
+        """
+        return Constants.GENERAL_SYSTEM_PROMPT + r"""
+        ## TOOL RESULT SYNTHESIS INSTRUCTIONS
+        **YOU ARE SYNTHESIZING AND PRESENTING TOOL EXECUTION RESULTS.**
+        - **ANALYZE** the provided tool results and understand what was accomplished.
+        - **CONTEXTUALIZE** the results within the user's original query and intent.
+        - **PROVIDE** comprehensive explanations that go beyond just presenting raw data.
+        - **INTEGRATE** conversation context to make responses personalized and relevant.
+        - **FORMAT** responses with appropriate markdown structure for professional presentation.
+        ### Response Quality Guidelines
+        - **Explain what was done**: Clearly describe what tool(s) were executed and why.
+        - **Present results clearly**: Format tool outputs in a user-friendly way.
+        - **Provide context**: Explain the significance or implications of the results.
+        - **Answer the user's intent**: Address the underlying question, not just the tool output.
+        - **Use professional formatting**: Employ headers, lists, code blocks as appropriate.
+        ### Tool Result Processing
+        - **Parse and understand** different tool output formats (JSON, text, structured data).
+        - **Extract key information** and present it in an organized manner.
+        - **Explain technical details** in terms accessible to the user.
+        - **Connect results** to the user's original question or request.
+        - **Provide next steps** or additional insights when relevant.
+        ### Professional Presentation Standards
+        - Match the formatting quality and structure used in document-based responses
+        - Provide explanations that demonstrate understanding of the tool's purpose
+        - Include practical context that helps the user understand the results
+        - Maintain consistency with HiveGPT's helpful and informative persona
+        - Use clear, professional language appropriate for the context
+        - **NEVER include technical identifiers, call IDs, or internal system references in your response**
+        - Focus on the content and meaning, not the technical implementation details
+        """
+    def _get_human_message_template(self) -> str:
+        """
+        Get the human message template for tool result synthesis.
+        Returns:
+            Template string for structuring tool results with user context
+        """
+        return """**CURRENT DATE/TIME:** {currentDateTime}
+**ORIGINAL USER QUERY:**
+{query}
+**TOOL EXECUTION RESULTS:**
+{tool_results}
+**CONVERSATION CONTEXT:**
+{memory_context}
+Please synthesize the tool execution results into a comprehensive, well-formatted response that addresses the user's original query. Explain what was accomplished, present the results clearly, and provide context that helps the user understand the significance of the results."""
+    async def ainvoke(self, query: str, tool_results: List[Any], memory_context: Optional[str] = None) -> str:
+        """
+        Generate a comprehensive response by synthesizing tool execution results.
+        This method processes tool execution results and creates a well-formatted,
+        contextual response that integrates the results with the user's original
+        intent and conversation context.
+        The response generation process:
+        1. Analyzes and formats tool results for presentation
+        2. Integrates conversation context for personalization
+        3. Synthesizes results into a comprehensive explanation
+        4. Applies professional formatting for clarity
+        5. Ensures the response addresses the user's underlying intent
+        Args:
+            query: The user's original question or request that triggered tool execution.
+                  Used to ensure the response addresses the user's actual intent.
+            tool_results: List of tool execution results from various tools. Can include
+                         different formats (JSON strings, text, structured objects).
+            memory_context: Optional conversation context to personalize the response
+                           and maintain conversation continuity.
+        Returns:
+            A comprehensive, well-formatted response that synthesizes tool results
+            into a user-friendly explanation with professional presentation.
+        Raises:
+            Exception: If response generation fails, returns an error message with
+                      tool results preserved for debugging and transparency.
+        Example:
+            >>> chain = ToolResultChain(llm)
+            >>> tool_results = [{"status": "success", "result": 42}]
+            >>> response = await chain.ainvoke("Calculate 6*7", tool_results)
+            >>> print(response)  # Comprehensive formatted response explaining the calculation
+        """
+        try:
+            # Get current timestamp for temporal context
+            current_time = datetime.now(ZoneInfo("Europe/Rome")).strftime("%Y-%m-%d %H:%M:%S %Z")
+            # Format tool results for presentation
+            formatted_tool_results = self._format_tool_results(tool_results)
+            # Prepare memory context
+            context_text = memory_context if memory_context else "No previous conversation context available."
+            # Execute the tool result synthesis chain
+            result = await self.chain.ainvoke({
+                "query": query,
+                "tool_results": formatted_tool_results,
+                "memory_context": context_text,
+                "currentDateTime": current_time
+            })
+            return result
+        except Exception as e:
+            # Provide comprehensive error handling while preserving tool results
+            error_message = (
+                f"I was able to execute the requested tools, but encountered an issue synthesizing the response: {str(e)}\n\n"
+                f"Tool execution results: {self._format_tool_results(tool_results)}\n\n"
+                f"Your original query: {query}\n\n"
+                f"Please try rephrasing your question or contact support if the issue persists."
+            )
+            return error_message
+    def _format_tool_results(self, tool_results: List[Any]) -> str:
+        """
+        Format tool results for presentation in the prompt.
+        Args:
+            tool_results: List of tool execution results in various formats
+        Returns:
+            Formatted string representation of tool results without technical IDs
+        """
+        if not tool_results:
+            return "No tool results available."
+        formatted_results = []
+        for i, result in enumerate(tool_results, 1):
+            if hasattr(result, 'content'):
+                # Tool message with content attribute - exclude tool_call_id from user-facing content
+                content = result.content
+                formatted_results.append(f"Tool {i} Result:\n{content}")
+            elif isinstance(result, dict):
+                # Dictionary result
+                formatted_results.append(f"Tool {i} Result:\n{str(result)}")
+            else:
+                # Other formats
+                formatted_results.append(f"Tool {i} Result:\n{str(result)}")
+        return "\n\n".join(formatted_results)

ComputeAgent/compute_agent_graph.png ADDED Viewed

ComputeAgent/graph/__init__.py ADDED Viewed

File without changes

ComputeAgent/graph/basic_agent_graph.png ADDED Viewed

ComputeAgent/graph/graph.py ADDED Viewed

	@@ -0,0 +1,411 @@

+"""
+Basic Agent Main Graph Module (FastAPI Compatible - Minimal Changes)
+This module implements the core workflow graph for the Basic Agent system.
+It defines the agent's decision-making flow between model deployment and
+React-based compute workflows.
+CHANGES FROM ORIGINAL:
+- __init__ now accepts optional tools and llm parameters
+- Added async create() classmethod for FastAPI
+- Fully backwards compatible with existing CLI code
+Author: Your Name
+License: Private
+"""
+import asyncio
+from typing import Dict, Any, List, Optional
+import uuid
+import json
+import logging
+from langgraph.graph import StateGraph, END, START
+from typing_extensions import TypedDict
+from constant import Constants
+# Checkpointer, sub-workflow graphs, model manager, message types, MCP client and shared state
+from langgraph.checkpoint.memory import MemorySaver
+from graph.graph_deploy import DeployModelAgent
+from graph.graph_ReAct import ReactWorkflow
+from models.model_manager import ModelManager
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_mcp_adapters.client import MultiServerMCPClient
+from graph.state import AgentState
+# Initialize model manager for dynamic LLM loading and management
+# (module-level singleton, created at import time)
+model_manager = ModelManager()
+# Global MemorySaver (persists state across requests)
+# Passed as the checkpointer when the main graph is compiled
+memory_saver = MemorySaver()
+logger = logging.getLogger("ComputeAgent")
+# MCP client for the "hivecompute" server, launched as a Python subprocess over stdio
+# NOTE(review): the script path below is absolute and hard-coded - confirm it matches the deployment layout
+mcp_client = MultiServerMCPClient(
+    {
+        "hivecompute": {
+            "command": "python",
+            "args": ["/home/hivenet/Compute_MCP/main.py"],
+            "transport": "stdio"
+        }
+    }
+)
+class ComputeAgent:
+    """
+    Main Compute Agent class providing AI-powered decision routing and execution.
+    This class orchestrates the complete agent workflow including:
+    - Decision routing between model deployment and React agent
+    - Model deployment workflow with capacity estimation and approval
+    - React agent execution with compute capabilities
+    - Error handling and state management
+    Attributes:
+        graph: Compiled LangGraph workflow
+        model_name: Default model name for operations
+    Usage:
+        # For CLI (backwards compatible):
+        agent = ComputeAgent()
+        # For FastAPI (async):
+        agent = await ComputeAgent.create()
+    """
+    def __init__(self, tools=None, llm=None):
+        """
+        Initialize Compute Agent with optional pre-loaded dependencies.
+        Args:
+            tools: Pre-loaded MCP tools (optional, will load if not provided)
+            llm: Pre-loaded LLM model (optional, will load if not provided)
+        """
+        # If tools/llm not provided, load them synchronously (for CLI)
+        if tools is None:
+            self.tools = asyncio.run(mcp_client.get_tools())
+        else:
+            self.tools = tools
+        if llm is None:
+            self.llm = asyncio.run(model_manager.load_llm_model(Constants.DEFAULT_LLM_FC))
+        else:
+            self.llm = llm
+        self.deploy_subgraph = DeployModelAgent(llm=self.llm, react_tools=self.tools)
+        self.react_subgraph = ReactWorkflow(llm=self.llm, tools=self.tools)
+        self.graph = self._create_graph()
+    @classmethod
+    async def create(cls):
+        """
+        Async factory method for creating ComputeAgent.
+        Use this in FastAPI to avoid asyncio.run() issues.
+        Returns:
+            Initialized ComputeAgent instance
+        """
+        logger.info("🔧 Loading tools and LLM asynchronously...")
+        tools = await mcp_client.get_tools()
+        llm = await model_manager.load_llm_model(Constants.DEFAULT_LLM_FC)
+        # Initialize DeployModelAgent with its own tools
+        deploy_subgraph = await DeployModelAgent.create(llm=llm, custom_tools=None)
+        return cls(tools=tools, llm=llm)
+    async def decision_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Node that handles routing decisions for the ComputeAgent workflow.
+        Analyzes the user query to determine whether to route to:
+        - Model deployment workflow (deploy_model)
+        - React agent workflow (react_agent)
+        Args:
+            state: Current agent state with memory fields
+        Returns:
+            Updated state with routing decision
+        """
+        # Get user context
+        user_id = state.get("user_id", "")
+        session_id = state.get("session_id", "")
+        query = state.get("query", "")
+        logger.info(f"🎯 Decision node processing query for {user_id}:{session_id}")
+        # Build memory context for decision making
+        memory_context = ""
+        if user_id and session_id:
+            try:
+                from helpers.memory import get_memory_manager
+                memory_manager = get_memory_manager()
+                memory_context = await memory_manager.build_context_for_node(user_id, session_id, "decision")
+                if memory_context:
+                    logger.info(f"🧠 Using memory context for decision routing")
+            except Exception as e:
+                logger.warning(f"⚠️ Could not load memory context for decision: {e}")
+        try:
+            # Create a simple LLM for decision making
+            # Load main LLM using ModelManager
+            llm = await model_manager.load_llm_model(Constants.DEFAULT_LLM_NAME)
+            # Create decision prompt
+            decision_system_prompt = f"""
+            You are a routing assistant for ComputeAgent. Analyze the user's query and decide which workflow to use.
+            Choose between:
+            1. DEPLOY_MODEL - For queries about deploy AI model from HuggingFace. In this case the user MUST specify the model card name (like meta-llama/Meta-Llama-3-70B).
+                - The user can specify the hardware capacity needed.
+                - The user can ask for model analysis, deployment steps, or capacity estimation.
+            2. REACT_AGENT - For all the rest of queries.
+            {f"Conversation Context: {memory_context}" if memory_context else "No conversation context available."}
+            User Query: {query}
+            Respond with only: "DEPLOY_MODEL" or "REACT_AGENT"
+            """
+            # Get routing decision
+            decision_response = await llm.ainvoke([
+                SystemMessage(content=decision_system_prompt)
+            ])
+            routing_decision = decision_response.content.strip().upper()
+            # Validate and set decision
+            if "DEPLOY_MODEL" in routing_decision:
+                agent_decision = "deploy_model"
+                logger.info(f"📦 Routing to model deployment workflow")
+            elif "REACT_AGENT" in routing_decision:
+                agent_decision = "react_agent"
+                logger.info(f"⚛️ Routing to React agent workflow")
+            else:
+                # Default fallback to React agent for general queries
+                agent_decision = "react_agent"
+                logger.warning(f"⚠️ Ambiguous routing decision '{routing_decision}', defaulting to React agent")
+            # Update state with decision
+            updated_state = state.copy()
+            updated_state["agent_decision"] = agent_decision
+            updated_state["current_step"] = "decision_complete"
+            logger.info(f"✅ Decision node complete: {agent_decision}")
+            return updated_state
+        except Exception as e:
+            logger.error(f"❌ Error in decision node: {e}")
+            # Update state with fallback decision
+            updated_state = state.copy()
+            updated_state["error"] = f"Decision error (fallback used): {str(e)}"
+            return updated_state
+    def _create_graph(self) -> StateGraph:
+        """
+        Create and configure the Compute Agent workflow graph.
+        This method builds the complete workflow including:
+        1. Initial decision node - routes to deployment or React agent
+        2. Model deployment path:
+           - Fetch model card from HuggingFace
+           - Extract model information
+           - Estimate capacity requirements
+           - Human approval checkpoint
+           - Deploy model or provide info
+        3. React agent path:
+           - Execute React agent with compute MCP capabilities
+        Returns:
+            Compiled StateGraph ready for execution
+        """
+        workflow = StateGraph(AgentState)
+        # Add decision node
+        workflow.add_node("decision", self.decision_node)
+        # Add model deployment workflow nodes
+        workflow.add_node("deploy_model", self.deploy_subgraph.get_compiled_graph())
+        # Add React agent node
+        workflow.add_node("react_agent", self.react_subgraph.get_compiled_graph())
+        # Set entry point
+        workflow.set_entry_point("decision")
+        # Add conditional edges from decision node
+        workflow.add_conditional_edges(
+            "decision",
+            lambda state: state["agent_decision"],
+            {
+                "deploy_model": "deploy_model",
+                "react_agent": "react_agent",
+            }
+        )
+        # Add edges to END
+        workflow.add_edge("deploy_model", END)
+        workflow.add_edge("react_agent", END)
+        # Compile with checkpointer
+        return workflow.compile(checkpointer=memory_saver)
+    def get_compiled_graph(self):
+        """Return the compiled graph for use in FastAPI"""
+        return self.graph
+    def invoke(self, query: str, user_id: str = "default_user", session_id: str = "default_session") -> Dict[str, Any]:
+        """
+        Execute the graph with a given query and memory context (synchronous wrapper for async).
+        Args:
+            query: User's query
+            user_id: User identifier for memory management
+            session_id: Session identifier for memory management
+        Returns:
+            Final result from the graph execution
+        """
+        return asyncio.run(self.ainvoke(query, user_id, session_id))
+    async def ainvoke(self, query: str, user_id: str = "default_user", session_id: str = "default_session") -> Dict[str, Any]:
+        """
+        Execute the graph with a given query and memory context (async).
+        Args:
+            query: User's query
+            user_id: User identifier for memory management
+            session_id: Session identifier for memory management
+        Returns:
+            Final result from the graph execution containing:
+            - response: Final response to user
+            - agent_decision: Which path was taken
+            - deployment_result: If deployment path was taken
+            - react_results: If React agent path was taken
+        """
+        initial_state = {
+            "user_id": user_id,
+            "session_id": session_id,
+            "query": query,
+            "response": "",
+            "current_step": "start",
+            "agent_decision": "",
+            "deployment_approved": False,
+            "model_name": "",
+            "model_card": {},
+            "model_info": {},
+            "capacity_estimate": {},
+            "deployment_result": {},
+            "react_results": {},
+            "tool_calls": [],
+            "tool_results": [],
+            "messages": [],
+            # Approval fields for ReactWorkflow
+            "pending_tool_calls": [],
+            "approved_tool_calls": [],
+            "rejected_tool_calls": [],
+            "modified_tool_calls": [],
+            "needs_re_reasoning": False,
+            "re_reasoning_feedback": ""
+        }
+        # Create config with thread_id for checkpointer
+        config = {
+            "configurable": {
+                "thread_id": f"{user_id}_{session_id}"
+            }
+        }
+        try:
+            result = await self.graph.ainvoke(initial_state, config)
+            return result
+        except Exception as e:
+            logger.error(f"Error in graph execution: {e}")
+            return {
+                **initial_state,
+                "error": str(e),
+                "error_step": initial_state.get("current_step", "unknown"),
+                "response": f"An error occurred during execution: {str(e)}"
+            }
+    async def astream_generate_nodes(self, query: str, user_id: str = "default_user", session_id: str = "default_session"):
+        """
+        Stream the graph execution node by node (async).
+        Args:
+            query: User's query
+            user_id: User identifier for memory management
+            session_id: Session identifier for memory management
+        Yields:
+            Dict containing node execution updates
+        """
+        initial_state = {
+            "user_id": user_id,
+            "session_id": session_id,
+            "query": query,
+            "response": "",
+            "current_step": "start",
+            "agent_decision": "",
+            "deployment_approved": False,
+            "model_name": "",
+            "model_card": {},
+            "model_info": {},
+            "capacity_estimate": {},
+            "deployment_result": {},
+            "react_results": {},
+            "tool_calls": [],
+            "tool_results": [],
+            "messages": [],
+            # Approval fields for ReactWorkflow
+            "pending_tool_calls": [],
+            "approved_tool_calls": [],
+            "rejected_tool_calls": [],
+            "modified_tool_calls": [],
+            "needs_re_reasoning": False,
+            "re_reasoning_feedback": ""
+        }
+        # Create config with thread_id for checkpointer
+        config = {
+            "configurable": {
+                "thread_id": f"{user_id}_{session_id}"
+            }
+        }
+        try:
+            # Stream through the graph execution
+            async for chunk in self.graph.astream(initial_state, config):
+                # Each chunk contains the node name and its output
+                for node_name, node_output in chunk.items():
+                    yield {
+                        "node": node_name,
+                        "output": node_output,
+                        **node_output  # Include all state updates
+                    }
+        except Exception as e:
+            logger.error(f"Error in graph streaming: {e}")
+            yield {
+                "error": str(e),
+                "status": "error",
+                "error_step": initial_state.get("current_step", "unknown")
+            }
+    def draw_graph(self, output_file_path: str = "basic_agent_graph.png"):
+        """
+        Generate and save a visual representation of the Basic Agent workflow graph.
+        Args:
+            output_file_path: Path where to save the graph PNG file
+        """
+        try:
+            self.graph.get_graph().draw_mermaid_png(output_file_path=output_file_path)
+            logger.info(f"✅ Basic Agent graph visualization saved to: {output_file_path}")
+        except Exception as e:
+            logger.error(f"❌ Failed to generate Basic Agent graph visualization: {e}")

ComputeAgent/graph/graph_ReAct.py ADDED Viewed

	@@ -0,0 +1,331 @@

+"""
+HiveGPT Agent ReAct Graph Module
+This module implements the ReAct workflow for the HiveGPT Agent system.
+It orchestrates agent reasoning, human approval, tool execution, and response refinement
+using LangGraph for workflow management and memory support.
+Key Features:
+- Human-in-the-loop approval for tool execution
+- MCP tool integration
+- Memory-enabled state management
+- Modular node functions for extensibility
+Author: HiveNetCode
+License: Private
+"""
+from typing import Sequence, Dict, Any
+from langchain_core.tools import BaseTool
+from langchain_core.messages import HumanMessage
+from langgraph.graph import StateGraph, END
+import logging
+from typing_extensions import TypedDict
+from typing import Dict, Any, Sequence, List
+from langchain_core.messages import BaseMessage
+from langchain_core.tools import BaseTool
+from langchain_openai.chat_models import ChatOpenAI
+from graph.state import AgentState
+from nodes.ReAct import (
+    agent_reasoning_node,
+    human_approval_node,
+    auto_approval_node,
+    tool_execution_node,
+    generate_node,
+    tool_rejection_exit_node,
+    should_continue_to_approval,
+    should_continue_after_approval,
+    should_continue_after_execution
+)
+logger = logging.getLogger("ReAct Workflow")
+# Global registries (to avoid serialization issues with checkpointer)
+# Nodes access tools and LLM from here instead of storing them in state
+# Both dicts are keyed by the workflow_id that ReactWorkflow.__init__ assigns (id() of the instance)
+_TOOLS_REGISTRY = {}
+_LLM_REGISTRY = {}
+# State class for ReAct workflow
+class ReactState(AgentState):
+    """
+    State schema used by the ReAct workflow graph.
+    Subclasses AgentState without adding or overriding any fields.
+    """
+    pass
+# Main workflow class for ReAct
+class ReactWorkflow:
+    """
+    Orchestrates the ReAct workflow:
+    1. Agent reasoning and tool selection
+    2. Human approval for tool execution
+    3. Tool execution (special handling for researcher tool)
+    4. Response refinement (skipped for researcher tool)
+    Features:
+    - MCP tool integration
+    - Human-in-the-loop approval for all tool calls
+    - Special handling for researcher tool (bypasses refinement, uses generate_node)
+    - Memory management with conversation summaries and recent message context
+    - Proper state management following AgenticRAG pattern
+    """
+    def __init__(self, llm, tools: Sequence[BaseTool]):
+        """
+        Initialize ReAct workflow with LLMs, tools, and optional memory checkpointer.
+        Args:
+            llm: Main LLM for reasoning (will be bound with tools)
+            refining_llm: LLM for response refinement
+            tools: Sequence of MCP tools for execution
+            checkpointer: Optional memory checkpointer for conversation memory
+        """
+        self.llm = llm.bind_tools(tools)
+        self.tools = tools
+        # Register tools and LLM in global registry to avoid serialization issues
+        # Nodes will access them from the registry instead of state
+        self.workflow_id = id(self)
+        _TOOLS_REGISTRY[self.workflow_id] = tools
+        _LLM_REGISTRY[self.workflow_id] = self.llm
+        logger.info(f"✅ Registered {len(tools)} tools and LLM in global registry (ID: {self.workflow_id})")
+        self.graph = self._create_graph()
+    def _initialize_react_state(self, state: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Initialize or update state with workflow_id.
+        The workflow_id is used to retrieve both tools and LLM from the global registry,
+        avoiding serialization issues with the checkpointer.
+        Args:
+            state: Current state (may be from parent graph)
+        Returns:
+            Updated state with workflow_id
+        """
+        updated_state = state.copy()
+        # Store workflow ID for registry lookup (both tools and LLM)
+        if not updated_state.get("workflow_id"):
+            updated_state["workflow_id"] = self.workflow_id
+            logger.info(f"✅ Workflow ID set in state: {self.workflow_id}")
+        # Initialize messages if empty (when coming from parent graph)
+        if not updated_state.get("messages"):
+            query = updated_state.get("query", "")
+            if query:
+                updated_state["messages"] = [HumanMessage(content=query)]
+                logger.info(f"💬 Initialized messages with query for ReACT workflow")
+            else:
+                updated_state["messages"] = []
+                logger.warning(f"⚠️ No query found to initialize messages")
+        return updated_state
+    def _create_graph(self) -> StateGraph:
+        """
+        Creates and configures the ReAct workflow graph with memory support.
+        Returns:
+            Compiled StateGraph for ReAct workflow
+        """
+        workflow = StateGraph(ReactState)
+        # Add initialization node to set up LLM and tools
+        workflow.add_node("initialize_react", self._initialize_react_state)
+        # Add nodes - REMOVED refinement node, always use generate for final response
+        workflow.add_node("agent_reasoning", agent_reasoning_node)
+        workflow.add_node("human_approval", human_approval_node)
+        workflow.add_node("auto_approval", auto_approval_node)
+        workflow.add_node("tool_execution", tool_execution_node)
+        workflow.add_node("generate", generate_node)
+        workflow.add_node("tool_rejection_exit", tool_rejection_exit_node)
+        # Set entry point - start with initialization
+        workflow.set_entry_point("initialize_react")
+        # Connect initialization to agent reasoning
+        workflow.add_edge("initialize_react", "agent_reasoning")
+        # Add conditional edges from agent reasoning
+        workflow.add_conditional_edges(
+            "agent_reasoning",
+            should_continue_to_approval,
+            {
+                "human_approval": "human_approval",
+                "auto_approval": "auto_approval",
+                "generate": "generate",  # Changed from refinement to generate
+            }
+        )
+        # Add conditional edges from human approval
+        workflow.add_conditional_edges(
+            "human_approval",
+            should_continue_after_approval,
+            {
+                "tool_execution": "tool_execution",
+                "tool_rejection_exit": "tool_rejection_exit",
+                "agent_reasoning": "agent_reasoning",  # For re-reasoning
+            }
+        )
+        # Add conditional edges from auto approval (for consistency with human approval)
+        workflow.add_conditional_edges(
+            "auto_approval",
+            should_continue_after_approval,
+            {
+                "tool_execution": "tool_execution",
+                "tool_rejection_exit": "tool_rejection_exit",
+                "agent_reasoning": "agent_reasoning",  # For re-reasoning
+            }
+        )
+        # Add conditional edges from tool execution
+        workflow.add_conditional_edges(
+            "tool_execution",
+            should_continue_after_execution,
+            {
+                "agent_reasoning": "agent_reasoning",
+                "generate": "generate",  # Always generate, never refinement
+            }
+        )
+        # Generate goes directly to END (response formatting is done in generate_node)
+        workflow.add_edge("generate", END)
+        # Generation goes directly to END (response formatting is done in generate_node)
+        workflow.add_edge("generate", END)
+        # Tool rejection exit goes to END
+        workflow.add_edge("tool_rejection_exit", END)
+        # Compile with memory checkpointer if provided
+        return workflow.compile()
+    def get_compiled_graph(self):
+        """
+        Return the compiled graph for embedding in parent graph.
+        Returns:
+            The object stored in self.graph by __init__ (the result of _create_graph()).
+        """
+        return self.graph
+    async def ainvoke(self, query: str, user_id: str = "default_user", session_id: str = "default_session") -> Dict[str, Any]:
+        """
+        Execute the ReAct workflow with a given query and memory context (async version).
+        Args:
+            query: The user's question/request
+            user_id: User identifier for memory management
+            session_id: Session identifier for memory management
+        Returns:
+            Final state with response and execution details
+        """
+        initial_state = {
+            # Memory fields
+            "user_id": user_id,
+            "session_id": session_id,
+            "summary": "",  # Will be loaded from memory if available
+            # Core fields
+            "query": query,
+            "response": "",
+            "messages": [HumanMessage(content=query)],
+            # Tool-related state
+            "tools": self.tools,
+            "pending_tool_calls": [],
+            "approved_tool_calls": [],
+            "rejected_tool_calls": [],
+            "tool_results": [],
+            # LLM instances
+            "llm": self.llm,
+            # Flow control
+            "current_step": "initialized",
+            "skip_refinement": False,
+            "researcher_executed": False,
+            # Retrieved data (for researcher integration)
+            "retrieved_documents": [],
+            "search_results": "",
+            "web_search": "No",
+            # Final response formatting
+            "final_response_dict": {}
+        }
+        # Configure thread for memory if checkpointer is available
+        config = None
+        if self.checkpointer:
+            from helpers.memory import get_memory_manager
+            memory_manager = get_memory_manager()
+            thread_id = f"{user_id}:{session_id}"
+            config = {"configurable": {"thread_id": thread_id}}
+            # Add user message to memory
+            await memory_manager.add_user_message(user_id, session_id, query)
+        logger.info(f"🚀 Starting ReAct workflow for user {user_id}, session {session_id}")
+        if config:
+            result = await self.graph.ainvoke(initial_state, config)
+        else:
+            result = await self.graph.ainvoke(initial_state)
+        # Add AI response to memory if checkpointer is available
+        if self.checkpointer and result.get("response"):
+            from helpers.memory import get_memory_manager
+            memory_manager = get_memory_manager()
+            await memory_manager.add_ai_response(user_id, session_id, result["response"])
+        logger.info("✅ ReAct workflow completed successfully")
+        return result
+    def invoke(self, query: str, user_id: str = "default_user", session_id: str = "default_session") -> Dict[str, Any]:
+        """
+        Synchronous wrapper for async workflow with memory support.
+        Args:
+            query: The user's question/request
+            user_id: User identifier for memory management
+            session_id: Session identifier for memory management
+        Returns:
+            Final state with response and execution details
+        """
+        import asyncio
+        try:
+            # Try to get existing event loop
+            loop = asyncio.get_event_loop()
+            if loop.is_running():
+                # If loop is running, create a task
+                import concurrent.futures
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(asyncio.run, self.ainvoke(query, user_id, session_id))
+                    return future.result()
+            else:
+                # Run directly
+                return loop.run_until_complete(self.ainvoke(query, user_id, session_id))
+        except RuntimeError:
+            # No event loop, create new one
+            return asyncio.run(self.ainvoke(query, user_id, session_id))
+    def draw_graph(self, output_file_path: str = "react_workflow_graph.png"):
+        """
+        Generate and save a visual representation of the ReAct workflow graph.
+        Args:
+            output_file_path: Path where to save the graph PNG file
+        """
+        try:
+            self.graph.get_graph().draw_mermaid_png(output_file_path=output_file_path)
+            logger.info(f"✅ ReAct graph visualization saved to: {output_file_path}")
+        except Exception as e:
+            logger.error(f"❌ Failed to generate ReAct graph visualization: {e}")
+            print(f"Error generating ReAct graph: {e}")
+# Legacy ReactAgent class for backward compatibility
+# ReactAgent is bound to the same class object as ReactWorkflow (an alias, not a subclass).
+ReactAgent = ReactWorkflow

ComputeAgent/graph/graph_deploy.py ADDED Viewed

	@@ -0,0 +1,363 @@

+"""
+Deploy Model Graph - FIXED
+This module implements the model deployment workflow graph for the ComputeAgent.
+KEY FIX: DeployModelState now correctly inherits from AgentState (TypedDict)
+instead of StateGraph.
+Author: ComputeAgent Team
+License: Private
+"""
+import logging
+from typing import Dict, Any, Optional
+from langgraph.graph import StateGraph, END
+from langgraph.graph.state import CompiledStateGraph
+from graph.graph_ReAct import ReactWorkflow
+from graph.state import AgentState
+# Import nodes from ReAct_DeployModel package
+from nodes.ReAct_DeployModel.extract_model_info import extract_model_info_node
+from nodes.ReAct_DeployModel.generate_additional_info import generate_additional_info_node
+from nodes.ReAct_DeployModel.capacity_estimation import capacity_estimation_node
+from nodes.ReAct_DeployModel.capacity_approval import capacity_approval_node, auto_capacity_approval_node
+from models.model_manager import ModelManager
+from langchain_mcp_adapters.client import MultiServerMCPClient
+# Import constants for human approval settings
+from constant import Constants
+# Initialize model manager for dynamic LLM loading and management
+model_manager = ModelManager()
+logger = logging.getLogger("ComputeAgent")
+mcp_client = MultiServerMCPClient(
+    {
+        "hivecompute": {
+            "command": "python",
+            "args": ["/home/hivenet/Compute_MCP/main.py"],
+            "transport": "stdio"
+        }
+    }
+)
+logger = logging.getLogger("DeployModelGraph")
+# Inherits from AgentState (a TypedDict), so it can be used as a StateGraph schema
+class DeployModelState(AgentState):
+    """
+    State schema for the deploy-model workflow graph.
+    Subclasses AgentState without adding fields. Fields declared by
+    AgentState include:
+        - query: str
+        - response: str
+        - current_step: str
+        - messages: List[Dict[str, Any]]
+        - agent_decision: str
+        - deployment_approved: bool
+        - model_name: str
+        - model_card: Dict[str, Any]
+        - model_info: Dict[str, Any]
+        - capacity_estimate: Dict[str, Any]
+        - deployment_result: Dict[str, Any]
+        - react_results: Dict[str, Any]
+        - tool_calls: List[Dict[str, Any]]
+        - tool_results: List[Dict[str, Any]]
+    AgentState declares further fields (capacity approval, tool approval,
+    user/session, workflow and instance fields); its definition is the
+    authoritative list. It does not declare an `llm` field.
+    """
+    pass  # Inherits all fields from AgentState
+class DeployModelAgent:
+    """
+    Standalone Deploy Model Agent class with memory and streaming support.
+    This class provides a dedicated interface for model deployment workflows
+    with full memory management and streaming capabilities.
+    """
+    def __init__(self, llm, react_tools):
+        self.llm = llm
+        self.react_tools = react_tools
+        self.react_subgraph = ReactWorkflow(llm=self.llm, tools=self.react_tools)
+        self.graph = self._create_graph()
+    @classmethod
+    async def create(cls, llm=None, custom_tools=None):
+        """
+        Async factory method for DeployModelAgent.
+        Args:
+            llm: Optional pre-loaded LLM
+            custom_tools: Optional pre-loaded tools for the nested ReactWorkflow
+        Returns:
+            DeployModelAgent instance
+        """
+        if llm is None:
+            llm = await model_manager.load_llm_model(Constants.DEFAULT_LLM_FC)
+        if custom_tools is None:
+            # Load a separate MCP toolset for deployment React
+            custom_tools = await mcp_client.get_tools()
+        return cls(llm=llm, react_tools=custom_tools)
+    def _create_graph(self) -> CompiledStateGraph:
+        """
+        Creates and configures the deploy model workflow.
+        ✅ FIXED: Now correctly creates StateGraph with DeployModelState (TypedDict)
+        """
+        # ✅ This now works because DeployModelState is a TypedDict (via AgentState)
+        workflow = StateGraph(DeployModelState)
+        # Add nodes
+        workflow.add_node("extract_model_info", extract_model_info_node)
+        workflow.add_node("generate_model_name", generate_additional_info_node)
+        workflow.add_node("capacity_estimation", capacity_estimation_node)
+        workflow.add_node("capacity_approval", capacity_approval_node)
+        workflow.add_node("auto_capacity_approval", auto_capacity_approval_node)
+        workflow.add_node("react_deployment", self.react_subgraph.get_compiled_graph())
+        # Set entry point
+        workflow.set_entry_point("extract_model_info")
+        # Add conditional edges - Decision point after model extraction
+        workflow.add_conditional_edges(
+            "extract_model_info",
+            self.should_validate_or_generate,
+            {
+                "generate_model_name": "generate_model_name",
+                "capacity_estimation": "capacity_estimation"
+            }
+        )
+        # Add conditional edges from capacity estimation to approval
+        workflow.add_conditional_edges(
+            "capacity_estimation",
+            self.should_continue_to_capacity_approval,
+            {
+                "capacity_approval": "capacity_approval",
+                "auto_capacity_approval": "auto_capacity_approval",
+                "end": END
+            }
+        )
+        # Add conditional edges from capacity approval
+        workflow.add_conditional_edges(
+            "capacity_approval",
+            self.should_continue_after_capacity_approval,
+            {
+                "react_deployment": "react_deployment",
+                "capacity_estimation": "capacity_estimation",
+                "end": END
+            }
+        )
+        # Auto approval always goes to deployment
+        workflow.add_edge("auto_capacity_approval", "react_deployment")
+        # Final edges
+        workflow.add_edge("generate_model_name", END)
+        workflow.add_edge("react_deployment", END)
+        # Compile
+        return workflow.compile()
+    def get_compiled_graph(self):
+        """
+        Return the compiled graph for embedding in parent graph.
+        Returns:
+            The object stored in self.graph by __init__ (the result of _create_graph()).
+        """
+        return self.graph
+    def should_validate_or_generate(self, state: Dict[str, Any]) -> str:
+        """
+        Decision routing function after model extraction.
+        Path 1: If model found and valid → proceed to capacity estimation
+        Path 1A: If no model info or invalid → generate helpful response with suggestions
+        Args:
+            state: Current workflow state
+        Returns:
+            Next node name or END
+        """
+        if state.get("model_name") and state.get("model_info") and not state.get("model_info", {}).get("error"):
+            return "capacity_estimation"  # Path 1: Valid model case
+        else:
+            return "generate_model_name"  # Path 1A: No info case
+    def should_continue_to_capacity_approval(self, state: Dict[str, Any]) -> str:
+        """
+        Determine whether to proceed to human approval, auto-approval, or end.
+        This function controls the flow after capacity estimation based on HUMAN_APPROVAL_CAPACITY setting:
+        - If HUMAN_APPROVAL_CAPACITY is True: Route to capacity_approval for manual approval
+        - If HUMAN_APPROVAL_CAPACITY is False: Route to auto_capacity_approval for automatic approval
+        - If capacity estimation failed: Route to end
+        Args:
+            state: Current workflow state containing capacity estimation results
+        Returns:
+            Next node name: "capacity_approval", "auto_capacity_approval", or "end"
+        """
+        # Check if capacity estimation was successful
+        if state.get("capacity_estimation_status") != "success":
+            logger.info("🔄 Capacity estimation failed - routing to end")
+            return "end"
+        # Check if human approval is enabled
+        HUMAN_APPROVAL_CAPACITY = True if Constants.HUMAN_APPROVAL_CAPACITY == "true" else False
+        if not HUMAN_APPROVAL_CAPACITY:
+            logger.info("🔄 HUMAN_APPROVAL_CAPACITY disabled - routing to auto-approval")
+            return "auto_capacity_approval"
+        else:
+            logger.info("🔄 HUMAN_APPROVAL_CAPACITY enabled - routing to human approval")
+            return "capacity_approval"
+    def should_continue_after_capacity_approval(self, state: Dict[str, Any]) -> str:
+        """
+        Decide whether to proceed to ReAct deployment, re-estimate capacity, or end.
+        """
+        logger.info(f"🔍 Routing after capacity approval:")
+        logger.info(f"   - capacity_approved: {state.get('capacity_approved')}")
+        logger.info(f"   - needs_re_estimation: {state.get('needs_re_estimation')}")
+        logger.info(f"   - capacity_approval_status: {state.get('capacity_approval_status')}")
+        # 1. FIRST check for re-estimation (highest priority)
+        needs_re_estimation = state.get("needs_re_estimation")
+        if needs_re_estimation is True:
+            logger.info("🔄 Re-estimation requested - routing to capacity_estimation")
+            return "capacity_estimation"
+        # 2. THEN check if APPROVED (explicit True check)
+        capacity_approved = state.get("capacity_approved")
+        if capacity_approved is True:
+            logger.info("✅ Capacity approved - proceeding to react_deployment")
+            return "react_deployment"
+        # 3. Check if REJECTED (explicit False check)
+        if capacity_approved is False:
+            logger.info("❌ Capacity rejected - ending workflow")
+            return "end"
+        # 4. If capacity_approved is None and no re-estimation, something is wrong
+        logger.warning(f"⚠️ Unexpected state in capacity approval routing")
+        logger.warning(f"   capacity_approved: {capacity_approved} (type: {type(capacity_approved)})")
+        logger.warning(f"   needs_re_estimation: {needs_re_estimation} (type: {type(needs_re_estimation)})")
+        logger.warning(f"   Full state keys: {list(state.keys())}")
+        # Default to end to prevent infinite loops
+        return "end"
+    async def ainvoke(self,
+                      query: str,
+                      user_id: str = "default_user",
+                      session_id: str = "default_session",
+                      enable_memory: bool = False,
+                      config: Optional[Dict] = None) -> Dict[str, Any]:
+        """
+        Asynchronously invoke the Deploy Model Agent workflow.
+        Args:
+            query: User's model deployment query
+            user_id: User identifier for memory management
+            session_id: Session identifier for memory management
+            enable_memory: Whether to enable conversation memory management
+            config: Optional config dict
+        Returns:
+            Final workflow state with deployment results
+        """
+        # Initialize state with all required fields from AgentState
+        initial_state = {
+            # Core fields
+            "query": query,
+            "response": "",
+            "current_step": "initialized",
+            "messages": [],
+            # Decision fields
+            "agent_decision": "",
+            "deployment_approved": False,
+            # Model deployment fields
+            "model_name": "",
+            "llm": None,
+            "model_card": {},
+            "model_info": {},
+            "capacity_estimate": {},
+            "deployment_result": {},
+            # React agent fields
+            "react_results": {},
+            "tool_calls": [],
+            "tool_results": [],
+        }
+        # Extract approval from config if provided
+        if config and "configurable" in config:
+            if "capacity_approved" in config["configurable"]:
+                initial_state["deployment_approved"] = config["configurable"]["capacity_approved"]
+                logger.info(f"📋 DeployModelAgent received approval: {config['configurable']['capacity_approved']}")
+        # Configure memory if checkpointer is available
+        memory_config = None
+        if self.checkpointer:
+            thread_id = f"{user_id}:{session_id}"
+            memory_config = {"configurable": {"thread_id": thread_id}}
+        # Merge configs
+        final_config = memory_config or {}
+        if config:
+            if "configurable" in final_config:
+                final_config["configurable"].update(config.get("configurable", {}))
+            else:
+                final_config = config
+        logger.info(f"🚀 Starting Deploy Model workflow")
+        # Execute the graph
+        if final_config:
+            result = await self.graph.ainvoke(initial_state, final_config)
+        else:
+            result = await self.graph.ainvoke(initial_state)
+        return result
+    def invoke(self, query: str, user_id: str = "default_user", session_id: str = "default_session", enable_memory: bool = False) -> Dict[str, Any]:
+        """
+        Synchronously invoke the Deploy Model Agent workflow.
+        Args:
+            query: User's model deployment query
+            user_id: User identifier for memory management
+            session_id: Session identifier for memory management
+            enable_memory: Whether to enable conversation memory management
+        Returns:
+            Final workflow state with deployment results
+        """
+        import asyncio
+        return asyncio.run(self.ainvoke(query, user_id, session_id, enable_memory))
+    def draw_graph(self, output_file_path: str = "deploy_model_graph.png"):
+        """
+        Generate and save a visual representation of the Deploy Model workflow graph.
+        Args:
+            output_file_path: Path where to save the graph PNG file
+        """
+        try:
+            self.graph.get_graph().draw_mermaid_png(output_file_path=output_file_path)
+            logger.info(f"✅ Graph visualization saved to: {output_file_path}")
+        except Exception as e:
+            logger.error(f"❌ Failed to generate graph visualization: {e}")
+            print(f"Error generating graph: {e}")

ComputeAgent/graph/state.py ADDED Viewed

	@@ -0,0 +1,84 @@

+from typing import Dict, Any, List
+from typing_extensions import TypedDict
+class AgentState(TypedDict):
+    """
+    AgentState extends HiveGPTMemoryState to provide a unified state structure for the Compute Agent workflow.
+    Inherits all memory-related fields and adds compute agent-specific workflow fields:
+    Core Fields:
+        - query: User's input query
+        - response: Final response to user
+        - current_step: Current workflow step identifier
+    Decision Fields:
+        - agent_decision: Routing decision ('deploy_model' or 'react_agent')
+        - deployment_approved: Whether human approved deployment
+    Model Deployment Fields:
+        - model_name: Name/ID of the model to deploy
+        - model_card: Raw model card data from HuggingFace
+        - model_info: Extracted model information (JSON)
+        - capacity_estimate: Estimated compute resources needed
+        - deployment_result: Result of model deployment
+    React Agent Fields:
+        - react_results: Results from React agent execution
+        - tool_calls: List of tool calls made by React agent
+        - tool_results: Results from tool executions
+    Error Handling:
+        - error: Error message if any step fails
+        - error_step: Step where error occurred
+    """
+    # Core fields
+    query: str
+    response: str
+    current_step: str
+    messages: List[Dict[str, Any]]
+    # Decision fields
+    agent_decision: str
+    deployment_approved: bool
+    # Model deployment fields
+    model_name: str
+    model_card: Dict[str, Any]
+    model_info: Dict[str, Any]
+    capacity_estimate: Dict[str, Any]
+    deployment_result: Dict[str, Any]
+    capacity_estimation_status: str
+    capacity_approval_status: str
+    capacity_approved: bool
+    estimated_gpu_memory: float
+    gpu_requirements: Dict[str, Any]
+    cost_estimates: Dict[str, Any]
+    need_reestimation: bool
+    # React agent fields
+    react_results: Dict[str, Any]
+    tool_calls: List[Dict[str, Any]]
+    tool_results: List[Dict[str, Any]]
+    # Tool approval fields (for human-in-the-loop)
+    pending_tool_calls: List[Dict[str, Any]]
+    approved_tool_calls: List[Dict[str, Any]]
+    rejected_tool_calls: List[Dict[str, Any]]
+    modified_tool_calls: List[Dict[str, Any]]
+    needs_re_reasoning: bool
+    re_reasoning_feedback: str
+    # User identification
+    user_id: str
+    session_id: str
+    # Workflow identification (for tools registry lookup)
+    workflow_id: int
+    # Compute instance fields (for HiveCompute deployment)
+    instance_id: str
+    instance_status: str
+    instance_created: bool

ComputeAgent/hivenet.jpg ADDED Viewed

ComputeAgent/main.py ADDED Viewed

	@@ -0,0 +1,284 @@

+# This allows importing modules from the top-level project directory
+import os
+import sys
+sys.path.append("/home/hivenet")
+"""
+ComputeAgent FastAPI Main Application
+This is the main entry point for the ComputeAgent FastAPI application.
+It creates the FastAPI app, includes routers, and manages the application lifecycle.
+Features:
+- FastAPI application setup
+- Router inclusion for modular organization
+- Application lifecycle management (startup/shutdown)
+- CORS middleware configuration
+- Global error handlers
+- Background task management for memory operations
+- Interactive API documentation
+Usage:
+    python main.py
+Or with uvicorn directly:
+    uvicorn main:app --host 0.0.0.0 --port 8000 --reload
+Author: ComputeAgent Team
+License: Private
+"""
+import asyncio
+import logging
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse, RedirectResponse
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+# Import the compute agent router and initialization function
+from routers.compute_agent_HITL import compute_agent_router, initialize_agent
+# Initialize logging
+# Root configuration: INFO level, records formatted as
+# "<time> - <logger name> - <level> - <message>".
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+# Logger used by the lifespan, error-handler and request-logging code in this module
+logger = logging.getLogger("ComputeAgent Main")
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """
+    Lifespan context manager for FastAPI application.
+    Handles startup and shutdown events.
+    Args:
+        app: FastAPI application instance
+    """
+    # Startup
+    logger.info("=" * 80)
+    logger.info("🚀 Starting ComputeAgent API Application...")
+    logger.info("=" * 80)
+    try:
+        # Initialize the ComputeAgent
+        await initialize_agent()
+        logger.info("✅ ComputeAgent API ready to serve requests")
+    except Exception as e:
+        logger.error(f"❌ Failed to initialize application: {e}")
+        raise
+    logger.info("=" * 80)
+    logger.info("📚 API Documentation available at:")
+    logger.info("   - Swagger UI: http://localhost:8000/docs")
+    logger.info("   - ReDoc: http://localhost:8000/redoc")
+    logger.info("=" * 80)
+    yield
+    # Shutdown
+    logger.info("=" * 80)
+    logger.info("👋 Shutting down ComputeAgent API Application...")
+    logger.info("✅ ComputeAgent API shutdown complete")
+    logger.info("=" * 80)
+# Create FastAPI application
+# The description is Markdown-formatted text; startup and shutdown are
+# delegated to the lifespan() context manager defined in this module.
+app = FastAPI(
+    title="ComputeAgent API",
+    description="""
+    AI-powered agent system for model deployment and compute workflows.
+    ## Features
+    * **Model Deployment**: Deploy AI models from HuggingFace with capacity estimation
+    * **React Agent**: Execute compute tasks with MCP tool integration
+    * **Memory Management**: Persistent conversations across sessions
+    * **Streaming Support**: Real-time updates via Server-Sent Events
+    * **Human-in-the-Loop**: Approval workflow for capacity decisions
+    ## Endpoints
+    ### ComputeAgent
+    - **POST /api/compute/query** - Process queries (non-streaming)
+    - **POST /api/compute/query/stream** - Process queries (streaming)
+    - **POST /api/compute/memory/clear** - Clear conversation memory
+    - **POST /api/compute/memory/inspect** - Inspect memory status
+    - **GET /api/compute/health** - Health check
+    - **GET /api/compute/examples** - Example queries
+    - **GET /api/compute/info** - Router information
+    ## Getting Started
+    1. Check API health: `GET /api/compute/health`
+    2. Get example queries: `GET /api/compute/examples`
+    3. Process a query: `POST /api/compute/query`
+    For streaming responses, use: `POST /api/compute/query/stream`
+    """,
+    version="1.0.0",
+    lifespan=lifespan,  # startup/shutdown hooks
+    docs_url="/docs",  # Swagger UI path (the root endpoint redirects here)
+    redoc_url="/redoc"  # ReDoc path
+)
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Configure appropriately for production
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Include routers
+app.include_router(compute_agent_router)
+# Root endpoint
+@app.get("/", tags=["root"])
+async def root():
+    """
+    Root endpoint - redirects to API documentation.
+    Returns:
+        Redirect to Swagger UI documentation
+    """
+    return RedirectResponse(url="/docs")
+@app.get("/health", tags=["root"])
+async def global_health_check():
+    """
+    Global health check endpoint.
+    Returns:
+        Application health status
+    """
+    return {
+        "status": "healthy",
+        "application": "ComputeAgent API",
+        "version": "1.0.0",
+        "docs": "/docs",
+        "compute_agent_health": "/api/compute/health"
+    }
+# Global error handlers
+@app.exception_handler(404)
+async def not_found_handler(request: Request, exc: Exception):
+    """
+    Custom 404 handler for not found endpoints.
+    Args:
+        request: The incoming request
+        exc: The exception raised
+    Returns:
+        JSON response with error details
+    """
+    return JSONResponse(
+        status_code=404,
+        content={
+            "success": False,
+            "error": "Endpoint not found",
+            "path": str(request.url.path),
+            "message": "The requested endpoint does not exist. Visit /docs for available endpoints."
+        }
+    )
+@app.exception_handler(500)
+async def internal_error_handler(request: Request, exc: Exception):
+    """
+    Custom 500 handler for internal server errors.
+    Args:
+        request: The incoming request
+        exc: The exception raised
+    Returns:
+        JSON response with error details
+    """
+    logger.error(f"Internal server error on {request.url.path}: {exc}")
+    return JSONResponse(
+        status_code=500,
+        content={
+            "success": False,
+            "error": "Internal server error",
+            "detail": str(exc),
+            "message": "An unexpected error occurred. Please try again or contact support."
+        }
+    )
+@app.exception_handler(Exception)
+async def general_exception_handler(request: Request, exc: Exception):
+    """
+    General exception handler for uncaught exceptions.
+    Args:
+        request: The incoming request
+        exc: The exception raised
+    Returns:
+        JSON response with error details
+    """
+    logger.error(f"Unhandled exception on {request.url.path}: {exc}", exc_info=True)
+    return JSONResponse(
+        status_code=500,
+        content={
+            "success": False,
+            "error": "Unexpected error",
+            "detail": str(exc),
+            "message": "An unexpected error occurred. Please check logs for details."
+        }
+    )
+# Middleware for logging
+@app.middleware("http")
+async def log_requests(request: Request, call_next):
+    """
+    Middleware to log all incoming requests.
+    Args:
+        request: The incoming request
+        call_next: The next middleware or route handler
+    Returns:
+        Response from the route handler
+    """
+    logger.info(f"📨 {request.method} {request.url.path}")
+    try:
+        response = await call_next(request)
+        logger.info(f"✅ {request.method} {request.url.path} - Status: {response.status_code}")
+        return response
+    except Exception as e:
+        logger.error(f"❌ {request.method} {request.url.path} - Error: {e}")
+        raise
+if __name__ == "__main__":
+    """
+    Run the FastAPI application with uvicorn.
+    Configuration:
+    - Host: 0.0.0.0 (accessible from network)
+    - Port: 8000
+    - Reload: Enabled for development
+    - Log level: info
+    """
+    logger.info("🎬 Starting uvicorn server...")
+    uvicorn.run(
+        "main:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=False,
+        log_level="info",
+        access_log=True
+    )

ComputeAgent/models/__init__.py ADDED Viewed

File without changes

ComputeAgent/models/doc.py ADDED Viewed

	@@ -0,0 +1,55 @@

+# -*- coding: utf-8 -*-
+from typing import List, Optional, Union
+from pydantic import BaseModel, Field
+class SourceDocument(BaseModel):
+    filename: Optional[str] = Field(
+        None, alias="file_name", description="Name of the source document file"
+    )
+    title: Optional[str] = Field(
+        None, alias="title", description="Title of the source document"
+    )
+    type: Optional[str] = Field(
+        None,
+        alias="file_type",
+        description="Data source type of the source document (e.g., confluence, hiveDisk, slack, etc.).",
+    )
+    content: Optional[str] = Field(
+        None,
+        description="Relevant content in the source document used as a piece of context to generate the response.",
+    )
+    url: Optional[str] = Field(
+        None, alias="doc_url", description="Link to the source document, if available."
+    )
+    created_date: Optional[str] = Field(
+        None, description="Date when the source document was created."
+    )
+    created_by: Optional[str] = Field(
+        None, description="Author of the source document."
+    )
+    mongodb_link: Optional[Union[str, bool]] = Field(
+        None,
+        description="Unique identifier of the source document in the MongoDB. This will use to check whether there is an image in the MongoDB or not related to this document.",
+    )
+    model_config = {
+        "extra": "allow"
+    }
+    def __str__(self):
+        return (
+            f"SourceDocument(\n"
+            f"  Title: {self.title or 'N/A'}\n"
+            f"  Type: {self.type or 'N/A'}\n"
+            f"  Author: {self.created_by or 'N/A'}\n"
+            f"  Content: {self.content or 'N/A'}\n"
+            f"  URL: {self.url or 'N/A'}\n"
+            f"  Created Date: {self.created_date or 'N/A'}\n"
+            f"  Updated Date: {self.updated_date or 'N/A'}\n"
+            f")"
+        )
+class DocumentCollection(BaseModel):
+    """Container model wrapping a list of SourceDocument entries."""
+    # The wrapped documents; required (no default is declared).
+    list_of_doc: List[SourceDocument]

ComputeAgent/models/model_manager.py ADDED Viewed

	@@ -0,0 +1,100 @@

+"""
+Model Manager Module
+This module provides centralized management of AI models for the HiveGPT Agent
+system. It handles lazy loading and caching of LLM clients, using an async
+lock so that concurrent requests do not construct the same model twice.
+The ModelManager class offers:
+- Lazy loading and caching of language models
+- Thread-safe model access with async locks
+- Integration with ModelRouter for model discovery
+- Memory-efficient model reuse across requests
+Key Features:
+- Singleton pattern for consistent model access
+- Async/await support for non-blocking operations
+- Automatic model caching to improve performance
+- Error handling for model loading failures
+Author: HiveNetCode
+License: Private
+"""
+import asyncio
+from typing import Dict, Any, Optional
+from langchain_openai import ChatOpenAI
+from models.model_router import ModelRouter, LLMModel
+from constant import Constants
+class ModelManager:
+    """
+    Centralized manager for AI model loading, caching, and lifecycle management.
+    This class implements a thread-safe caching system for language models and
+    reranking models, providing efficient model reuse across the application.
+    It integrates with ModelRouter to discover available models and handles
+    the initialization and configuration of ChatOpenAI instances.
+    The ModelManager follows a singleton-like pattern where models are cached
+    at the class level to ensure memory efficiency and consistent model access
+    throughout the application lifecycle.
+    Attributes:
+        _llm_models: Cache of loaded language models
+        _reranker_models: Cache of loaded reranking models
+        _llm_lock: Async lock for thread-safe LLM loading
+        _reranker_lock: Async lock for thread-safe reranker loading
+        model_router: Interface to model discovery service
+        reranking_model_name: Name of the default reranking model
+        reranker: Cached reranking model instance
+    """
+    def __init__(self):
+        """
+        Initialize the ModelManager with empty caches and async locks.
+        Sets up the internal data structures for model caching and thread-safe
+        access. Initializes the ModelRouter for model discovery and sets the
+        default reranking model configuration.
+        """
+        # Model caches for efficient reuse
+        self._llm_models: Dict[str, ChatOpenAI] = {}
+        # Thread safety locks for concurrent access
+        self._llm_lock = asyncio.Lock()
+        # Model discovery and configuration
+        self.model_router = ModelRouter()
+    async def load_llm_model(self, model_name: str) -> ChatOpenAI:
+        """
+        Asynchronously loads and returns a language model for the specified model name.
+        This method checks if the model is already loaded and cached in the class-level
+        dictionary `_llm_models`. If not, it acquires a lock to ensure thread-safe
+        model loading, retrieves the model information from the Model Router, initializes
+        a `ChatOpenAI` instance with the given parameters, and caches it for future use.
+        Args:
+            model_name (str): The name of the language model to load.
+        Returns:
+            ChatOpenAI: An instance of the loaded language model.
+        """
+        if model_name in self._llm_models:
+            return self._llm_models[model_name]
+        async with self._llm_lock:
+            if model_name not in self._llm_models:
+                loaded_model: LLMModel = self.model_router.get_llm_model(model_name)
+                llm = ChatOpenAI(
+                    model_name=model_name,
+                    api_key=Constants.MODEL_ROUTER_TOKEN,
+                    base_url=loaded_model.openai_endpoint,
+                    temperature=0.1,
+                )
+                self._llm_models[model_name] = llm
+            return self._llm_models[model_name]

ComputeAgent/models/model_router.py ADDED Viewed

	@@ -0,0 +1,146 @@

+# -*- coding: utf-8 -*-
+import logging
+from constant import Constants
+import requests
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Union
+from collections import OrderedDict
+logger = logging.getLogger("Reranker")
+@dataclass
+class Model:
+    """
+    Base dataclass describing a model served by the HiveGPT Model Router.
+    Attributes:
+        name (str): The HuggingFace repository path for the model, e.g., "meta-llama/Meta-Llama-3.1-8B".
+        alias (str): A shorter, more user-friendly alias or identifier for the model.
+        openai_endpoint (str): The base openai endpoint through which the model can be accessed.
+    """
+    name: str
+    alias: str
+    openai_endpoint: str
+@dataclass
+class LLMModel(Model):
+    """
+    Represents an LLM served by the HiveGPT Model Router.
+    Extends Model with the maximum sequence length.
+    Attributes:
+        name (str): The HuggingFace repository path for the model, e.g., "meta-llama/Meta-Llama-3.1-8B".
+        alias (str): A shorter, more user-friendly alias or identifier for the model.
+        openai_endpoint (str): The base openai endpoint through which the model can be accessed.
+        max_len (int): The maximum sequence length that the model can handle.
+    """
+    max_len: int
+class ModelRouter:
+    """
+    A wrapper class that fetches info from the HiveGPT Model Router
+    """
+    def __init__(self, host: str = Constants.MODEL_ROUTER_HOST, port: str = Constants.MODEL_ROUTER_PORT):
+        """
+        Initializes the ModelRouter.
+        Args:
+            host (str): The hostname of the Model Router server.
+            port (int): The port number of the Model Router server.
+        Note: The ModelRouter will automatically refresh the map of served models upon initialization.
+        """
+        self.host = host
+        self.port = port
+        self.models_health_endpoint = f"http://{self.host}:{self.port}/v1/models"
+        self.served_models: Dict[str, LLMModel] = {}
+        self.logger = logging.getLogger("HiveGPT Model Router")
+        self.refresh()
+    def _generate_openai_base(self, alias: str, base_endpoint: str = "/v1") -> str:
+        """
+        Generates the base OpenAI endpoint URL for a given alias.
+        Args:
+            alias (str): The alias of the model.
+            base_endpoint (str): The base endpoint for the OpenAI API.
+        Returns:
+            str: The base OpenAI endpoint URL for the given alias.
+        """
+        return f"http://{self.host}:{self.port}/{alias}{base_endpoint}"
+    def _sort_language_models(self):
+        """
+        Sort returned models by alias in ascending order
+        and put the default LLM always on top.
+        """
+        default_model_key = Constants.DEFAULT_LLM_NAME
+        # Get the default model
+        default_model = {default_model_key: self.served_models[default_model_key]} if default_model_key in self.served_models else None
+        # Sort remaining models in ascending order
+        other_models = {k: v for k, v in self.served_models.items() if k != default_model_key}
+        sorted_other_models = OrderedDict(sorted(other_models.items(), key=lambda item: item[0]))
+        # Combine the default model and the sorted models
+        sorted_llms = sorted_other_models
+        if default_model is not None:
+            sorted_llms = OrderedDict(**default_model, **sorted_other_models)
+        # Update the served_models dictionary
+        self.served_models = sorted_llms
+    def refresh(self):
+        """Refreshes the map of served models."""
+        try:
+            response = requests.get(self.models_health_endpoint)
+            response.raise_for_status()
+            models_json = response.json()
+            models = {}
+            for model in models_json:
+                alias = model["model_alias"]
+                name = model["model_name"]
+                max_len = model["max_model_len"]
+                openai_endpoint = self._generate_openai_base(alias=alias)
+                models[name] = LLMModel(name=name, alias=alias, openai_endpoint=openai_endpoint, max_len=max_len)
+            self.served_models = models
+            self._sort_language_models()
+            self.logger.info("Models map successfully refreshed.")
+        except requests.RequestException as e:
+            self.logger.error(f"Failed to refresh models map: {e}")
+            self.served_models = {}
+    def get_llm_model(self, name: str) -> Optional[LLMModel]:
+        """Gets the LLMModel object for the specified model name.
+        Args:
+            name (str): The HuggingFace repository path for the model. for example, "meta-llama/Meta-Llama-3.1-8B"
+        Returns:
+            Optional[Model]: The Model object.
+                             Returns None if the model name is not found.
+        """
+        return self.served_models.get(name)
+    def get_all_llm_models(self) -> Dict[str, LLMModel]:
+        """Returns a map of all served LLMs.
+        Returns:
+            Dict[str, LLMModel]: A dictionary where keys are LLM names and values are LLMModel objects.
+        """
+        self._sort_language_models()
+        return self.served_models

ComputeAgent/nodes/ReAct/__init__.py ADDED Viewed

	@@ -0,0 +1,58 @@

+"""
+ReAct (Reasoning and Acting) Agent Implementation
+This module implements the ReAct pattern for the HiveGPT agent system, providing
+a human-in-the-loop workflow for intelligent tool selection and execution.
+The ReAct pattern combines reasoning and acting in language models, allowing AI agents
+to interleave thought, action, and observation steps to solve complex tasks effectively.
+Key Components:
+    - Agent Reasoning: Two-phase approach for tool selection vs direct answers
+    - Human Approval: Interactive approval system for tool execution (optional based on HUMAN_APPROVAL setting)
+    - Tool Execution: Managed execution of approved tools with error handling
+    - Response Generation: Memory-aware response generation using retrieved data
+    - Response Refinement: LLM-based refinement for non-researcher tool results
+    - Decision Functions: Routing logic between workflow nodes
+Features:
+    - Memory context integration for personalized responses
+    - Configurable human approval workflow (HUMAN_APPROVAL environment variable)
+    - Special handling for researcher tool with document retrieval
+    - Comprehensive error handling and fallback mechanisms
+    - Structured API response formatting with source attribution
+    - Extensive logging for debugging and monitoring
+Environment Configuration:
+    HUMAN_APPROVAL: Set to "False" to automatically approve all tools and bypass
+                   human approval step. Defaults to "True" for interactive approval.
+Example:
+    >>> from nodes.ReAct import agent_reasoning_node
+    >>> state = {"messages": [...], "llm": model, "tools": tools}
+    >>> result = await agent_reasoning_node(state)
+"""
+from .agent_reasoning_node import agent_reasoning_node
+from .human_approval_node import human_approval_node
+from .auto_approval_node import auto_approval_node
+from .tool_execution_node import tool_execution_node
+from .generate_node import generate_node
+from .tool_rejection_exit_node import tool_rejection_exit_node
+from .decision_functions import (
+    should_continue_to_approval,
+    should_continue_after_approval,
+    should_continue_after_execution
+)
+# Names exported by `from nodes.ReAct import *`: the workflow node callables
+# followed by the routing/decision functions imported above.
+__all__ = [
+    "agent_reasoning_node",
+    "human_approval_node",
+    "auto_approval_node",
+    "tool_execution_node",
+    "generate_node",
+    "tool_rejection_exit_node",
+    "should_continue_to_approval",
+    "should_continue_after_approval",
+    "should_continue_after_execution"
+]

ComputeAgent/nodes/ReAct/agent_reasoning_node.py ADDED Viewed

	@@ -0,0 +1,399 @@

+"""
+Agent Reasoning Node for ReAct Pattern - Enhanced Version
+This module implements enhanced agent reasoning with support for:
+1. Initial tool selection based on query
+2. Re-reasoning after tool execution with results
+3. Re-reasoning after user feedback/modifications
+4. Memory context integration
+Key Enhancements:
+    - User feedback integration for re-reasoning
+    - Modified tool context awareness
+    - Conversation history preservation
+    - Memory-enhanced reasoning
+Author: ComputeAgent Team
+"""
+from typing import Dict, Any
+from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
+from constant import Constants
+import logging
+logger = logging.getLogger("ReAct Agent Reasoning")
+def _get_llm_from_registry(workflow_id: int):
+    """
+    Get LLM from the global registry using workflow ID.
+    This avoids storing non-serializable LLM objects in state.
+    """
+    from graph.graph_ReAct import _LLM_REGISTRY
+    llm = _LLM_REGISTRY.get(workflow_id)
+    if llm is None:
+        raise ValueError(f"LLM not found in registry for workflow_id: {workflow_id}")
+    return llm
+async def agent_reasoning_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Enhanced agent reasoning node that handles initial reasoning and re-reasoning.
+    Supports three reasoning scenarios:
+    1. Initial reasoning: Fresh query, no prior tool executions
+    2. Post-execution reasoning: After tools executed, decide if more tools needed
+    3. Re-reasoning: After user feedback/modifications, reconsider approach
+    Special handling for deployment workflow:
+    - Detects when in deployment mode (capacity_approved=True)
+    - Provides specific instructions for calling create_compute_instance
+    - Passes deployment parameters from capacity estimation
+    Args:
+        state: Current ReAct state
+    Returns:
+        Updated state with tool calls or completion decision
+    """
+    # Extract state information
+    query = state.get("query", "")
+    messages = state.get("messages", [])
+    tool_results = state.get("tool_results", [])
+    user_id = state.get("user_id", "")
+    session_id = state.get("session_id", "")
+    needs_re_reasoning = state.get("needs_re_reasoning", False)
+    re_reasoning_feedback = state.get("re_reasoning_feedback", "")
+    modified_tool_calls = state.get("modified_tool_calls", [])
+    # Extract deployment-specific information
+    capacity_approved = state.get("capacity_approved", False)
+    model_name = state.get("model_name", "")
+    model_info = state.get("model_info", {})
+    gpu_requirements = state.get("gpu_requirements", {})
+    estimated_gpu_memory = state.get("estimated_gpu_memory", 0)
+    # Get LLM from registry using workflow_id (avoids serialization issues)
+    workflow_id = state.get("workflow_id")
+    if not workflow_id:
+        logger.error("❌ No workflow_id in state - cannot retrieve LLM")
+        updated_state = state.copy()
+        updated_state["pending_tool_calls"] = []
+        updated_state["current_step"] = "agent_reasoning_error"
+        updated_state["error"] = "Missing workflow_id"
+        return updated_state
+    try:
+        llm = _get_llm_from_registry(workflow_id)
+        logger.info(f"✅ Retrieved LLM from registry")
+    except ValueError as e:
+        logger.error(f"❌ {e}")
+        updated_state = state.copy()
+        updated_state["pending_tool_calls"] = []
+        updated_state["current_step"] = "agent_reasoning_error"
+        updated_state["error"] = str(e)
+        return updated_state
+    # Determine reasoning scenario
+    if needs_re_reasoning:
+        logger.info("🔄 Re-reasoning mode: User requested reconsideration")
+        reasoning_mode = "re_reasoning"
+    elif tool_results:
+        logger.info("🔄 Post-execution mode: Evaluating if more tools needed")
+        reasoning_mode = "post_execution"
+    else:
+        logger.info("🎯 Initial reasoning mode: Processing fresh query")
+        reasoning_mode = "initial"
+    # Build memory context if available
+    memory_context = ""
+    if user_id and session_id:
+        try:
+            from helpers.memory import get_memory_manager
+            memory_manager = get_memory_manager()
+            memory_context = await memory_manager.build_context_for_node(
+                user_id,
+                session_id,
+                "agent_reasoning"
+            )
+            if memory_context:
+                logger.info("🧠 Using memory context for reasoning")
+        except Exception as e:
+            logger.warning(f"⚠️ Could not load memory context: {e}")
+    # Build reasoning prompt based on scenario
+    reasoning_prompt = _build_reasoning_prompt(
+        query=query,
+        reasoning_mode=reasoning_mode,
+        memory_context=memory_context,
+        tool_results=tool_results,
+        re_reasoning_feedback=re_reasoning_feedback,
+        modified_tool_calls=modified_tool_calls,
+        # Pass deployment context
+        capacity_approved=capacity_approved,
+        model_name=model_name,
+        model_info=model_info,
+        gpu_requirements=gpu_requirements,
+        estimated_gpu_memory=estimated_gpu_memory
+    )
+    # Prepare messages for LLM - ALWAYS include conversation history for context
+    if messages:
+        # Include conversation history so agent can reference previous responses
+        llm_messages = messages + [HumanMessage(content=reasoning_prompt)]
+        logger.info(f"📝 Including {len(messages)} previous messages for context")
+    else:
+        # First message in conversation
+        llm_messages = [HumanMessage(content=reasoning_prompt)]
+        logger.info("📝 Starting new conversation (no previous messages)")
+    logger.info(f"🤖 Invoking LLM for {reasoning_mode} reasoning...")
+    try:
+        # Invoke LLM with tools bound
+        response = await llm.ainvoke(llm_messages)
+        # Extract tool calls if any
+        tool_calls = []
+        if hasattr(response, 'tool_calls') and response.tool_calls:
+            tool_calls = [
+                {
+                    "id": tc.get("id", f"call_{i}"),
+                    "name": tc.get("name"),
+                    "args": tc.get("args", {})
+                }
+                for i, tc in enumerate(response.tool_calls)
+            ]
+            logger.info(f"🔧 Agent selected {len(tool_calls)} tool(s)")
+        else:
+            logger.info("✅ Agent decided no tools needed - ready to generate response")
+        # Update state
+        updated_state = state.copy()
+        updated_state["messages"] = llm_messages + [response]
+        updated_state["pending_tool_calls"] = tool_calls
+        updated_state["current_step"] = "agent_reasoning_complete"
+        # Clear re-reasoning flags after processing
+        if needs_re_reasoning:
+            updated_state["needs_re_reasoning"] = False
+            updated_state["re_reasoning_feedback"] = ""
+            logger.info("🔄 Re-reasoning complete, flags cleared")
+        # Clear modified tool calls after processing
+        if modified_tool_calls:
+            updated_state["modified_tool_calls"] = []
+        # NOTE: Don't remove tools here - they may be needed for next node
+        # Tools are only removed in terminal nodes (generate, tool_rejection_exit)
+        return updated_state
+    except Exception as e:
+        logger.error(f"❌ Error in agent reasoning: {e}")
+        # Fallback: set empty tool calls to proceed to generation
+        updated_state = state.copy()
+        updated_state["pending_tool_calls"] = []
+        updated_state["current_step"] = "agent_reasoning_error"
+        updated_state["error"] = str(e)
+        # NOTE: Don't remove tools here - they may be needed for next node
+        # Tools are only removed in terminal nodes (generate, tool_rejection_exit)
+        return updated_state
+def _build_reasoning_prompt(
+    query: str,
+    reasoning_mode: str,
+    memory_context: str,
+    tool_results: list,
+    re_reasoning_feedback: str,
+    modified_tool_calls: list,
+    capacity_approved: bool = False,
+    model_name: str = "",
+    model_info: dict = None,
+    gpu_requirements: dict = None,
+    estimated_gpu_memory: float = 0
+) -> str:
+    """
+    Build appropriate reasoning prompt based on the reasoning scenario.
+    Args:
+        query: Original user query
+        reasoning_mode: "initial", "post_execution", or "re_reasoning"
+        memory_context: Conversation memory context
+        tool_results: Previous tool execution results
+        re_reasoning_feedback: User's feedback for re-reasoning
+        modified_tool_calls: Tools that were modified by user
+        capacity_approved: Whether in deployment workflow with approved capacity
+        model_name: Name of model to deploy
+        model_info: Model information from capacity estimation
+        gpu_requirements: GPU requirements from capacity estimation
+        estimated_gpu_memory: Estimated GPU memory
+    Returns:
+        Formatted reasoning prompt
+    """
+    base_prompt = Constants.GENERAL_SYSTEM_PROMPT
+    # Handle deployment workflow
+    if capacity_approved and reasoning_mode == "initial":
+        # Deployment-specific reasoning
+        if model_info is None:
+            model_info = {}
+        if gpu_requirements is None:
+            gpu_requirements = {}
+        # Get deployment parameters
+        location = model_info.get("location", "UAE-1")
+        gpu_type = model_info.get("GPU_type", "RTX 4090")
+        num_gpus = gpu_requirements.get(gpu_type, 1)
+        config = f"{num_gpus}x {gpu_type}"
+        deployment_instructions = f"""
+🚀 **DEPLOYMENT MODE ACTIVATED** 🚀
+You are in a model deployment workflow. The capacity has been approved and you need to create a compute instance.
+**Deployment Information:**
+- Model to deploy: {model_name}
+- Approved Location: {location}
+- Required GPU Configuration: {config}
+- GPU Memory Required: {estimated_gpu_memory:.2f} GB
+**YOUR TASK:**
+Call the `create_compute_instance` tool with appropriate arguments based on the deployment information above.
+**IMPORTANT:**
+1. Review the tool's specification to understand the valid parameter values
+2. Use the deployment information provided to determine the correct arguments:
+   - For the `name` parameter: Format the model name "{model_name}" following these rules:
+     * Convert to lowercase
+     * Replace forward slashes (/) with hyphens (-)
+     * Replace dots (.) with hyphens (-)
+     * Replace underscores (_) with hyphens (-)
+     * Keep existing hyphens as-is
+   - For the `location` parameter: Map the approved location to the tool's valid location format (see mapping below)
+   - For the `config` parameter: Use the exact GPU configuration "{config}"
+3. After the tool returns the instance_id and status, do NOT call any other tools
+4. The generate node will handle creating the deployment instructions
+**Location Mapping (map approved location to MCP tool format):**
+- "UAE-1" or "uae-1" or "UAE" → use "uae"
+- "UAE-2" or "uae-2" → use "uae-2"
+- "France" or "FRANCE" → use "france"
+- "Texas" or "TEXAS" → use "texas"
+**Example name formatting:**
+- "meta-llama/Llama-3.1-8B" → "meta-llama-llama-3-1-8b"
+- "Qwen/Qwen2.5-7B" → "qwen-qwen2-5-7b"
+- "google/gemma-2-9b" → "google-gemma-2-9b"
+Make sure your tool call arguments exactly match the MCP tool's specification format.
+"""
+        prompt = f"""{base_prompt}
+{deployment_instructions}
+User Query: {query}
+{f"Conversation Context: {memory_context}" if memory_context else ""}"""
+        return prompt
+    if reasoning_mode == "initial":
+        # Initial reasoning (non-deployment)
+        # Include available model information for tool calls
+        model_info_text = f"""
+Available Models:
+- For general queries: {Constants.DEFAULT_LLM_NAME}
+- For function calling: {Constants.DEFAULT_LLM_FC}
+When calling the research tool, use the model parameter: "{Constants.DEFAULT_LLM_NAME}"
+"""
+        prompt = f"""{base_prompt}
+{model_info_text}
+User Query: {query}
+{f"Conversation Context: {memory_context}" if memory_context else ""}
+IMPORTANT INSTRUCTIONS:
+1. **Check conversation history first**: If this is a follow-up question, review previous messages to see if you already have the information.
+2. **Avoid redundant tool calls**: Don't call tools to fetch information you've already provided in this conversation.
+3. **Answer directly when possible**: If you can answer based on previous responses or your knowledge, respond without calling tools.
+4. **Use tools only when necessary**: Only call tools if you genuinely need new information that isn't available in the conversation history.
+When calling tools that require a "model" parameter (like the research tool),
+use the model "{Constants.DEFAULT_LLM_NAME}" unless the user explicitly requests a different model."""
+    elif reasoning_mode == "post_execution":
+        # Post-execution reasoning
+        tool_results_summary = "\n\n".join([
+            f"Tool {i+1} ({getattr(r, 'name', 'unknown')}): {getattr(r, 'content', str(r))}"
+            for i, r in enumerate(tool_results)
+        ])
+        prompt = f"""{base_prompt}
+Original Query: {query}
+{f"Conversation Context: {memory_context}" if memory_context else ""}
+Tool Execution Results:
+{tool_results_summary}
+IMPORTANT: Evaluate if you have enough information to answer the user's query.
+Decision Logic:
+1. If the tool results provide sufficient information to answer the query → DO NOT call any tools (respond without tool calls)
+2. Only if critical information is still missing → Select specific tools to gather that information
+Remember:
+- The generate node will format your final response, so you don't need to call tools just to format data
+- Be efficient - don't call tools unless absolutely necessary
+- If you respond without calling tools, the workflow will move to generate the final answer"""
+    else:  # re_reasoning
+        # Re-reasoning after user feedback
+        model_info = f"""
+Available Models:
+- For general queries: {Constants.DEFAULT_LLM_NAME}
+- For function calling: {Constants.DEFAULT_LLM_FC}
+When calling the research tool, use the model parameter: "{Constants.DEFAULT_LLM_NAME}"
+"""
+        modified_summary = ""
+        if modified_tool_calls:
+            modified_summary = "\n\nUser Modified These Tools:\n" + "\n".join([
+                f"- Tool {mod['index']}: {mod['modified']['name']} with args {mod['modified']['args']}"
+                for mod in modified_tool_calls
+            ])
+        prompt = f"""{base_prompt}
+{model_info}
+Original Query: {query}
+{f"Conversation Context: {memory_context}" if memory_context else ""}
+User Feedback: {re_reasoning_feedback}
+{modified_summary}
+The user has provided feedback on your previous tool selection. Please reconsider your approach:
+1. Review the user's feedback carefully
+2. Consider the modified tool arguments if provided
+3. Determine a new strategy that addresses the user's concerns
+Select appropriate tools based on this feedback, or proceed without tools if you can now answer directly.
+IMPORTANT: When calling tools that require a "model" parameter (like the research tool),
+use the model "{Constants.DEFAULT_LLM_NAME}" unless the user explicitly requests a different model."""
+    return prompt

ComputeAgent/nodes/ReAct/auto_approval_node.py ADDED Viewed

	@@ -0,0 +1,81 @@

+"""
+Auto Approval Node for ReAct Pattern
+This module implements automatic tool approval for the ReAct workflow when
+human approval is disabled via the HUMAN_APPROVAL environment variable.
+When HUMAN_APPROVAL is set to False, this node automatically approves all
+pending tool calls without requiring user interaction, allowing for
+fully automated tool execution in trusted environments.
+Key Features:
+    - Automatic approval of all pending tool calls
+    - Consistent state management with human approval flow
+    - Comprehensive logging for audit trails
+    - Safety checks for empty pending lists
+The auto approval process mirrors the human approval workflow but bypasses
+user interaction, making it suitable for automated scenarios, testing,
+or trusted environments where human oversight is not required.
+State Updates:
+    After auto-approval, the state is updated with:
+    - approved_tool_calls: All pending tools moved to approved
+    - rejected_tool_calls: Empty list (no rejections in auto mode)
+    - pending_tool_calls: Cleared after approval process
+Example:
+    >>> state = {
+    ...     "pending_tool_calls": [
+    ...         {"name": "research", "args": {"query": "AI trends"}}
+    ...     ]
+    ... }
+    >>> result = await auto_approval_node(state)
+    >>> # All pending tools automatically approved
+    >>> print(result["approved_tool_calls"])  # Contains the research tool call
+"""
+from typing import Dict, Any
+import logging
+logger = logging.getLogger("ReAct Auto Approval")
+async def auto_approval_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Node that automatically approves all pending tool calls without user interaction.
+    This node is used when HUMAN_APPROVAL is disabled, providing a seamless
+    automated workflow while maintaining the same state structure as human approval.
+    Args:
+        state: Current ReAct state with pending tool calls
+    Returns:
+        Updated state with all pending tools moved to approved_tool_calls
+    """
+    pending_tools = state.get("pending_tool_calls", [])
+    if not pending_tools:
+        logger.info("ℹ️ No pending tool calls for auto-approval")
+        return state
+    logger.info(f"🤖 Auto-approving {len(pending_tools)} tool call(s)")
+    # Log each tool being auto-approved for audit trail
+    for tool_call in pending_tools:
+        logger.info(f"✅ Auto-approved tool: '{tool_call['name']}' with args: {tool_call['args']}")
+    # Update state with auto-approval results
+    updated_state = state.copy()
+    updated_state["approved_tool_calls"] = pending_tools.copy()  # Approve all pending tools
+    updated_state["rejected_tool_calls"] = []  # No rejections in auto mode
+    updated_state["pending_tool_calls"] = []  # Clear pending calls
+    updated_state["current_step"] = "auto_approval_complete"
+    # NOTE: Don't remove tools here - tool_execution needs them next
+    # Tools are only removed in terminal nodes (generate, tool_rejection_exit)
+    logger.info(f"📊 Auto-approval complete: {len(pending_tools)} tools approved, 0 rejected")
+    return updated_state

ComputeAgent/nodes/ReAct/decision_functions.py ADDED Viewed

	@@ -0,0 +1,135 @@

+"""
+Decision Functions for ReAct Workflow Routing - Enhanced Version
+This module contains enhanced routing logic for the ReAct workflow with support for
+tool argument modification and re-reasoning capabilities.
+Enhanced Features:
+    - Re-reasoning support when user modifies tool arguments
+    - Handles modified tool calls routing
+    - Backward compatible with existing routing logic
+Functions:
+    should_continue_to_approval: Routes to human approval, direct tool execution, or refinement
+    should_continue_after_approval: Routes to execution, rejection handling, or re-reasoning
+    should_continue_after_execution: Routes based on tool execution results
+New Routing Paths:
+    - Re-reasoning: When user requests changes and wants agent to reconsider
+    - Modified execution: When user modifies tool arguments but wants to proceed
+Environment Variables:
+    HUMAN_APPROVAL: When set to "False", automatically approves all tools and
+                   bypasses the human approval step. Defaults to "True".
+Example:
+    >>> # User modifies tool arguments
+    >>> state = {"needs_re_reasoning": True, "re_reasoning_feedback": "..."}
+    >>> next_node = should_continue_after_approval(state)
+    >>> # Returns "agent_reasoning" to re-evaluate
+"""
+from typing import Dict, Any, Literal
+import logging
+from constant import Constants
+logger = logging.getLogger("ReAct Decision Functions")
+def should_continue_to_approval(state: Dict[str, Any]) -> str:
+    """
+    Determine whether to proceed to human approval, auto-approval, or generate.
+    This function controls the flow after agent reasoning based on HUMAN_APPROVAL setting:
+    - If HUMAN_APPROVAL is True: Route to human_approval for manual approval
+    - If HUMAN_APPROVAL is False: Route to auto_approval for automatic approval
+    - If no tools selected: Route directly to generate (CHANGED from refinement)
+    - If force_generate is set: Route directly to generate
+    Args:
+        state: Current workflow state containing pending tool calls
+    Returns:
+        Next node name: "human_approval", "auto_approval", or "generate"
+    """
+    # Check if generate is forced (e.g., due to max iterations)
+    if state.get("force_refinement", False):  # Keep the flag name for backward compatibility
+        logger.info("📄 Force refinement flag set - routing to generate")
+        return "generate"
+    pending_tools = state.get("pending_tool_calls", [])
+    if not pending_tools:
+        logger.info("📄 No tools selected - routing to generate")
+        return "generate"
+    # Check if human approval is enabled
+    HUMAN_APPROVAL = True if Constants.HUMAN_APPROVAL == "true" else False
+    if not HUMAN_APPROVAL:
+        logger.info(f"📄 HUMAN_APPROVAL disabled - routing to auto-approval for {len(pending_tools)} tool call(s)")
+        return "auto_approval"
+    else:
+        logger.info(f"📄 HUMAN_APPROVAL enabled - routing to human approval for {len(pending_tools)} tool call(s)")
+        return "human_approval"
+def should_continue_after_approval(state: Dict[str, Any]) -> Literal["tool_execution", "tool_rejection_exit", "agent_reasoning"]:
+    """
+    Decide whether to execute tools, exit due to rejection, or re-reason.
+    Enhanced to support:
+    1. Execute approved tools (approved_tool_calls)
+    2. Exit if all rejected (no approved tools)
+    3. Re-reason if user requests it (needs_re_reasoning)
+    Args:
+        state: Current ReAct state after human approval
+    Returns:
+        Next node name: "tool_execution", "tool_rejection_exit", or "agent_reasoning"
+    """
+    approved_calls = state.get("approved_tool_calls", [])
+    needs_re_reasoning = state.get("needs_re_reasoning", False)
+    # PRIORITY 1: Check if re-reasoning is requested
+    if needs_re_reasoning:
+        logger.info("🔄 Re-reasoning requested - routing back to agent_reasoning")
+        return "agent_reasoning"
+    # PRIORITY 2: Check if there are approved tools to execute
+    if approved_calls:
+        logger.info(f"📄 Routing to tool execution for {len(approved_calls)} approved tool(s)")
+        return "tool_execution"
+    # PRIORITY 3: No approved tools and no re-reasoning means rejection
+    logger.info("📄 No approved tools (rejected) - routing to tool rejection exit")
+    return "tool_rejection_exit"
+def should_continue_after_execution(state: Dict[str, Any]) -> Literal["agent_reasoning", "generate"]:
+    """
+    Decide where to go after tool execution.
+    Strategy: ALWAYS route to agent_reasoning after tool execution.
+    Let the agent evaluate the results and intelligently decide whether to:
+    - Generate final response (no tools needed)
+    - Call additional tools (more information needed)
+    This gives the agent full control to decide based on the quality and
+    completeness of the tool results, regardless of which tools were used.
+    Args:
+        state: Current ReAct state after tool execution
+    Returns:
+        Next node name (always "agent_reasoning")
+    """
+    tool_results = state.get("tool_results", [])
+    if tool_results:
+        logger.info(f"📄 Tool execution complete ({len(tool_results)} results) - routing to agent reasoning for evaluation")
+        return "agent_reasoning"
+    # No tool results - this shouldn't happen but fallback to generate
+    logger.warning("⚠️ No tool results after execution - falling back to generate")
+    return "generate"

ComputeAgent/nodes/ReAct/generate_node.py ADDED Viewed

	@@ -0,0 +1,510 @@

+"""
+ReAct Generate Node - Simplified version with 3 clear paths
+Node that generates final response using:
+1. DirectAnswerChain for direct answers (no tools)
+2. ResearcherChain for researcher tool results
+3. ToolResultChain for other tool results
+All chains provide consistent formatting and professional presentation with memory context support.
+Independent implementation for ReAct workflow - no dependency on AgenticRAG.
+"""
+from typing import Dict, Any
+from chains.tool_result_chain import ToolResultChain
+from models.model_manager import ModelManager
+from constant import Constants
+import asyncio
+import logging
+import json
+from langgraph.config import get_stream_writer
+from langchain_core.messages import HumanMessage, SystemMessage
+# Initialize model manager for LLM loading
+model_manager = ModelManager()
+# Initialize logger for generate node
+logger = logging.getLogger("ReAct Generate Node")
+def _create_error_response(state: Dict[str, Any], query: str, error_msg: str) -> Dict[str, Any]:
+    """Create a standardized error response"""
+    final_response_dict = {
+        "query": query,
+        "final_response": f"I apologize, but I encountered an error: {error_msg}",
+        "sources": []
+    }
+    updated_state = state.copy()
+    updated_state["response"] = final_response_dict["final_response"]
+    updated_state["final_response_dict"] = final_response_dict
+    updated_state["current_step"] = "generate_complete"
+    # Send it via custom stream
+    writer = get_stream_writer()
+    writer({"final_response_dict": final_response_dict})
+    return updated_state
+async def _generate_deployment_instructions(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Generate deployment instructions when instance has been created.
+    Args:
+        state: Current state with instance_id and deployment info
+    Returns:
+        Updated state with deployment instructions
+    """
+    logger.info("📝 Generating deployment instructions")
+    # Extract deployment information
+    instance_id = state.get("instance_id", "")
+    instance_status = state.get("instance_status", "")
+    model_name = state.get("model_name", "Unknown Model")
+    model_info = state.get("model_info", {})
+    gpu_requirements = state.get("gpu_requirements", {})
+    estimated_gpu_memory = state.get("estimated_gpu_memory", 0)
+    # Get deployment configuration
+    location = model_info.get("location", "UAE-1")
+    gpu_type = model_info.get("GPU_type", "RTX 4090")
+    num_gpus = gpu_requirements.get(gpu_type, 1)
+    config = f"{num_gpus}x {gpu_type}"
+    # Determine capacity source
+    custom_capacity = state.get("custom_capacity", {})
+    capacity_source = "custom" if custom_capacity else "estimated"
+    # Build SSH command
+    ssh_command = f'ssh -i ~/.ssh/id_rsa -o "ProxyCommand=ssh bastion@ssh.hivecompute.ai %h" ubuntu@{instance_id}.ssh.hivecompute.ai'
+    # Get capacity estimation parameters
+    max_model_len = model_info.get("max_model_len", 2048)
+    max_num_seqs = model_info.get("max_num_seqs", 256)
+    max_batched_tokens = model_info.get("max_num_batched_tokens", 2048)
+    dtype = model_info.get("dtype", "BF16")
+    kv_cache_dtype = model_info.get("kv_cache_dtype", "auto")
+    gpu_memory_utilization = model_info.get("gpu_memory_utilization", 0.9)
+    # Use LLM to generate optimal vLLM command based on documentation and specs
+    logger.info("🤖 Using LLM to determine optimal vLLM parameters")
+    # Import vLLM documentation
+    try:
+        from vllm_engine_args import get_vllm_docs
+        vllm_docs = get_vllm_docs()
+    except ImportError:
+        logger.warning("⚠️ Could not import vllm_engine_args, using basic documentation")
+        vllm_docs = "Basic vLLM parameters: --model, --dtype, --max-model-len, --gpu-memory-utilization, --tensor-parallel-size, --enable-prefix-caching, --enable-chunked-prefill"
+    vllm_params_prompt = f"""You are an expert in vLLM deployment. Based on the model specifications and capacity estimation, generate an optimal vLLM serve command.
+**Model Information:**
+- Model: {model_name}
+- GPU Type: {gpu_type}
+- Number of GPUs: {num_gpus}
+- GPU Memory: {estimated_gpu_memory:.2f} GB
+- Location: {location}
+**Capacity Estimation Parameters:**
+- Max Model Length: {max_model_len}
+- Max Sequences: {max_num_seqs}
+- Max Batched Tokens: {max_batched_tokens}
+- Data Type: {dtype}
+- KV Cache dtype: {kv_cache_dtype}
+- GPU Memory Utilization: {gpu_memory_utilization}
+**vLLM Engine Arguments Documentation:**
+{vllm_docs}
+**Task:**
+Generate the optimal vLLM serve command for this deployment. Consider:
+1. Use the capacity estimation parameters provided
+2. For multi-GPU setups ({num_gpus} GPUs), add --tensor-parallel-size {num_gpus} if num_gpus > 1
+3. Add --enable-chunked-prefill if max_model_len > 8192 for better long context handling
+4. Use --quantization fp8 only if dtype contains 'fp8' or 'FP8'
+5. Always include --enable-prefix-caching for better performance
+6. Set --host 0.0.0.0 and --port 8888
+7. Use --download-dir /home/ubuntu/workspace/models
+8. Consider other relevant parameters from the documentation based on the model and hardware specs
+Return ONLY the complete vLLM command without any explanation, starting with 'vllm serve'."""
+    try:
+        from langchain_openai import ChatOpenAI
+        from constant import Constants
+        llm = ChatOpenAI(
+            base_url=Constants.LLM_BASE_URL,
+            api_key=Constants.LLM_API_KEY,
+            model=Constants.DEFAULT_LLM_NAME,
+            temperature=0.0
+        )
+        vllm_response = await llm.ainvoke(vllm_params_prompt)
+        vllm_command = vllm_response.content.strip()
+        logger.info(f"✅ Generated vLLM command: {vllm_command}")
+    except Exception as e:
+        logger.error(f"❌ Failed to generate vLLM command with LLM: {e}")
+        # Fallback to basic command
+        quantization = "fp8" if "fp8" in dtype.lower() else None
+        vllm_command = f'vllm serve {model_name} --download-dir /home/ubuntu/workspace/models --gpu-memory-utilization {gpu_memory_utilization} --max-model-len {max_model_len} --max-num-seqs {max_num_seqs} --max-num-batched-tokens {max_batched_tokens} --dtype {dtype}'
+        if quantization:
+            vllm_command += f' --quantization {quantization}'
+        if num_gpus > 1:
+            vllm_command += f' --tensor-parallel-size {num_gpus}'
+        vllm_command += f' --kv-cache-dtype {kv_cache_dtype} --enable-prefix-caching --host 0.0.0.0 --port 8888'
+    # Build curl test command
+    curl_command = f'''curl -k https://{instance_id}-8888.tenants.hivecompute.ai/v1/chat/completions \\
+  -H "Content-Type: application/json" \\
+  -d '{{
+    "model": "{model_name}",
+    "messages": [
+      {{"role": "user", "content": "What is the capital of France?"}}
+    ],
+    "max_tokens": 512
+  }}' '''
+    # Build complete deployment instructions response
+    final_response = f"""
+# 🚀 Deployment Instructions for {model_name}
+## ✅ Instance Created Successfully
+**Instance ID:** `{instance_id}`
+**Status:** `{instance_status}`
+**Location:** `{location}`
+**Configuration:** `{config}`
+---
+## 📊 Capacity Configuration
+- **GPU Memory Required:** {estimated_gpu_memory:.2f} GB
+- **GPU Type:** {gpu_type}
+- **Number of GPUs:** {num_gpus}
+- **Capacity Source:** {capacity_source}
+---
+## 🔐 Step 1: SSH to the Instance
+```bash
+{ssh_command}
+```
+---
+## 📁 Step 2: Create Models Directory
+Once connected via SSH, create the models directory inside the workspace:
+```bash
+mkdir -p /home/ubuntu/workspace/models
+mkdir -p /home/ubuntu/workspace/tmpdir
+```
+**Note:** Cannot use docker file in HiveCompute since there is no VM support. Use an instance from HiveCompute with Template with Pytorch.
+---
+## 📦 Step 3: Install Dependencies (Using UV)
+Install UV package manager:
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+source $HOME/.local/bin/env
+```
+Create and activate environment:
+```bash
+uv venv --python 3.12
+source .venv/bin/activate
+```
+Install vLLM and dependencies:
+```bash
+uv pip install vllm==0.11.0 ray[default]
+```
+---
+## 🤖 Step 4: Start vLLM Server
+Run the vLLM server with the following configuration:
+```bash
+{vllm_command}
+```
+**Configuration Details:**
+The vLLM command above was intelligently generated based on:
+- **Model Specifications:** {model_name} with {num_gpus}x {gpu_type}
+- **Capacity Estimation:** {estimated_gpu_memory:.2f} GB GPU memory, {int(gpu_memory_utilization * 100)}% utilization
+- **Context Length:** {max_model_len} tokens
+- **Batch Configuration:** {max_num_seqs} max sequences, {max_batched_tokens} max batched tokens
+- **Data Type:** {dtype} with {kv_cache_dtype} KV cache
+- **vLLM Documentation:** Optimized parameters from https://docs.vllm.ai/en/v0.7.2/serving/engine_args.html
+The LLM analyzed your deployment requirements and selected optimal parameters including tensor parallelism, chunked prefill, and caching strategies.
+---
+## 🧪 Step 5: Test the Deployment
+Test your deployed model with a curl command:
+```bash
+{curl_command}
+```
+This will send a test request to your model and verify it's responding correctly.
+---
+## 📝 Additional Notes
+- The vLLM server will download the model to `/home/ubuntu/workspace/models` on first run
+- Make sure to monitor GPU memory usage during model loading
+- The instance is accessible via the HiveCompute tenant URL: `https://{instance_id}-8888.tenants.hivecompute.ai`
+- For production use, consider setting up monitoring and health checks
+---
+**Deployment Complete! 🎉**
+"""
+    final_response_dict = {
+        "query": f"Deploy model {model_name}",
+        "final_response": final_response,
+        "instance_id": instance_id,
+        "instance_status": instance_status,
+        "sources": []
+    }
+    # Update state
+    updated_state = state.copy()
+    updated_state["response"] = final_response
+    updated_state["final_response_dict"] = final_response_dict
+    updated_state["current_step"] = "generate_complete"
+    # Remove tools to avoid serialization issues
+    if "tools" in updated_state:
+        del updated_state["tools"]
+    # Send via custom stream
+    writer = get_stream_writer()
+    writer({"final_response_dict": final_response_dict})
+    logger.info("✅ Deployment instructions generated successfully")
+    return updated_state
+async def _handle_tool_results(state: Dict[str, Any], query: str, user_id: str, session_id: str,
+                             tool_results: list, memory_context: str, llm) -> Dict[str, Any]:
+    """Handle general tool results using ToolResultChain"""
+    try:
+        logger.info(f"🤖 Synthesizing tool results using ToolResultChain...")
+        tool_result_chain = ToolResultChain(llm=llm)
+        formatted_response = await tool_result_chain.ainvoke(query, tool_results, memory_context)
+        final_response_dict = {
+            "query": query,
+            "final_response": formatted_response,
+            "sources": []
+        }
+        updated_state = state.copy()
+        updated_state["response"] = formatted_response
+        updated_state["final_response_dict"] = final_response_dict
+        updated_state["current_step"] = "generate_complete"
+        # Send it via custom stream
+        writer = get_stream_writer()
+        writer({"final_response_dict": final_response_dict})
+        logger.info("✅ Tool results synthesized successfully")
+        return updated_state
+    except Exception as e:
+        logger.error(f"❌ ToolResultChain Error: {str(e)}")
+        # Final fallback to raw content
+        fallback_response = "I executed the requested tools but encountered formatting issues. Here are the raw results:\n\n"
+        for i, result in enumerate(tool_results, 1):
+            content = result.content if hasattr(result, 'content') else str(result)
+            fallback_response += f"Tool {i}: {content}\n"
+        final_response_dict = {
+            "query": query,
+            "final_response": fallback_response,
+            "sources": []
+        }
+        updated_state = state.copy()
+        updated_state["response"] = fallback_response
+        updated_state["final_response_dict"] = final_response_dict
+        updated_state["current_step"] = "generate_complete"
+        # Send it via custom stream
+        writer = get_stream_writer()
+        writer({"final_response_dict": final_response_dict})
+        logger.info("✅ Tool results formatted using raw content fallback")
+        return updated_state
+async def generate_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Simple response generation with 4 clear paths:
+    1. Deployment Instructions (when instance_created == True)
+    2. Direct Answer (when current_step == "direct_answer_complete")
+    3. Researcher Results (when researcher_used == True)
+    4. General Tool Results (when tool_results exist but no researcher)
+    Args:
+        state: Current ReAct state
+    Returns:
+        Updated state with generated response
+    """
+    logger.info("🤖 Starting response generation")
+    # Extract common variables
+    query = state.get("query", "")
+    user_id = state.get("user_id", "")
+    session_id = state.get("session_id", "")
+    current_step = state.get("current_step", "")
+    tool_results = state.get("tool_results", [])
+    existing_response = state.get("response", "")
+    researcher_used = state.get("researcher_used", False)
+    instance_created = state.get("instance_created", False)
+    # Debug logging to help diagnose path selection
+    logger.info(f"🔍 DEBUG - instance_created: {instance_created}, researcher_used: {researcher_used}, tool_results count: {len(tool_results)}, current_step: {current_step}, existing_response: {bool(existing_response)}")
+    # Special handling for deployment workflow
+    if instance_created:
+        logger.info("🚀 Deployment mode detected - generating deployment instructions")
+        return await _generate_deployment_instructions(state)
+    # Build memory context once
+    memory_context = ""
+    if user_id and session_id:
+        try:
+            from helpers.memory import get_memory_manager
+            memory_manager = get_memory_manager()
+            memory_context = await memory_manager.build_context_for_node(user_id, session_id, "general")
+            if memory_context:
+                logger.info("🧠 Using memory context for response generation")
+        except Exception as e:
+            logger.warning(f"⚠️ Could not load memory context: {e}")
+    # Get model info once
+    model_name = Constants.DEFAULT_LLM_NAME
+    if hasattr(state.get("refining_llm"), 'model_name'):
+        model_name = state.get("refining_llm").model_name
+    try:
+        llm = await model_manager.load_llm_model(model_name)
+    except Exception as e:
+        logger.error(f"❌ Failed to load model {model_name}: {e}")
+        return _create_error_response(state, query, "Failed to load language model")
+    # If no tool results, generate a direct response using LLM
+    if not tool_results:
+        logger.info("ℹ️ No tool results found - generating LLM response")
+        system_prompt = """You are a helpful AI assistant. The user has made a request and you need to provide a comprehensive and helpful response.
+        If there's an existing response or context, acknowledge it and build upon it.
+        Be professional, clear, and concise in your response.
+        If you don't have specific information to provide, politely explain what you can help with instead."""
+        context_info = f"Query: {query}"
+        if existing_response:
+            context_info += f"\nExisting context: {existing_response}"
+        if memory_context:
+            context_info += f"\nMemory context: {memory_context}"
+        messages = [
+            SystemMessage(content=system_prompt),
+            HumanMessage(content=context_info)
+        ]
+        try:
+            response = await llm.ainvoke(messages)
+            direct_response = response.content
+            # Create clean copy without tools (tools not serializable)
+            updated_state = state.copy()
+            updated_state["response"] = direct_response
+            updated_state["current_step"] = "generate_complete"
+            if "tools" in updated_state:
+                del updated_state["tools"]
+            logger.info("✅ Generated LLM response successfully")
+            return updated_state
+        except Exception as e:
+            logger.error(f"❌ Error generating LLM response: {str(e)}")
+            return _create_error_response(state, query, f"Failed to generate response: {str(e)}")
+    # If we have tool results, use LLM to synthesize them
+    logger.info("🔧 Processing tool results using LLM synthesis")
+    # Prepare tool results summary
+    tool_results_summary = ""
+    for i, result in enumerate(tool_results, 1):
+        content = result.content if hasattr(result, 'content') else str(result)
+        tool_name = getattr(result, 'name', f'Tool {i}')
+        tool_results_summary += f"\n{tool_name}: {content}\n"
+    system_prompt = """You are a helpful AI assistant that synthesizes tool execution results into a comprehensive response.
+    Your task is to:
+    1. Analyze the tool results provided
+    2. Generate a clear, professional response that summarizes what was accomplished
+    3. Present the information in a well-structured format
+    4. If there are any errors or issues, explain them clearly
+    5. Be concise but thorough in your explanation
+    Always maintain a helpful and professional tone."""
+    context_info = f"Query: {query}\n\nTool Results:{tool_results_summary}"
+    if memory_context:
+        context_info += f"\nMemory context: {memory_context}"
+    messages = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=context_info)
+    ]
+    try:
+        response = await llm.ainvoke(messages)
+        synthesized_response = response.content
+        # Create clean copy without tools (tools not serializable)
+        updated_state = state.copy()
+        updated_state["response"] = synthesized_response
+        updated_state["current_step"] = "generate_complete"
+        if "tools" in updated_state:
+            del updated_state["tools"]
+        logger.info("✅ Synthesized tool results successfully using LLM")
+        return updated_state
+    except Exception as e:
+        logger.error(f"❌ Error synthesizing tool results with LLM: {str(e)}")
+        # Fallback to ToolResultChain if LLM synthesis fails
+        logger.info("🔄 Falling back to ToolResultChain")
+        return await _handle_tool_results(state, query, user_id, session_id, tool_results, memory_context, llm)

ComputeAgent/nodes/ReAct/human_approval_node.py ADDED Viewed

	@@ -0,0 +1,284 @@

+"""
+Human Approval Node for ReAct Pattern - Enhanced Version
+This module implements an enhanced human-in-the-loop approval system for the ReAct workflow
+using LangGraph's interrupt() for API-friendly tool approval with argument modification support.
+Key Features:
+    - LangGraph interrupt() for clean API integration
+    - Individual tool approval/rejection/modification
+    - Argument modification support with re-execution
+    - Batch approval support
+    - State management for approved/rejected/modified tools
+Enhanced Capabilities:
+    - Approve: Execute tool with original arguments
+    - Reject: Skip tool execution
+    - Modify: Change tool arguments and re-execute reasoning
+    - Batch operations: Approve/reject/modify multiple tools at once
+State Updates:
+    After approval, the state is updated with:
+    - approved_tool_calls: List of tools approved for execution (may include modified args)
+    - rejected_tool_calls: List of tools rejected by user
+    - modified_tool_calls: List of tools with modified arguments
+    - needs_re_reasoning: Flag to indicate if agent should re-reason with modified tools
+    - pending_tool_calls: Cleared after approval process
+Example API Request:
+    >>> # Approve all tools
+    >>> user_decision = {
+    ...     "action": "approve_all"
+    ... }
+    >>> # Approve specific tools
+    >>> user_decision = {
+    ...     "action": "approve_selected",
+    ...     "tool_indices": [0, 2]  # Approve tools at index 0 and 2
+    ... }
+    >>> # Reject all tools
+    >>> user_decision = {
+    ...     "action": "reject_all"
+    ... }
+    >>> # Modify tool arguments
+    >>> user_decision = {
+    ...     "action": "modify_and_approve",
+    ...     "modifications": [
+    ...         {
+    ...             "tool_index": 0,
+    ...             "new_args": {"query": "modified search query"},
+    ...             "approve": True
+    ...         },
+    ...         {
+    ...             "tool_index": 1,
+    ...             "new_args": {"calculation": "2+2"},
+    ...             "approve": False
+    ...         }
+    ...     ]
+    ... }
+    >>> # Request re-reasoning with tool context
+    >>> user_decision = {
+    ...     "action": "request_re_reasoning",
+    ...     "feedback": "Please search for more recent information"
+    ... }
+"""
+from typing import Dict, Any, List
+import logging
+from langgraph.types import interrupt
+logger = logging.getLogger("ReAct Human Approval")
+def _get_tools_from_registry(workflow_id: int):
+    """
+    Get tools from the global registry using workflow ID.
+    """
+    from graph.graph_ReAct import _TOOLS_REGISTRY
+    tools = _TOOLS_REGISTRY.get(workflow_id)
+    if tools is None:
+        logger.warning(f"⚠️ Tools not found in registry for workflow_id: {workflow_id}")
+        return []
+    return tools
+async def human_approval_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Enhanced node that handles human approval for tool execution using LangGraph interrupt.
+    Supports:
+    1. Approve all tools
+    2. Approve selected tools by index
+    3. Reject all tools
+    4. Reject selected tools by index
+    5. Modify tool arguments and approve/reject
+    6. Request re-reasoning with feedback
+    Args:
+        state: Current ReAct state with pending tool calls
+    Returns:
+        Updated state with approved, rejected, and/or modified tool calls
+    """
+    pending_tools = state.get("pending_tool_calls", [])
+    if not pending_tools:
+        logger.info("ℹ️ No pending tool calls for approval")
+        return state
+    logger.info(f"👤 Requesting human approval for {len(pending_tools)} tool call(s)")
+    # Get tools from registry for description lookup
+    workflow_id = state.get("workflow_id")
+    tools = _get_tools_from_registry(workflow_id) if workflow_id else []
+    # Prepare approval data to send to user
+    approval_data = {
+        "tool_calls": [
+            {
+                "index": i,
+                "id": tool.get("id"),
+                "name": tool.get("name"),
+                "args": tool.get("args"),
+                "description": _get_tool_description(tool.get("name"), tools)
+            }
+            for i, tool in enumerate(pending_tools)
+        ],
+        "query": state.get("query", ""),
+        "total_tools": len(pending_tools)
+    }
+    # ✨ USE INTERRUPT - This pauses execution and waits for user input
+    user_decision = interrupt(approval_data)
+    logger.info(f"📥 Received tool approval decision: {user_decision.get('action', 'unknown')}")
+    # Process the approval decision
+    return await _process_tool_approval_decision(state, pending_tools, user_decision)
+async def _process_tool_approval_decision(
+    state: Dict[str, Any],
+    pending_tools: List[Dict[str, Any]],
+    user_decision: Dict[str, Any]
+) -> Dict[str, Any]:
+    """
+    Process user's tool approval decision and update state accordingly.
+    Args:
+        state: Current workflow state
+        pending_tools: List of pending tool calls
+        user_decision: User's decision dictionary
+    Returns:
+        Updated state with approval results
+    """
+    action = user_decision.get("action", "reject_all")
+    approved_calls = []
+    rejected_calls = []
+    modified_calls = []
+    needs_re_reasoning = False
+    # Handle different action types
+    if action == "approve_all":
+        logger.info("✅ User approved all tools")
+        approved_calls = pending_tools.copy()
+    elif action == "approve_selected":
+        tool_indices = user_decision.get("tool_indices", [])
+        logger.info(f"✅ User approved tools at indices: {tool_indices}")
+        for i, tool in enumerate(pending_tools):
+            if i in tool_indices:
+                approved_calls.append(tool)
+            else:
+                rejected_calls.append(tool)
+    elif action == "reject_all":
+        logger.info("❌ User rejected all tools")
+        rejected_calls = pending_tools.copy()
+    elif action == "reject_selected":
+        tool_indices = user_decision.get("tool_indices", [])
+        logger.info(f"❌ User rejected tools at indices: {tool_indices}")
+        for i, tool in enumerate(pending_tools):
+            if i in tool_indices:
+                rejected_calls.append(tool)
+            else:
+                approved_calls.append(tool)
+    elif action == "modify_and_approve":
+        modifications = user_decision.get("modifications", [])
+        logger.info(f"🔧 User requested modifications for {len(modifications)} tool(s)")
+        # Create a mapping of tool indices to modifications
+        mod_map = {mod["tool_index"]: mod for mod in modifications}
+        for i, tool in enumerate(pending_tools):
+            if i in mod_map:
+                mod = mod_map[i]
+                modified_tool = tool.copy()
+                # Update arguments
+                modified_tool["args"] = mod.get("new_args", tool["args"])
+                modified_calls.append({
+                    "original": tool,
+                    "modified": modified_tool,
+                    "index": i
+                })
+                # Decide if this modified tool should be approved or rejected
+                if mod.get("approve", True):
+                    approved_calls.append(modified_tool)
+                    logger.info(f"✅ Modified and approved tool at index {i}: {modified_tool['name']}")
+                else:
+                    rejected_calls.append(modified_tool)
+                    logger.info(f"❌ Modified but rejected tool at index {i}: {modified_tool['name']}")
+            else:
+                # No modification for this tool, keep original
+                approved_calls.append(tool)
+    elif action == "request_re_reasoning":
+        logger.info("🔄 User requested re-reasoning")
+        needs_re_reasoning = True
+        rejected_calls = pending_tools.copy()  # Reject current tools
+        # Store user feedback for re-reasoning
+        state["re_reasoning_feedback"] = user_decision.get("feedback", "")
+    else:
+        logger.warning(f"⚠️ Unknown action '{action}', defaulting to reject all")
+        rejected_calls = pending_tools.copy()
+    # Update state with approval results
+    updated_state = state.copy()
+    updated_state["approved_tool_calls"] = approved_calls
+    updated_state["rejected_tool_calls"] = rejected_calls
+    updated_state["modified_tool_calls"] = modified_calls
+    updated_state["needs_re_reasoning"] = needs_re_reasoning
+    updated_state["pending_tool_calls"] = []  # Clear pending calls
+    updated_state["current_step"] = "human_approval_complete"
+    # NOTE: Don't remove tools here - tool_execution needs them next
+    # Tools are only removed in terminal nodes (generate, tool_rejection_exit)
+    logger.info(
+        f"📊 Approval results: "
+        f"{len(approved_calls)} approved, "
+        f"{len(rejected_calls)} rejected, "
+        f"{len(modified_calls)} modified, "
+        f"re-reasoning: {needs_re_reasoning}"
+    )
+    return updated_state
+def _get_tool_description(tool_name: str, tools: List[Any]) -> str:
+    """
+    Get concise description for a tool by name (first sentence only).
+    Args:
+        tool_name: Name of the tool
+        tools: List of available tool objects
+    Returns:
+        First sentence of tool description or empty string
+    """
+    for tool in tools:
+        if hasattr(tool, 'name') and tool.name == tool_name:
+            full_description = getattr(tool, 'description', '')
+            if full_description:
+                # Extract first sentence (split by period, newline, or question mark)
+                import re
+                # Split by sentence-ending punctuation
+                sentences = re.split(r'[.!?\n]+', full_description)
+                # Return first non-empty sentence
+                first_sentence = next((s.strip() for s in sentences if s.strip()), '')
+                return first_sentence if first_sentence else full_description
+            return ''
+    return ''

ComputeAgent/nodes/ReAct/tool_execution_node.py ADDED Viewed

	@@ -0,0 +1,190 @@

+from typing import Dict, Any
+from langchain_core.messages import ToolMessage
+import json
+import logging
+logger = logging.getLogger("ReAct Tool Execution")
+def _get_tools_from_registry(workflow_id: int):
+    """
+    Get tools from the global registry using workflow ID.
+    This avoids storing non-serializable tool objects in state.
+    """
+    from graph.graph_ReAct import _TOOLS_REGISTRY
+    tools = _TOOLS_REGISTRY.get(workflow_id)
+    if tools is None:
+        raise ValueError(f"Tools not found in registry for workflow_id: {workflow_id}")
+    return tools
+async def tool_execution_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Node that executes approved tools and handles special researcher tool case.
+    Args:
+        state: Current ReAct state with approved tool calls
+    Returns:
+        Updated state with tool results and special handling for researcher
+    """
+    approved_calls = state.get("approved_tool_calls", [])
+    if not approved_calls:
+        logger.info("ℹ️ No approved tool calls to execute")
+        return state
+    # Get tools from registry using workflow_id (avoids serialization issues)
+    workflow_id = state.get("workflow_id")
+    if not workflow_id:
+        logger.error("❌ No workflow_id in state - cannot retrieve tools")
+        return state
+    try:
+        tools = _get_tools_from_registry(workflow_id)
+        tools_dict = {tool.name: tool for tool in tools}
+        logger.info(f"✅ Retrieved {len(tools)} tools from registry")
+    except ValueError as e:
+        logger.error(f"❌ {e}")
+        return state
+    tool_results = []
+    researcher_executed = False
+    instance_created = False
+    logger.info(f"⚡ Executing {len(approved_calls)} approved tool call(s)")
+    for tool_call in approved_calls:
+        tool_name = tool_call['name']
+        try:
+            tool = tools_dict.get(tool_name)
+            if not tool:
+                error_msg = f"Error: Tool {tool_name} not found."
+                logger.error(error_msg)
+                tool_results.append(
+                    ToolMessage(
+                        content=error_msg,
+                        tool_call_id=tool_call['id']
+                    )
+                )
+                continue
+            logger.info(f"🔄 Executing tool: {tool_name}")
+            result = await tool.ainvoke(tool_call['args'])
+            # Special handling for create_compute_instance tool
+            if tool_name == "create_compute_instance":
+                instance_created = True
+                logger.info("🚀 create_compute_instance tool executed - storing instance details")
+                # Extract instance_id and status from result
+                # Result may be a string containing JSON or a dict
+                try:
+                    logger.info(f"📋 Raw result type: {type(result)}, value: {result}")
+                    if isinstance(result, str):
+                        # Parse JSON string to dict
+                        result_dict = json.loads(result)
+                    elif isinstance(result, dict):
+                        result_dict = result
+                    else:
+                        result_dict = {}
+                    # The result may be nested in a 'result' key
+                    if "result" in result_dict and isinstance(result_dict["result"], dict):
+                        instance_data = result_dict["result"]
+                    else:
+                        instance_data = result_dict
+                    instance_id = instance_data.get("id", "")
+                    instance_status = str(instance_data.get("status", ""))
+                    logger.info(f"📋 Extracted instance_id: '{instance_id}', status: '{instance_status}'")
+                    # Store in state for generate node
+                    state["instance_id"] = instance_id
+                    state["instance_status"] = instance_status
+                    state["instance_created"] = True
+                    logger.info(f"✅ Instance created and stored in state: {instance_id} (status: {instance_status})")
+                except (json.JSONDecodeError, AttributeError) as e:
+                    logger.warning(f"⚠️ Could not parse result from create_compute_instance: {e}")
+                    logger.warning(f"⚠️ Result: {result}")
+                    state["instance_created"] = False
+                # Store the result for tool results
+                tool_results.append(
+                    ToolMessage(
+                        content=str(result),
+                        tool_call_id=tool_call['id']
+                    )
+                )
+            # Special handling for researcher tool
+            elif tool_name == "research":
+                researcher_executed = True
+                logger.info("🌐 Researcher tool executed - storing results for generation")
+                # Set flag to indicate researcher was used
+                state["researcher_used"] = True
+                # Store the research result for the generate node to use
+                tool_results.append(
+                    ToolMessage(
+                        content=str(result),
+                        tool_call_id=tool_call['id']
+                    )
+                )
+                logger.info("✅ Researcher tool completed - results stored for generation")
+            else:
+                # Regular tool execution
+                tool_results.append(
+                    ToolMessage(
+                        content=str(result),
+                        tool_call_id=tool_call['id']
+                    )
+                )
+                logger.info(f"✅ Tool {tool_name} executed successfully")
+        except Exception as e:
+            error_msg = f"Error executing tool {tool_name}: {str(e)}"
+            logger.error(error_msg)
+            tool_results.append(
+                ToolMessage(
+                    content=error_msg,
+                    tool_call_id=tool_call['id']
+                )
+            )
+    # Update state with execution results
+    updated_state = state.copy()
+    # Append new tool results to existing ones for multi-tool scenarios
+    existing_results = updated_state.get("tool_results", [])
+    updated_state["tool_results"] = existing_results + tool_results
+    updated_state["messages"] = state["messages"] + tool_results
+    updated_state["approved_tool_calls"] = []  # Clear approved calls
+    updated_state["researcher_executed"] = researcher_executed
+    updated_state["skip_refinement"] = researcher_executed  # Skip refinement if researcher executed
+    updated_state["current_step"] = "tool_execution_complete"
+    # Ensure instance creation flags are preserved in updated_state
+    if state.get("instance_created"):
+        updated_state["instance_created"] = state["instance_created"]
+        updated_state["instance_id"] = state.get("instance_id", "")
+        updated_state["instance_status"] = state.get("instance_status", "")
+        logger.info(f"✅ Instance creation flags preserved in state: {updated_state['instance_id']}")
+    # Clear force_refinement flag after tool execution
+    if "force_refinement" in updated_state:
+        del updated_state["force_refinement"]
+    # NOTE: Don't remove tools here - agent_reasoning needs them next
+    # Tools are only removed in terminal nodes (generate, tool_rejection_exit)
+    logger.info(f"📈 Tool execution completed: {len(tool_results)} new results, {len(updated_state['tool_results'])} total results")
+    return updated_state

ComputeAgent/nodes/ReAct/tool_rejection_exit_node.py ADDED Viewed

	@@ -0,0 +1,93 @@

+from typing import Dict, Any
+from langchain_core.messages import HumanMessage
+from models.model_manager import ModelManager
+from constant import Constants
+import logging
+logger = logging.getLogger("ReAct Tool Rejection Exit")
+# Initialize model manager for LLM loading
+model_manager = ModelManager()
+async def tool_rejection_exit_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Node that handles the case when human declines all tool executions.
+    Provides a helpful, personalized response using memory context and LLM.
+    Args:
+        state: Current ReAct state with memory fields
+    Returns:
+        Updated state with helpful exit message and final response
+    """
+    logger.info("🚪 User declined all tool executions - generating helpful response")
+    # Get original query and user context
+    query = state.get("query", "")
+    user_id = state.get("user_id", "")
+    session_id = state.get("session_id", "")
+    # Build memory context for personalized response
+    memory_context = ""
+    if user_id and session_id:
+        try:
+            from helpers.memory import get_memory_manager
+            memory_manager = get_memory_manager()
+            memory_context = await memory_manager.build_context_for_node(user_id, session_id, "general")
+            if memory_context:
+                logger.info(f"🧠 Using memory context for tool rejection response")
+        except Exception as e:
+            logger.warning(f"⚠️ Could not load memory context for tool rejection: {e}")
+    # Generate a more helpful and personalized response using LLM
+    try:
+        # Load LLM for generating helpful response
+        llm = await model_manager.load_llm_model(Constants.DEFAULT_LLM_NAME)
+        # Get information about what tools were proposed
+        pending_tool_calls = state.get("pending_tool_calls", [])
+        tool_names = [tool.get('name', 'unknown tool') for tool in pending_tool_calls] if pending_tool_calls else ["tools"]
+        # Create prompt for helpful response
+        system_prompt = Constants.GENERAL_SYSTEM_PROMPT + r"""
+            You are ComputeAgent, a helpful AI assistant. The user has chosen **not to use** the recommended {', '.join(tool_names)} for their query.
+            Your task is to respond in a **positive, supportive, and helpful way** that:
+            1. Respectfully acknowledges their choice.
+            2. Suggests alternative ways to assist them.
+            3. Offers ideas on how they might **rephrase or clarify** their query for better results.
+            4. Personalizes the response using any available conversation context.
+            User's Query: {query}
+            {f"Conversation Context: {memory_context}" if memory_context else ""}
+            Provide a **helpful, encouraging, and concise response** (2-3 sentences) that guides the user toward next steps without pressuring them to use the tool.
+            """
+        response = await llm.ainvoke([HumanMessage(content=system_prompt)])
+        exit_message = response.content.strip()
+        logger.info(f"🤖 Generated personalized tool rejection response for user {user_id}, session {session_id}")
+    except Exception as e:
+        logger.warning(f"⚠️ Could not generate LLM response for tool rejection: {e}")
+        # Fallback to enhanced static message
+        if memory_context:
+            exit_message = f"I understand you'd prefer not to use the suggested tools. Based on our conversation, I can try to help you in other ways. Could you please rephrase your question or let me know what specific information you're looking for? I'm here to assist you however I can."
+        else:
+            exit_message = "I understand you'd prefer not to use the suggested tools. I'm happy to help you in other ways! Could you please rephrase your question or provide more details about what you're looking for? I'm here to assist you with direct answers whenever possible."
+    # Update state with final response
+    updated_state = state.copy()
+    updated_state["response"] = exit_message
+    updated_state["current_step"] = "tool_rejection_exit"
+    # Remove tools from state to avoid serialization issues
+    if "tools" in updated_state:
+        del updated_state["tools"]
+    logger.info("✅ Tool rejection exit complete with helpful response")
+    return updated_state

ComputeAgent/nodes/ReAct_DeployModel/__init__.py ADDED Viewed

	@@ -0,0 +1,13 @@

+"""
+ReAct DeployModel Package
+This package contains specialized nodes for the model deployment workflow.
+Each node handles a specific aspect of the deployment process:
+- extract_model_info: Extracts model information from user queries
+- generate_model_name: Generates model names when information is unknown
+- capacity_estimation: Estimates compute capacity requirements
+- capacity_approval: Requests human (or automatic) approval of the capacity estimate
+Author: ComputeAgent Team
+License: Private
+"""

ComputeAgent/nodes/ReAct_DeployModel/capacity_approval.py ADDED Viewed

	@@ -0,0 +1,183 @@

+"""
+Capacity Approval Node Using LangGraph Interrupt
+This version uses LangGraph's native interrupt() for human-in-the-loop approval.
+Much cleaner than the previous implementation!
+Key Changes:
+1. Uses interrupt() to pause execution and wait for approval
+2. Returns approval decision directly from interrupt
+3. Simpler state management
+"""
+import logging
+from typing import Dict, Any
+from langgraph.types import interrupt
+from constant import Constants
+logger = logging.getLogger("CapacityApproval")
+LOCATION_GPU_MAP = {
+    "France": ["RTX 4090"],
+    "UAE-1": ["RTX 4090"],
+    "Texas": ["RTX 5090"],
+    "UAE-2": ["RTX 5090"]
+}
+# Reverse mapping for finding locations by GPU type
+GPU_LOCATION_MAP = {}
+for location, gpu_types in LOCATION_GPU_MAP.items():
+    for gpu_type in gpu_types:
+        if gpu_type not in GPU_LOCATION_MAP:
+            GPU_LOCATION_MAP[gpu_type] = []
+        GPU_LOCATION_MAP[gpu_type].append(location)
+async def capacity_approval_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Node that handles human approval for capacity estimation using LangGraph interrupt.
+    Uses interrupt() to pause execution and wait for approval decision from API/UI.
+    Args:
+        state: Current deployment state with capacity estimation results
+    Returns:
+        Updated state with approval results
+    """
+    logger.info("👤 Starting capacity estimation approval process")
+    # Check if capacity estimation was successful
+    if state.get("capacity_estimation_status") != "success":
+        logger.error("❌ Cannot approve capacity - estimation not successful")
+        state["capacity_approval_status"] = "error"
+        state["error"] = "Capacity estimation must be successful before approval"
+        return state
+    # Extract capacity estimation details
+    model_name = state.get("model_name", "Unknown Model")
+    estimated_gpu_memory = state.get("estimated_gpu_memory", 0)
+    gpu_requirements = state.get("gpu_requirements", {})
+    logger.info(f"📊 Requesting approval for {model_name}: {estimated_gpu_memory:.2f} GB")
+    # Prepare approval data to send to user
+    capacity_response = state.get("response", "")
+    approval_data = {
+        "model_name": model_name,
+        "estimated_gpu_memory": estimated_gpu_memory,
+        "gpu_requirements": gpu_requirements,
+        "formatted_response": capacity_response,
+        "model_info": state.get("model_info", {}),
+        "capacity_estimate": state.get("capacity_estimate", {}),
+        "gpu_type": state.get("model_info", {}).get("GPU_type", "RTX 4090"),
+        "location": state.get("model_info", {}).get("location", "UAE-1"),
+        "cost_estimates": state.get("capacity_estimate", {}).get("cost_estimates", {})
+    }
+    # ✨ USE INTERRUPT - This pauses execution and waits for user input
+    user_decision = interrupt(approval_data)
+    logger.info(f"📥 Received approval decision: {user_decision}")
+    # Process the approval decision
+    if isinstance(user_decision, dict):
+        capacity_approved = user_decision.get("capacity_approved")
+        custom_config = user_decision.get("custom_config", {})
+        needs_re_estimation = user_decision.get("needs_re_estimation", False)
+        # Handle re-estimation FIRST (highest priority)
+        if needs_re_estimation is True:
+            logger.info(f"🔄 Re-estimation requested with custom config")
+            # Update model_info with new config
+            model_info = state.get("model_info", {}).copy()
+            model_info.update(custom_config)
+            state["model_info"] = model_info
+            # Clear previous estimation results
+            state["capacity_estimate"] = {}
+            state["estimated_gpu_memory"] = 0
+            state["gpu_requirements"] = {}
+            # Set routing flags
+            state["needs_re_estimation"] = True
+            state["capacity_approval_status"] = "re_estimation_requested"
+            state["current_step"] = "capacity_re_estimation"
+            state["capacity_approved"] = None
+            logger.info(f"🔄 State prepared for re-estimation")
+            return state
+        # Handle approval
+        if capacity_approved is True:
+            logger.info(f"✅ Capacity estimation approved for {model_name}")
+            state["capacity_approved"] = True
+            state["capacity_approval_status"] = "approved"
+            state["current_step"] = "capacity_approved"
+            state["needs_re_estimation"] = False
+            # Apply custom configuration if provided
+            if custom_config:
+                logger.info(f"🔧 Applying custom inference configuration")
+                model_info = state.get("model_info", {}).copy()
+                model_info.update(custom_config)
+                state["model_info"] = model_info
+        # Handle rejection
+        elif capacity_approved is False:
+            logger.info(f"❌ Capacity estimation rejected for {model_name}")
+            state["capacity_approved"] = False
+            state["capacity_approval_status"] = "rejected"
+            state["current_step"] = "capacity_rejected"
+            state["needs_re_estimation"] = False
+    else:
+        # Simple boolean response (backward compatibility)
+        if user_decision:
+            logger.info(f"✅ Capacity estimation approved for {model_name}")
+            state["capacity_approved"] = True
+            state["capacity_approval_status"] = "approved"
+            state["current_step"] = "capacity_approved"
+            state["needs_re_estimation"] = False
+        else:
+            logger.info(f"❌ Capacity estimation rejected for {model_name}")
+            state["capacity_approved"] = False
+            state["capacity_approval_status"] = "rejected"
+            state["current_step"] = "capacity_rejected"
+            state["needs_re_estimation"] = False
+    return state
+async def auto_capacity_approval_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Node that automatically approves capacity estimation when human approval is disabled.
+    Args:
+        state: Current deployment state with capacity estimation results
+    Returns:
+        Updated state with automatic approval
+    """
+    logger.info("🤖 Auto-approving capacity estimation")
+    # Check if capacity estimation was successful
+    if state.get("capacity_estimation_status") != "success":
+        logger.error("❌ Cannot auto-approve capacity - estimation not successful")
+        state["capacity_approval_status"] = "error"
+        state["error"] = "Capacity estimation must be successful before auto-approval"
+        return state
+    model_name = state.get("model_name", "Unknown Model")
+    # Automatically approve the capacity estimation
+    state["capacity_approved"] = True
+    state["capacity_approval_status"] = "auto_approved"
+    state["custom_capacity"] = {}
+    state["current_step"] = "capacity_auto_approved"
+    logger.info(f"✅ Capacity estimation auto-approved for {model_name}")
+    return state

ComputeAgent/nodes/ReAct_DeployModel/capacity_estimation.py ADDED Viewed

	@@ -0,0 +1,387 @@

+"""
+Capacity Estimation Node
+This node handles the estimation of compute capacity requirements for model deployment.
+It provides a vLLM-style GPU memory estimate and derives GPU counts and costs from it.
+Key Features:
+    - GPU memory estimation from parameter count, dtype and inference settings
+    - GPU count and hourly/daily/weekly cost per supported GPU type
+    - State management for workflow
+Author: ComputeAgent Team
+License: Private
+"""
+import logging
+import math
+from typing import Dict, Any
+logger = logging.getLogger("CapacityEstimation")
+# Mapping dtype to factor (bytes per parameter)
+DTYPE_FACTOR = {
+    # Standard PyTorch dtypes
+    "auto": 2,
+    "half": 2,
+    "float16": 2,
+    "fp16": 2,
+    "bfloat16": 2,
+    "bf16": 2,
+    "float": 4,
+    "float32": 4,
+    "fp32": 4,
+    # Quantized dtypes
+    "fp8": 1,
+    "fp8_e4m3": 1,
+    "fp8_e5m2": 1,
+    "f8_e4m3": 1,  # HuggingFace naming convention
+    "f8_e5m2": 1,
+    "int8": 1,
+    "int4": 0.5,
+}
+KV_CACHE_DTYPE_FACTOR = {
+    "auto": None,  # Will be set to model dtype factor
+    "float32": 4,
+    "fp32": 4,
+    "float16": 2,
+    "fp16": 2,
+    "bfloat16": 2,
+    "bf16": 2,
+    "fp8": 1,
+    "fp8_e5m2": 1,
+    "fp8_e4m3": 1,
+    "f8_e4m3": 1,  # HuggingFace naming convention
+    "f8_e5m2": 1,
+    "int8": 1,
+}
+# GPU specifications (in GB)
+GPU_SPECS = {
+    "RTX 4090": 24,
+    "RTX 5090": 32,
+}
+# GPU pricing (in EUR per hour)
+GPU_PRICING = {
+    "RTX 4090": 0.2,
+    "RTX 5090": 0.4,
+}
+def normalize_dtype(dtype: str) -> str:
+    """
+    Normalize dtype string to a canonical form for consistent lookup.
+    Args:
+        dtype: Raw dtype string (e.g., "F8_E4M3", "BF16", "float16")
+    Returns:
+        Normalized dtype string in lowercase with underscores
+    """
+    if not dtype:
+        return "auto"
+    # Convert to lowercase and handle common variations
+    normalized = dtype.lower()
+    # Handle HuggingFace safetensors naming conventions
+    # F8_E4M3 -> f8_e4m3, BF16 -> bf16, etc.
+    return normalized
+def get_dtype_factor(dtype: str, default: int = 2) -> float:
+    """
+    Get the bytes-per-parameter factor for a given dtype.
+    Args:
+        dtype: Data type string
+        default: Default factor if dtype not found
+    Returns:
+        Factor (bytes per parameter)
+    """
+    normalized = normalize_dtype(dtype)
+    return DTYPE_FACTOR.get(normalized, default)
+def estimate_vllm_gpu_memory(
+    num_params: int,
+    dtype: str = "auto",
+    num_hidden_layers: int = None,
+    hidden_size: int = None,
+    intermediate_size: int = None,
+    num_key_value_heads: int = None,
+    head_dim: int = None,
+    max_model_len: int = 2048,
+    max_num_seqs: int = 256,
+    max_num_batched_tokens: int = 2048,
+    kv_cache_dtype: str = "auto",
+    gpu_memory_utilization: float = 0.9,
+    cpu_offload_gb: float = 0.0,
+    is_quantized: bool = None  # NEW: indicate if num_params is already quantized
+) -> float:
+    """
+    Estimate GPU memory for a model. Handles:
+    1. Full parameter info -> detailed estimation
+    2. Only num_params and dtype -> rough estimation
+    Returns memory in GB
+    Args:
+        num_params: Number of parameters. For quantized models from HF API,
+                    this is already in the quantized format.
+        is_quantized: If True, num_params represents quantized size.
+                     If None, auto-detect from dtype.
+    """
+    constant_margin = 1.5
+    dtype_factor = get_dtype_factor(dtype, default=2)
+    # Auto-detect if model is quantized
+    if is_quantized is None:
+        quantized_dtypes = ["fp8", "f8_e4m3", "f8_e5m2", "int8", "int4", "fp8_e4m3", "fp8_e5m2"]
+        is_quantized = normalize_dtype(dtype) in quantized_dtypes
+    # Case 1: Only num_params available (simplified)
+    if None in [num_hidden_layers, hidden_size, intermediate_size, num_key_value_heads, head_dim]:
+        if is_quantized:
+            # num_params already represents quantized size
+            # HF API returns parameter count in the quantized dtype
+            # So we DON'T multiply by dtype_factor again
+            model_weight = num_params / 1e9  # Already accounts for quantization
+        else:
+            # For non-quantized models, calculate weight from params
+            model_weight = (num_params * dtype_factor) / 1e9
+        # Rough activation estimate (typically FP16 regardless of weight dtype)
+        # Activation memory is roughly 1-2x model weight for transformer models
+        activation_estimate = model_weight * 1.5
+        estimated_gpu_memory = (model_weight + activation_estimate + constant_margin) / gpu_memory_utilization - cpu_offload_gb
+        return estimated_gpu_memory
+    # Case 2: Full info available -> detailed vLLM formula
+    if is_quantized:
+        model_weight = num_params / 1e9
+    else:
+        model_weight = (num_params * dtype_factor) / 1e9
+    if kv_cache_dtype == "auto":
+        # For quantized models, KV cache often uses FP16/BF16, not FP8
+        kv_cache_dtype_factor = 2 if is_quantized else dtype_factor
+    else:
+        normalized_kv = normalize_dtype(kv_cache_dtype)
+        kv_cache_dtype_factor = KV_CACHE_DTYPE_FACTOR.get(normalized_kv, 2)
+    per_seq_kv_cache_memory = (2 * num_key_value_heads * head_dim * num_hidden_layers *
+                               kv_cache_dtype_factor * max_model_len) / 1e9
+    total_kv_cache_memory = min(
+        per_seq_kv_cache_memory * max_num_seqs,
+        (2 * num_hidden_layers * hidden_size * kv_cache_dtype_factor * max_num_batched_tokens) / 1e9
+    )
+    # Activations are typically FP16/BF16 even for quantized models
+    activation_dtype_factor = 2  # Assume FP16 activations
+    activation_peak_memory = max_model_len * ((18 * hidden_size) + (4 * intermediate_size)) * activation_dtype_factor / 1e9
+    required_gpu_memory = (model_weight + total_kv_cache_memory + activation_peak_memory + constant_margin) / gpu_memory_utilization - cpu_offload_gb
+    return required_gpu_memory
+def calculate_gpu_requirements(estimated_memory_gb: float) -> Dict[str, Any]:
+    """
+    Calculate number of GPUs needed and costs for different GPU types.
+    Args:
+        estimated_memory_gb: Estimated GPU memory requirement in GB
+    Returns:
+        Dictionary containing GPU requirements and cost information
+    """
+    gpu_requirements = {}
+    cost_estimates = {}
+    for gpu_type, gpu_memory in GPU_SPECS.items():
+        # Account for ~10% overhead for communication and fragmentation in multi-GPU setup
+        usable_memory = gpu_memory * 0.9
+        num_gpus = math.ceil(estimated_memory_gb / usable_memory)
+        # Calculate costs
+        hourly_cost = num_gpus * GPU_PRICING[gpu_type]
+        daily_cost = hourly_cost * 24
+        weekly_cost = hourly_cost * 24 * 7
+        gpu_requirements[gpu_type] = num_gpus
+        cost_estimates[gpu_type] = {
+            "hourly": hourly_cost,
+            "daily": daily_cost,
+            "weekly": weekly_cost
+        }
+    return {
+        "gpu_requirements": gpu_requirements,
+        "cost_estimates": cost_estimates
+    }
+async def capacity_estimation_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Estimate GPU memory for a model deployment using vLLM-based computation.
+    Handles both initial estimation and re-estimation with custom inference config.
+    """
+    # Check if this is a re-estimation
+    is_re_estimation = state.get("needs_re_estimation", False)
+    if is_re_estimation:
+        logger.info("🔄 Starting capacity re-estimation with custom inference configuration")
+        # Reset the re-estimation flag
+        state["needs_re_estimation"] = False
+        state["capacity_approved"] = False
+    else:
+        logger.info("⚡ Starting capacity estimation node")
+    try:
+        model_name = state.get("model_name")
+        model_info = state.get("model_info")
+        if not model_name or not model_info:
+            logger.error("❌ Missing model information")
+            state["capacity_estimation_status"] = "error"
+            state["error"] = "Model information required for capacity estimation"
+            return state
+        # Extract safetensors info
+        dtype = model_info.get("dtype", "auto")
+        num_params = model_info.get("num_params", None)
+        # Extract required parameters for GPU memory estimation
+        params = {
+            "num_params": num_params,
+            "dtype": dtype,
+            "num_hidden_layers": model_info.get("num_hidden_layers"),
+            "hidden_size": model_info.get("hidden_size"),
+            "intermediate_size": model_info.get("intermediate_size"),
+            "num_key_value_heads": model_info.get("num_key_value_heads"),
+            "head_dim": model_info.get("head_dim"),
+            "max_model_len": model_info.get("max_model_len", 2048),
+            "max_num_seqs": model_info.get("max_num_seqs", 256),
+            "max_num_batched_tokens": model_info.get("max_num_batched_tokens", 2048),
+            "kv_cache_dtype": model_info.get("kv_cache_dtype", "auto"),
+            "gpu_memory_utilization": model_info.get("gpu_memory_utilization", 0.9),
+            "cpu_offload_gb": model_info.get("cpu_offload_gb", 0.0)
+        }
+        estimated_gpu_memory = estimate_vllm_gpu_memory(**params)
+        # Calculate GPU requirements and costs
+        gpu_data = calculate_gpu_requirements(estimated_gpu_memory)
+        gpu_requirements = gpu_data["gpu_requirements"]
+        cost_estimates = gpu_data["cost_estimates"]
+        # Store in state
+        state["estimated_gpu_memory"] = estimated_gpu_memory
+        state["gpu_requirements"] = gpu_requirements
+        state["cost_estimates"] = cost_estimates
+        state["capacity_estimation_status"] = "success"
+        # Build comprehensive response
+        model_size_b = num_params / 1e9 if num_params else "Unknown"
+        # Model architecture details
+        architecture_info = []
+        if model_info.get("num_hidden_layers"):
+            architecture_info.append(f"**Layers:** {model_info['num_hidden_layers']}")
+        if model_info.get("hidden_size"):
+            architecture_info.append(f"**Hidden Size:** {model_info['hidden_size']}")
+        if model_info.get("num_attention_heads"):
+            architecture_info.append(f"**Attention Heads:** {model_info['num_attention_heads']}")
+        if model_info.get("num_key_value_heads"):
+            architecture_info.append(f"**KV Heads:** {model_info['num_key_value_heads']}")
+        if model_info.get("intermediate_size"):
+            architecture_info.append(f"**Intermediate Size:** {model_info['intermediate_size']}")
+        if model_info.get("max_position_embeddings"):
+            architecture_info.append(f"**Max Position Embeddings:** {model_info['max_position_embeddings']}")
+        architecture_section = "\n            ".join(architecture_info) if architecture_info else "Limited architecture information available"
+        # Inference configuration
+        inference_config = f"""**Max Model Length:** {params['max_model_len']}
+            **Max Sequences:** {params['max_num_seqs']}
+            **Max Batched Tokens:** {params['max_num_batched_tokens']}
+            **KV Cache dtype:** {params['kv_cache_dtype']}
+            **GPU Memory Utilization:** {params['gpu_memory_utilization']*100:.0f}%"""
+        # GPU requirements and cost section
+        gpu_req_lines = []
+        cost_lines = []
+        # Highlight RTX 4090 and 5090
+        for gpu_type in ["RTX 4090", "RTX 5090"]:
+            if gpu_type in gpu_requirements:
+                num_gpus = gpu_requirements[gpu_type]
+                gpu_memory = GPU_SPECS[gpu_type]
+                costs = cost_estimates[gpu_type]
+                gpu_req_lines.append(f"**{gpu_type}** ({gpu_memory}GB): **{num_gpus} GPU{'s' if num_gpus > 1 else ''}**")
+                cost_lines.append(f"**{gpu_type}:** €{costs['hourly']:.2f}/hour | €{costs['daily']:.2f}/day | €{costs['weekly']:.2f}/week")
+        gpu_requirements_section = "\n            ".join(gpu_req_lines)
+        cost_section = "\n            ".join(cost_lines)
+        # Build final response
+        estimation_title = "**Capacity Re-Estimation Complete**" if is_re_estimation else "**Capacity Estimation Complete**"
+        custom_note = "*Note: Re-estimated with custom inference configuration. " if is_re_estimation else "*Note: "
+        GPU_type = state['custom_inference_config']['GPU_type'] if is_re_estimation else model_info.get('GPU_type', 'RTX 4090')
+        location = state['custom_inference_config']['location'] if is_re_estimation else model_info.get('location', 'UAE-1')
+        state["response"] = f"""
+        {estimation_title}
+        **Model Information:**
+            **Name:** {model_name}
+            **Parameters:** {model_size_b:.2f}B
+            **Data Type:** {dtype}
+        **Architecture Details:**
+            {architecture_section}
+        **Inference Configuration:**
+            {inference_config}
+        **Estimated GPU Memory Required:** {estimated_gpu_memory:.2f} GB
+        **GPU Requirements:**
+            {gpu_requirements_section}
+        **Cost Estimates:**
+            {cost_section}
+        **Selected GPU Type:** {GPU_type}
+        **Deployment Location:** {location}
+        {custom_note}This estimation includes model weights, KV cache, activation peak, and a safety margin. Multi-GPU setups account for ~10% overhead for communication.*"""
+        logger.info(f"✅ Estimated GPU memory: {estimated_gpu_memory:.2f} GB")
+        logger.info(f"📊 GPU Requirements: RTX 4090: {gpu_requirements.get('RTX 4090', 'N/A')}, RTX 5090: {gpu_requirements.get('RTX 5090', 'N/A')}")
+        # Prepare state for human approval - set pending capacity approval
+        state["pending_capacity_approval"] = True
+        state["needs_re_estimation"] = False # Reset flag after processing
+        state["current_step"] = "capacity_estimation_complete"
+    except Exception as e:
+        logger.error(f"❌ Error in capacity estimation: {str(e)}")
+        state["capacity_estimation_status"] = "error"
+        state["error"] = str(e)
+        state["response"] = f"""❌ **Capacity Estimation Failed**
+        **Model:** {state.get('model_name', 'Unknown')}
+        **Error:** {str(e)}
+        Please check if:
+        1. The model exists on HuggingFace
+        2. You have access to the model (if it's gated)
+        3. Your HuggingFace token is valid"""
+    return state

ComputeAgent/nodes/ReAct_DeployModel/extract_model_info.py ADDED Viewed

	@@ -0,0 +1,291 @@

+"""
+Extract Model Info Node
+This node handles the extraction of model information from user queries.
+It uses LLM to extract HuggingFace model names and fetches model metadata from the API.
+Key Features:
+    - LLM-based model name extraction
+    - HuggingFace API integration
+    - Error handling for invalid models
+    - State management for workflow
+Author: ComputeAgent Team
+License: Private
+"""
+import logging
+from typing import Dict, Any, Optional
+import json
+import aiohttp
+from constant import Constants
+from models.model_manager import ModelManager
+from langchain_core.messages import HumanMessage, SystemMessage
+from transformers import AutoConfig
+# Initialize model manager for dynamic LLM loading and management
+# (shared by every node call in this module; created once at import time)
+model_manager = ModelManager()
+# Module logger; the name "ExtractModelInfo" is fixed rather than derived from __name__
+logger = logging.getLogger("ExtractModelInfo")
+async def extract_model_info_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Extract model information from user query and fetch model details.
+    This node:
+    1. Extracts model name from query using LLM
+    2. Fetches model info from HuggingFace API
+    3. Updates state with model information or error status
+    Args:
+        state: Current workflow state containing query
+    Returns:
+        Updated state with model information or extraction status
+    """
+    logger.info("🔍 Starting model information extraction")
+    try:
+        # Initialize LLM
+        llm = await model_manager.load_llm_model(Constants.DEFAULT_LLM_NAME)
+        # Extract model name from query using LLM
+        query = state.get("query", "")
+        logger.info(f"📝 Processing query: {query}")
+        model_name = await extract_model_name_with_llm(query, llm)
+        if model_name == "UNKNOWN" or not model_name:
+            logger.info("❓ Model name not found, will need generation")
+            state["model_extraction_status"] = "unknown"
+            state["needs_generation"] = True
+            return state
+        logger.info(f"📋 Extracted model name: {model_name}")
+        # Fetch model information
+        model_info = await fetch_huggingface_model_info_for_memory(model_name, llm)
+        if "error" in model_info:
+            logger.error(f"❌ Error fetching model info: {model_info['error']}")
+            state["model_extraction_status"] = "error"
+            state["error"] = model_info["error"]
+            return state
+        # Success - update state with model information
+        state["model_name"] = model_name
+        state["model_info"] = model_info
+        state["model_extraction_status"] = "success"
+        state["needs_generation"] = False
+        logger.info(f"✅ Successfully extracted model info for {model_name}")
+        return state
+    except Exception as e:
+        logger.error(f"❌ Error during model info extraction: {str(e)}")
+        state["model_extraction_status"] = "error"
+        state["error"] = f"Model info extraction failed: {str(e)}"
+        return state
+async def extract_model_name_with_llm(query: str, llm) -> str:
+    """
+    Use LLM to extract HuggingFace model name from user query.
+    Args:
+        query: User's natural language query
+        llm: LangChain LLM instance
+    Returns:
+        Extracted model name in format 'owner/model-name' or None
+    """
+    system_prompt = """
+        You are an expert at extracting HuggingFace model names from user queries.
+        Extract the exact HuggingFace model identifier in the format 'owner/model-name'.
+        NEVER fabricate or guess model names. Only extract what is explicitly mentioned in the query.
+        Rule for the UNKNOWN response:
+            - If the model name is written but not the owner, respond with 'UNKNOWN'.
+            - If the owner is written but not the model name, respond with 'UNKNOWN'.
+        Only respond with the model identifier, nothing else.
+        """
+    messages = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=f"Extract the HuggingFace model name from: {query}")
+    ]
+    response = await llm.ainvoke(messages)
+    model_name = response.content.strip()
+    if model_name == "UNKNOWN":
+        return None
+    return model_name
+async def extract_model_dtype_with_llm(model_name: str, parameters_dict: dict, llm) -> Optional[str]:
+    """
+    Use LLM to extract the correct dtype from model name and available parameters.
+    Args:
+        model_name: HuggingFace model name in format 'owner/model-name'
+        parameters_dict: Available dtypes and their parameter counts from HF API
+        llm: LangChain LLM instance
+    Returns:
+        Matching dtype key from parameters_dict or None if cannot be determined
+    """
+    system_prompt = f"""
+You are an expert at identifying data types from HuggingFace model names.
+Given a model name and available dtype options, determine which dtype the model uses.
+Available dtypes: {json.dumps(list(parameters_dict.keys()))}
+Rules:
+- Analyze the model name for dtype indicators (FP8, BF16, INT4, INT8, FP16, etc.)
+- If no dtype indicator is found in the model name by default is BF16 on the model name side.
+- Return ONLY the dtype key that exists in the available options, nothing else
+Only respond with the dtype key or 'UNKNOWN', nothing else.
+"""
+    messages = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=f"Extract the dtype from model name: {model_name}")
+    ]
+    response = await llm.ainvoke(messages)
+    dtype = response.content.strip()
+    # Validate that the returned dtype exists in parameters_dict
+    if dtype not in parameters_dict:
+        logger.warning(f"LLM returned dtype '{dtype}' not in available options: {list(parameters_dict.keys())}")
+        return None
+    return dtype
+async def fetch_huggingface_model_info(model_name: str) -> Dict[str, Any]:
+    """
+    Fetch model information from HuggingFace API.
+    Args:
+        model_name: HuggingFace model identifier (e.g., 'meta-llama/Meta-Llama-3-70B')
+    Returns:
+        Dictionary containing model information
+    """
+    api_url = f"https://huggingface.co/api/models/{model_name}"
+    async with aiohttp.ClientSession() as session:
+        try:
+            async with session.get(api_url) as response:
+                if response.status == 200:
+                    model_info = await response.json()
+                    logger.info(f"✅ Successfully fetched model info for {model_name}")
+                    return model_info
+                elif response.status == 404:
+                    logger.error(f"❌ Model not found: {model_name}")
+                    return {"error": "Model not found", "status": 404}
+                else:
+                    logger.error(f"❌ API error: {response.status}")
+                    return {"error": f"API error: {response.status}", "status": response.status}
+        except Exception as e:
+            logger.error(f"❌ Exception while fetching model info: {str(e)}")
+            return {"error": str(e)}
+async def fetch_huggingface_model_info_for_memory(model_name: str, llm) -> Dict[str, Any]:
+    """
+    Fetch only the information needed for GPU memory estimation from HuggingFace.
+    Returns a dictionary containing:
+    - num_params
+    - dtype
+    - num_hidden_layers
+    - hidden_size
+    - intermediate_size
+    - num_attention_heads
+    - head_dim
+    - max_position_embeddings
+    """
+    result: Dict[str, Any] = {}
+    # Step 1: Fetch metadata from HuggingFace API
+    api_url = f"https://huggingface.co/api/models/{model_name}"
+    async with aiohttp.ClientSession() as session:
+        try:
+            async with session.get(api_url) as response:
+                if response.status == 200:
+                    metadata = await response.json()
+                else:
+                    logger.error(f"❌ API error {response.status} for {model_name}")
+                    return {}
+        except Exception as e:
+            logger.error(f"❌ Exception fetching metadata for {model_name}: {str(e)}")
+            return {}
+    # Extract num_params and dtype
+    safetensors = metadata.get("safetensors", {})
+    parameters_dict = safetensors.get("parameters", {})
+    result["location"] = "UAE-1" # Default location
+    result["GPU_type"] = "RTX4090" # Default GPU type
+    # Usage in your main code:
+    if parameters_dict:
+        result["dtype"] = await extract_model_dtype_with_llm(model_name, parameters_dict, llm)
+        if result["dtype"]:
+            result["num_params"] = parameters_dict[result["dtype"]]
+            logger.info(f"✓ LLM selected dtype: {result['dtype']}")
+        else:
+            # Fallback to first available if LLM couldn't determine
+            result["dtype"] = next(iter(parameters_dict.keys()))
+            result["num_params"] = parameters_dict[result["dtype"]]
+            logger.warning(f"⚠ Using fallback dtype: {result['dtype']}")
+    else:
+        result["dtype"] = "auto"
+        result["num_params"] = metadata.get("num_params") or safetensors.get("total")
+    # Step 2: Fetch model config via transformers
+     # Step 2: Fetch model config via transformers
+    try:
+        # Check if token is available
+        token = Constants.HF_TOKEN if hasattr(Constants, 'HF_TOKEN') and Constants.HF_TOKEN else None
+        if not token:
+            logger.warning(f"⚠️ No HF_TOKEN provided for {model_name}")
+        config = AutoConfig.from_pretrained(
+            model_name,
+            token=token,
+            trust_remote_code=True  # Add this if model uses custom code
+        )
+        result.update({
+            "num_hidden_layers": getattr(config, "num_hidden_layers", None),
+            "hidden_size": getattr(config, "hidden_size", None),
+            "intermediate_size": getattr(config, "intermediate_size", None),
+            "num_attention_heads": getattr(config, "num_attention_heads", None),
+            "num_key_value_heads": getattr(config, "num_key_value_heads", None),  # Added
+            "max_position_embeddings": getattr(config, "max_position_embeddings", None),
+        })
+        # Fallback: if num_key_value_heads is not available, use num_attention_heads
+        if result["num_key_value_heads"] is None and result["num_attention_heads"] is not None:
+            result["num_key_value_heads"] = result["num_attention_heads"]
+            logger.info(f"ℹ️ Using num_attention_heads as num_key_value_heads for {model_name}")
+        # Optional: compute head_dim
+        if result["hidden_size"] and result["num_attention_heads"]:
+            result["head_dim"] = result["hidden_size"] // result["num_attention_heads"]
+    except Exception as e:
+        logger.warning(f"⚠️ Could not fetch model config for {model_name}: {str(e)}")
+    return result

ComputeAgent/nodes/ReAct_DeployModel/generate_additional_info.py ADDED Viewed

	@@ -0,0 +1,83 @@

+"""
+Generate Model Name Node
+This node handles the generation of model names when the initial extraction fails.
+It uses LLM to suggest appropriate model names based on user requirements.
+Key Features:
+    - LLM-based model name generation
+    - Context-aware suggestions
+    - Fallback mechanisms
+    - State management for workflow
+Author: ComputeAgent Team
+License: Private
+"""
+import logging
+from typing import Dict, Any
+from constant import Constants
+from models.model_manager import ModelManager
+from langchain_core.messages import HumanMessage, SystemMessage
+# Initialize model manager for dynamic LLM loading and management
+# (created once at import time and reused by generate_additional_info_node)
+model_manager = ModelManager()
+# Module logger; the name "GenerateModelName" is fixed rather than derived from __name__
+logger = logging.getLogger("GenerateModelName")
+async def generate_additional_info_node(state: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Generate helpful response with model suggestions when model info is not available.
+    Path 1A: No Info Case
+    This node:
+    1. Analyzes the user query for requirements
+    2. Generates appropriate model name suggestions
+    3. Creates a helpful response asking for more details
+    Args:
+        state: Current workflow state containing query and extraction status
+    Returns:
+        Updated state with helpful response and suggestions
+    """
+    logger.info("🎯 Path 1A: Generating helpful response with model suggestions")
+    try:
+        # Initialize LLM
+        llm = await model_manager.load_llm_model(Constants.DEFAULT_LLM_NAME)
+        # Generate model name suggestions based on query
+        query = state.get("query", "")
+        logger.info(f"📝 Generating helpful response for query: {query}")
+        system_prompt = """The user provide a model that cannot be found.
+            Your task is to generate a helpful response asking the user for more detailsabut the HuggingFace models.
+            Instruct the user to provide the exact model name in the format:
+                owner/model-name
+            Do NOT suggest, guess, or provide any instructions or steps for any model.
+            I will respond ONLY after receiving the exact model name.
+            Be polite and concise in your response."""
+        messages = [
+            SystemMessage(content=system_prompt),
+            HumanMessage(content=f"{query}")
+        ]
+        response = await llm.ainvoke(messages)
+        state["generation_status"] = "completed_with_suggestions"
+        state["response"] = response.content
+        logger.info(f"✅ Generated helpful response")
+        return state
+    except Exception as e:
+        logger.error(f"❌ Error during response generation: {str(e)}")
+        state["generation_status"] = "error"
+        state["error"] = f"Response generation failed: {str(e)}"
+        state["response"] = "I encountered an error while trying to help with model suggestions. Please provide the exact HuggingFace model name in the format 'owner/model-name'."
+        return state

ComputeAgent/nodes/__init__.py ADDED Viewed

File without changes

ComputeAgent/routers/compute_agent_HITL.py ADDED Viewed

	@@ -0,0 +1,590 @@

+"""
+Complete ComputeAgent API Router with Full Approval Support
+This router handles BOTH capacity approval and tool approval using LangGraph interrupts.
+Features:
+- Capacity approval (GPU estimation)
+- Tool approval (tool execution control)
+- Tool argument modification
+- Agent re-reasoning
+- Re-estimation support
+Endpoints:
+- POST /api/compute/query - Start a query (may pause at interrupt)
+- POST /api/compute/continue/{thread_id} - Resume after capacity approval
+- POST /api/compute/approve-tools - Handle tool approval decisions
+- GET /api/compute/state/{thread_id} - Get current state
+- GET /api/compute/health - Health check
+Author: ComputeAgent Team
+"""
+import json
+import logging
+from typing import Dict, Any, Optional, List
+from datetime import datetime
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
+from langgraph.types import Command
+# Import ComputeAgent
+from graph.graph import ComputeAgent
+from constant import Constants
+# Initialize logger
+logger = logging.getLogger("ComputeAgent Router")
+# Create the API router; every endpoint below is served under the /api/compute prefix
+compute_agent_router = APIRouter(prefix="/api/compute", tags=["compute_agent"])
+# Module-level agent instance. It stays None until initialize_agent() assigns it;
+# get_agent() raises HTTP 503 while it is still None.
+agent: Optional[ComputeAgent] = None
+async def initialize_agent():
+    """Initialize the ComputeAgent instance asynchronously."""
+    global agent
+    try:
+        logger.info("🚀 Initializing ComputeAgent...")
+        agent = await ComputeAgent.create()
+        logger.info("✅ ComputeAgent initialized successfully")
+        # Disabled to prevent HF Spaces file watcher from triggering restarts
+        # try:
+        #     agent.draw_graph("compute_agent_graph.png")
+        #     logger.info("📊 Graph visualization generated")
+        # except Exception as e:
+        #     logger.warning(f"Could not generate graph visualization: {e}")
+        HUMAN_APPROVAL_CAPACITY = True if Constants.HUMAN_APPROVAL_CAPACITY == "true" else False
+        HUMAN_APPROVAL = True if Constants.HUMAN_APPROVAL == "true" else False
+        logger.info(f"👤 Human Approval: {'ENABLED' if HUMAN_APPROVAL else 'DISABLED'}")
+        logger.info(f"⚙️  Capacity Approval: {'ENABLED' if HUMAN_APPROVAL_CAPACITY else 'DISABLED'}")
+        return agent
+    except Exception as e:
+        logger.error(f"❌ Failed to initialize ComputeAgent: {e}")
+        raise
+def get_agent() -> ComputeAgent:
+    """Get the initialized agent instance."""
+    if agent is None:
+        raise HTTPException(
+            status_code=503,
+            detail="ComputeAgent not initialized. Please restart the application."
+        )
+    return agent
+# ============================================================================
+# REQUEST/RESPONSE MODELS
+# ============================================================================
+class QueryRequest(BaseModel):
+    """Request model for agent queries (body of POST /api/compute/query)."""
+    # Required: the user's query text
+    query: str = Field(..., description="User query to process")
+    # user_id and session_id are joined as "<user_id>_<session_id>" to form the thread id
+    user_id: str = Field(default="default_user", description="User identifier")
+    session_id: str = Field(default="default_session", description="Session identifier")
+class QueryResponse(BaseModel):
+    """Response model for agent queries, used for both completed and interrupted runs."""
+    # False when the workflow state carried an error
+    success: bool
+    # Thread the response belongs to
+    thread_id: str
+    # Text shown to the user
+    response: str
+    agent_decision: str = ""
+    current_step: str = ""
+    state: str  # "waiting_for_input" or "completed"
+    # Set only on interrupt responses; the helper that classifies interrupts can also
+    # yield "unknown"
+    approval_type: Optional[str] = None  # "capacity" or "tool"
+    # Payload of the interrupt that paused the run (None once completed)
+    interrupt_data: Optional[Dict[str, Any]] = None
+    deployment_result: Optional[Dict[str, Any]] = None
+    react_results: Optional[Dict[str, Any]] = None
+    # Error message copied from the workflow state, if any
+    error: Optional[str] = None
+class CapacityApprovalRequest(BaseModel):
+    """
+    Request model for capacity approval.
+    The schema examples show two uses: approving the estimate as is, or asking for
+    a re-estimation with a custom configuration.
+    """
+    # None leaves the approval undecided (see the re-estimation example below)
+    capacity_approved: Optional[bool] = Field(None, description="Whether to approve capacity")
+    needs_re_estimation: Optional[bool] = Field(False, description="Whether to re-estimate")
+    # Overrides for the estimation; defaults to a fresh empty dict per request
+    custom_config: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Custom config")
+    class Config:
+        # Example payloads published in the generated JSON schema / API docs
+        json_schema_extra = {
+            "examples": [
+                {
+                    "capacity_approved": True,
+                    "needs_re_estimation": False,
+                    "custom_config": {}
+                },
+                {
+                    "capacity_approved": None,
+                    "needs_re_estimation": True,
+                    "custom_config": {
+                        "max_model_len": 4096,
+                        "GPU_type": "RTX 5090"
+                    }
+                }
+            ]
+        }
+class ToolApprovalRequest(BaseModel):
+    """
+    Request model for tool approval.
+    Which optional fields are meaningful depends on ``action``; the schema examples
+    below show one payload per kind of action.
+    """
+    # Thread whose pending tool calls are being decided
+    thread_id: str = Field(..., description="Thread ID")
+    action: str = Field(..., description="Action: approve_all, reject_all, approve_selected, modify_and_approve, request_re_reasoning")
+    # Used with "approve_selected" in the examples
+    tool_indices: Optional[List[int]] = Field(None, description="Tool indices for selective approval")
+    # Used with "modify_and_approve" in the examples; each entry there has
+    # "tool_index", "new_args" and "approve"
+    modifications: Optional[List[Dict[str, Any]]] = Field(None, description="Tool modifications")
+    # Used with "request_re_reasoning" in the examples
+    feedback: Optional[str] = Field(None, description="Feedback for re-reasoning")
+    class Config:
+        # Example payloads published in the generated JSON schema / API docs
+        json_schema_extra = {
+            "examples": [
+                {
+                    "thread_id": "user_session_123",
+                    "action": "approve_all"
+                },
+                {
+                    "thread_id": "user_session_123",
+                    "action": "approve_selected",
+                    "tool_indices": [0, 2]
+                },
+                {
+                    "thread_id": "user_session_123",
+                    "action": "modify_and_approve",
+                    "modifications": [
+                        {
+                            "tool_index": 0,
+                            "new_args": {"query": "modified search"},
+                            "approve": True
+                        }
+                    ]
+                },
+                {
+                    "thread_id": "user_session_123",
+                    "action": "request_re_reasoning",
+                    "feedback": "Please search academic papers instead"
+                }
+            ]
+        }
+class HealthResponse(BaseModel):
+    """Response model for health check (GET /api/compute/health)."""
+    # "healthy" when the agent is initialized, otherwise "unhealthy"
+    status: str
+    agent_initialized: bool
+    # Filled from Constants.HUMAN_APPROVAL and Constants.HUMAN_APPROVAL_CAPACITY
+    human_approval_enabled: bool
+    capacity_approval_enabled: bool
+    # ISO-8601 string produced with datetime.now().isoformat()
+    timestamp: str
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+def _determine_approval_type(interrupt_data: Dict[str, Any]) -> str:
+    """Determine if this is capacity or tool approval."""
+    if "tool_calls" in interrupt_data:
+        return "tool"
+    elif "model_name" in interrupt_data and "estimated_gpu_memory" in interrupt_data:
+        return "capacity"
+    return "unknown"
+def _create_success_response(
+    thread_id: str,
+    state: Dict[str, Any],
+    completed: bool = True
+) -> QueryResponse:
+    """Create a success response from state."""
+    return QueryResponse(
+        success=not bool(state.get("error")),
+        thread_id=thread_id,
+        response=state.get("response", "Request completed" if completed else "Waiting for approval"),
+        agent_decision=state.get("agent_decision", ""),
+        current_step=state.get("current_step", ""),
+        state="completed" if completed else "waiting_for_input",
+        approval_type=None,
+        interrupt_data=None,
+        deployment_result=state.get("deployment_result"),
+        react_results=state.get("react_results"),
+        error=state.get("error")
+    )
+def _create_interrupt_response(
+    thread_id: str,
+    state: Dict[str, Any],
+    interrupt_data: Dict[str, Any],
+    approval_type: str
+) -> QueryResponse:
+    """Create an interrupt response."""
+    return QueryResponse(
+        success=True,
+        thread_id=thread_id,
+        response="Waiting for approval",
+        agent_decision=state.get("agent_decision", ""),
+        current_step=state.get("current_step", ""),
+        state="waiting_for_input",
+        approval_type=approval_type,
+        interrupt_data=interrupt_data,
+        deployment_result=None,
+        react_results=None,
+        error=None
+    )
+# ============================================================================
+# MAIN ENDPOINTS
+# ============================================================================
+@compute_agent_router.get("/health", response_model=HealthResponse)
+async def health_check():
+    """Health check endpoint."""
+    return HealthResponse(
+        status="healthy" if agent is not None else "unhealthy",
+        agent_initialized=agent is not None,
+        human_approval_enabled=Constants.HUMAN_APPROVAL,
+        capacity_approval_enabled=Constants.HUMAN_APPROVAL_CAPACITY,
+        timestamp=datetime.now().isoformat()
+    )
+@compute_agent_router.post("/query", response_model=QueryResponse)
+async def process_query(request: QueryRequest):
+    """
+    Process a user query through the ComputeAgent.
+    Returns:
+        - If interrupted: state="waiting_for_input" with approval_type and interrupt_data
+        - If completed: state="completed" with final response
+    """
+    current_agent = get_agent()
+    try:
+        logger.info(f"📥 Processing query for {request.user_id}:{request.session_id}")
+        logger.info(f"🔍 Query: {request.query}")
+        thread_id = f"{request.user_id}_{request.session_id}"
+        thread_config = {"configurable": {"thread_id": thread_id}}
+        # Check if there's existing conversation state for this thread
+        try:
+            existing_state = current_agent.graph.get_state(thread_config)
+            if existing_state and existing_state.values:
+                logger.info(f"📚 Found existing conversation state for {thread_id}")
+                # Update existing state with new query, preserve conversation history
+                initial_state = existing_state.values.copy()
+                initial_state["query"] = request.query
+                initial_state["current_step"] = "start"
+                logger.info(f"📝 Preserving {len(initial_state.get('messages', []))} previous messages")
+            else:
+                logger.info(f"🆕 Starting new conversation for {thread_id}")
+                # Create fresh state for new conversation
+                initial_state = {
+                    "query": request.query,
+                    "user_id": request.user_id,
+                    "session_id": request.session_id,
+                    "response": "",
+                    "current_step": "start",
+                    "agent_decision": "",
+                    "deployment_approved": False,
+                    "model_name": "",
+                    "model_card": {},
+                    "model_info": {},
+                    "capacity_estimate": {},
+                    "deployment_result": {},
+                    "react_results": {},
+                    "tool_calls": [],
+                    "tool_results": [],
+                    "messages": [],
+                    # Add approval state fields
+                    "pending_tool_calls": [],
+                    "approved_tool_calls": [],
+                    "rejected_tool_calls": [],
+                    "modified_tool_calls": [],
+                    "needs_re_reasoning": False,
+                    "re_reasoning_feedback": ""
+                }
+        except Exception as e:
+            logger.warning(f"⚠️ Could not load existing state: {e}, starting fresh")
+            initial_state = {
+                "query": request.query,
+                "user_id": request.user_id,
+                "session_id": request.session_id,
+                "response": "",
+                "current_step": "start",
+                "agent_decision": "",
+                "deployment_approved": False,
+                "model_name": "",
+                "model_card": {},
+                "model_info": {},
+                "capacity_estimate": {},
+                "deployment_result": {},
+                "react_results": {},
+                "tool_calls": [],
+                "tool_results": [],
+                "messages": [],
+                "pending_tool_calls": [],
+                "approved_tool_calls": [],
+                "rejected_tool_calls": [],
+                "modified_tool_calls": [],
+                "needs_re_reasoning": False,
+                "re_reasoning_feedback": ""
+            }
+        # Invoke the graph
+        await current_agent.graph.ainvoke(initial_state, config=thread_config)
+        # Get the updated state
+        graph_state = current_agent.graph.get_state(thread_config)
+        current_state = graph_state.values if graph_state.values else {}
+        # Check if interrupted
+        if graph_state.tasks and graph_state.tasks[0].interrupts:
+            interrupt_value = graph_state.tasks[0].interrupts[0].value
+            approval_type = _determine_approval_type(interrupt_value)
+            logger.info(f"⏸️  Graph paused for {approval_type} approval")
+            return _create_interrupt_response(
+                thread_id,
+                current_state,
+                interrupt_value,
+                approval_type
+            )
+        # Normal completion
+        logger.info(f"✅ Query completed")
+        return _create_success_response(thread_id, current_state, completed=True)
+    except Exception as e:
+        logger.error(f"❌ Error processing query: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}")
+@compute_agent_router.post("/continue/{thread_id}")
+async def continue_after_capacity_approval(
+    thread_id: str,
+    approval: CapacityApprovalRequest
+):
+    """
+    Continue execution after capacity approval.
+    Handles:
+    - Approve capacity (capacity_approved=True)
+    - Reject capacity (capacity_approved=False)
+    - Re-estimate (needs_re_estimation=True with custom_config)
+    """
+    current_agent = get_agent()
+    try:
+        logger.info(f"▶️  Continuing after capacity approval for thread {thread_id}")
+        logger.info(f"   Approved: {approval.capacity_approved}")
+        logger.info(f"   Re-estimate: {approval.needs_re_estimation}")
+        thread_config = {"configurable": {"thread_id": thread_id}}
+        # Prepare user input
+        user_input = {
+            "capacity_approved": approval.capacity_approved,
+            "needs_re_estimation": approval.needs_re_estimation,
+            "custom_config": approval.custom_config or {}
+        }
+        # Resume execution
+        await current_agent.graph.ainvoke(
+            Command(resume=user_input),
+            config=thread_config
+        )
+        # Get updated state
+        graph_state = current_agent.graph.get_state(thread_config)
+        current_state = graph_state.values if graph_state.values else {}
+        # Check if still interrupted (e.g., after re-estimation)
+        if graph_state.tasks and graph_state.tasks[0].interrupts:
+            interrupt_value = graph_state.tasks[0].interrupts[0].value
+            approval_type = _determine_approval_type(interrupt_value)
+            logger.info(f"⏸️  Still interrupted for {approval_type} approval")
+            return _create_interrupt_response(
+                thread_id,
+                current_state,
+                interrupt_value,
+                approval_type
+            )
+        # Execution completed
+        logger.info(f"✅ Execution completed")
+        return _create_success_response(thread_id, current_state, completed=True)
+    except Exception as e:
+        logger.error(f"❌ Error continuing: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Error continuing: {str(e)}")
+@compute_agent_router.post("/approve-tools")
+async def approve_tools(request: ToolApprovalRequest):
+    """
+    Handle tool approval decisions.
+    Supports:
+    - approve_all: Approve all tools
+    - reject_all: Reject all tools
+    - approve_selected: Approve specific tools by index
+    - modify_and_approve: Modify tool arguments and approve
+    - request_re_reasoning: Request agent to reconsider
+    """
+    current_agent = get_agent()
+    try:
+        logger.info(f"▶️  Processing tool approval for thread {request.thread_id}")
+        logger.info(f"   Action: {request.action}")
+        thread_config = {"configurable": {"thread_id": request.thread_id}}
+        # Prepare user input based on action
+        user_input = {
+            "action": request.action
+        }
+        if request.action in ["approve_selected", "reject_selected"]:
+            user_input["tool_indices"] = request.tool_indices or []
+            logger.info(f"   Indices: {user_input['tool_indices']}")
+        elif request.action == "modify_and_approve":
+            user_input["modifications"] = request.modifications or []
+            logger.info(f"   Modifications: {len(user_input['modifications'])}")
+        elif request.action == "request_re_reasoning":
+            user_input["feedback"] = request.feedback or ""
+            logger.info(f"   Feedback: {user_input['feedback'][:100]}...")
+        # Resume execution with tool decision
+        await current_agent.graph.ainvoke(
+            Command(resume=user_input),
+            config=thread_config
+        )
+        # Get updated state
+        graph_state = current_agent.graph.get_state(thread_config)
+        current_state = graph_state.values if graph_state.values else {}
+        # Check if still interrupted (e.g., after re-reasoning -> new tools proposed)
+        if graph_state.tasks and graph_state.tasks[0].interrupts:
+            interrupt_value = graph_state.tasks[0].interrupts[0].value
+            approval_type = _determine_approval_type(interrupt_value)
+            logger.info(f"⏸️  New approval needed: {approval_type}")
+            return _create_interrupt_response(
+                request.thread_id,
+                current_state,
+                interrupt_value,
+                approval_type
+            )
+        # Execution completed
+        logger.info(f"✅ Tool approval processed, execution completed")
+        return _create_success_response(request.thread_id, current_state, completed=True)
+    except Exception as e:
+        logger.error(f"❌ Error processing tool approval: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Error processing tool approval: {str(e)}")
+@compute_agent_router.get("/state/{thread_id}")
+async def get_state(thread_id: str):
+    """
+    Get the current state of a conversation.
+    Useful for:
+    - Checking if waiting for approval
+    - Getting current step
+    - Debugging
+    """
+    current_agent = get_agent()
+    try:
+        thread_config = {"configurable": {"thread_id": thread_id}}
+        graph_state = current_agent.graph.get_state(thread_config)
+        if not graph_state.values:
+            raise HTTPException(status_code=404, detail=f"No state found for thread {thread_id}")
+        # Check for interrupts
+        waiting_for_input = False
+        interrupt_data = None
+        approval_type = None
+        if graph_state.tasks and graph_state.tasks[0].interrupts:
+            waiting_for_input = True
+            interrupt_data = graph_state.tasks[0].interrupts[0].value
+            approval_type = _determine_approval_type(interrupt_data)
+        return {
+            "thread_id": thread_id,
+            "values": graph_state.values,
+            "next": graph_state.next,
+            "waiting_for_input": waiting_for_input,
+            "approval_type": approval_type,
+            "interrupt_data": interrupt_data,
+            "current_step": graph_state.values.get("current_step", ""),
+            "agent_decision": graph_state.values.get("agent_decision", "")
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"❌ Error getting state: {e}")
+        raise HTTPException(status_code=500, detail=f"Error getting state: {str(e)}")
+@compute_agent_router.get("/examples")
+async def get_examples():
+    """Get example queries for testing."""
+    return {
+        "deployment_queries": [
+            "Deploy meta-llama/Llama-3.1-70B",
+            "Deploy mistralai/Mistral-7B-v0.1"
+        ],
+        "tool_queries": [
+            "Search for latest AI developments",
+            "Calculate the fibonacci sequence up to n=10",
+            "What's the weather in Paris?"
+        ],
+        "combined": [
+            "Deploy Llama-3.1-70B and search for its benchmarks"
+        ]
+    }
+@compute_agent_router.get("/info")
+async def get_info():
+    """Get router information."""
+    return {
+        "name": "ComputeAgent API Router",
+        "version": "3.0.0",
+        "description": "AI-powered agent with dual approval support",
+        "features": [
+            "Capacity approval",
+            "Tool approval",
+            "Tool modification",
+            "Re-reasoning",
+            "Re-estimation"
+        ],
+        "endpoints": {
+            "query": "POST /api/compute/query - Start a query",
+            "continue": "POST /api/compute/continue/{thread_id} - Resume after capacity approval",
+            "approve_tools": "POST /api/compute/approve-tools - Handle tool approval",
+            "state": "GET /api/compute/state/{thread_id} - Get current state",
+            "health": "GET /api/compute/health - Health check",
+            "examples": "GET /api/compute/examples - Example queries"
+        }
+    }

ComputeAgent/vllm_engine_args.py ADDED Viewed

	@@ -0,0 +1,325 @@

+"""
+vLLM Engine Arguments Documentation
+This module contains the complete documentation for vLLM engine arguments
+from https://docs.vllm.ai/en/v0.11.0/serving/engine_args.html
+This is used by the deployment system to generate optimal vLLM commands
+without requiring online access.
+Author: ComputeAgent Team
+"""
+# Offline copy of the vLLM engine-argument reference, kept as one markdown-like
+# string: "##" lines are section headings, each flag is followed by its
+# description and, where applicable, its choices and default.
+# Returned verbatim by get_vllm_docs().
+# NOTE(review): several flags below (e.g. --speculative-model, --num-scheduler-steps,
+# --device) look like they come from older vLLM releases than the v0.11.0 named in
+# the heading — confirm against the CLI help of the targeted version.
+VLLM_ENGINE_ARGS_DOC = """
+# vLLM Engine Arguments (v0.11.0)
+## Model Configuration
+--model
+Name or path of the huggingface model to use.
+Default: "facebook/opt-125m"
+--task
+Possible choices: auto, generate, embedding, embed, classify, score, reward
+The task to use the model for. Each vLLM instance only supports one task.
+Default: "auto"
+--tokenizer
+Name or path of the huggingface tokenizer to use. If unspecified, model name or path will be used.
+--skip-tokenizer-init
+Skip initialization of tokenizer and detokenizer.
+--revision
+The specific model version to use. It can be a branch name, a tag name, or a commit id.
+--code-revision
+The specific revision to use for the model code on Hugging Face Hub.
+--tokenizer-revision
+Revision of the huggingface tokenizer to use.
+--tokenizer-mode
+Possible choices: auto, slow, mistral
+The tokenizer mode. "auto" will use the fast tokenizer if available.
+Default: "auto"
+--trust-remote-code
+Trust remote code from huggingface.
+--download-dir
+Directory to download and load the weights, default to the default cache dir of huggingface.
+--load-format
+Possible choices: auto, pt, safetensors, npcache, dummy, tensorizer, sharded_state, gguf, bitsandbytes, mistral, runai_streamer
+The format of the model weights to load.
+Default: "auto"
+--config-format
+Possible choices: auto, hf, mistral
+The format of the model config to load.
+Default: "ConfigFormat.AUTO"
+--dtype
+Possible choices: auto, half, float16, bfloat16, float, float32
+Data type for model weights and activations.
+- "auto" will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models.
+- "half" for FP16. Recommended for AWQ quantization.
+- "bfloat16" for a balance between precision and range.
+Default: "auto"
+--kv-cache-dtype
+Possible choices: auto, fp8, fp8_e5m2, fp8_e4m3
+Data type for kv cache storage. If "auto", will use model data type.
+Default: "auto"
+--max-model-len
+Model context length. If unspecified, will be automatically derived from the model config.
+## Performance & Memory
+--gpu-memory-utilization
+The fraction of GPU memory to be used for the model executor (0.0-1.0).
+This is a per-instance limit. For example, 0.5 would use 50% GPU memory.
+Default: 0.9
+--max-num-batched-tokens
+Maximum number of batched tokens per iteration.
+--max-num-seqs
+Maximum number of sequences per iteration.
+--swap-space
+CPU swap space size (GiB) per GPU.
+Default: 4
+--cpu-offload-gb
+The space in GiB to offload to CPU, per GPU. Default is 0 (no offloading).
+This can virtually increase GPU memory. For example, if you have 24GB GPU and set this to 10,
+it's like having a 34GB GPU.
+Default: 0
+--num-gpu-blocks-override
+If specified, ignore GPU profiling result and use this number of GPU blocks.
+## Distributed Execution
+--tensor-parallel-size, -tp
+Number of tensor parallel replicas. Use for multi-GPU inference.
+Default: 1
+--pipeline-parallel-size, -pp
+Number of pipeline stages.
+Default: 1
+--distributed-executor-backend
+Possible choices: ray, mp, uni, external_launcher
+Backend to use for distributed model workers. "mp" for single host, "ray" for multi-host.
+--max-parallel-loading-workers
+Load model sequentially in multiple batches, to avoid RAM OOM when using tensor parallel.
+## Caching & Optimization
+--enable-prefix-caching, --no-enable-prefix-caching
+Enables automatic prefix caching. Highly recommended for better performance.
+--disable-sliding-window
+Disables sliding window, capping to sliding window size.
+--block-size
+Possible choices: 8, 16, 32, 64, 128
+Token block size for contiguous chunks of tokens.
+Default depends on device (CUDA: up to 32, HPU: 128).
+--enable-chunked-prefill
+Enable chunked prefill for long context processing. Recommended for max-model-len > 8192.
+--max-seq-len-to-capture
+Maximum sequence length covered by CUDA graphs. Falls back to eager mode for longer sequences.
+Default: 8192
+## Quantization
+--quantization, -q
+Possible choices: aqlm, awq, deepspeedfp, tpu_int8, fp8, fbgemm_fp8, modelopt, marlin, gguf,
+gptq_marlin_24, gptq_marlin, awq_marlin, gptq, compressed-tensors, bitsandbytes, qqq, hqq,
+experts_int8, neuron_quant, ipex, quark, moe_wna16, None
+Method used to quantize the weights.
+## Speculative Decoding
+--speculative-model
+The name of the draft model to be used in speculative decoding.
+--num-speculative-tokens
+The number of speculative tokens to sample from the draft model.
+--speculative-max-model-len
+The maximum sequence length supported by the draft model.
+--speculative-disable-by-batch-size
+Disable speculative decoding if the number of enqueue requests is larger than this value.
+--ngram-prompt-lookup-max
+Max size of window for ngram prompt lookup in speculative decoding.
+--ngram-prompt-lookup-min
+Min size of window for ngram prompt lookup in speculative decoding.
+## LoRA Support
+--enable-lora
+If True, enable handling of LoRA adapters.
+--max-loras
+Max number of LoRAs in a single batch.
+Default: 1
+--max-lora-rank
+Max LoRA rank.
+Default: 16
+--lora-dtype
+Possible choices: auto, float16, bfloat16
+Data type for LoRA. If auto, will default to base model dtype.
+Default: "auto"
+--fully-sharded-loras
+Use fully sharded LoRA layers. Likely faster at high sequence length or tensor parallel size.
+## Scheduling & Execution
+--scheduling-policy
+Possible choices: fcfs, priority
+The scheduling policy to use. "fcfs" (first come first served) or "priority".
+Default: "fcfs"
+--num-scheduler-steps
+Maximum number of forward steps per scheduler call.
+Default: 1
+--scheduler-delay-factor
+Apply a delay before scheduling next prompt (delay factor * previous prompt latency).
+Default: 0.0
+--device
+Possible choices: auto, cuda, neuron, cpu, openvino, tpu, xpu, hpu
+Device type for vLLM execution.
+Default: "auto"
+## Logging & Monitoring
+--disable-log-stats
+Disable logging statistics.
+--max-logprobs
+Max number of log probs to return when logprobs is specified in SamplingParams.
+Default: 20
+--disable-async-output-proc
+Disable async output processing. May result in lower performance.
+--otlp-traces-endpoint
+Target URL to which OpenTelemetry traces will be sent.
+--collect-detailed-traces
+Valid choices: model, worker, all
+Collect detailed traces for specified modules (requires --otlp-traces-endpoint).
+## Advanced Options
+--rope-scaling
+RoPE scaling configuration in JSON format. Example: {"rope_type":"dynamic","factor":2.0}
+--rope-theta
+RoPE theta. Use with rope_scaling to improve scaled model performance.
+--enforce-eager
+Always use eager-mode PyTorch. If False, uses hybrid eager/CUDA graph mode.
+--seed
+Random seed for operations.
+Default: 0
+--compilation-config, -O
+torch.compile configuration for the model (0, 1, 2, 3 or JSON string).
+Level 3 is recommended for production.
+--worker-cls
+The worker class to use for distributed execution.
+Default: "auto"
+--enable-sleep-mode
+Enable sleep mode for the engine (CUDA platform only).
+--calculate-kv-scales
+Enable dynamic calculation of k_scale and v_scale when kv-cache-dtype is fp8.
+## Serving Options
+--host
+Host address for the server.
+Default: "0.0.0.0"
+--port
+Port number for the server.
+Default: 8000
+--served-model-name
+The model name(s) used in the API. Can be multiple comma-separated names.
+## Multimodal
+--limit-mm-per-prompt
+Limit how many multimodal inputs per prompt (e.g., image=16,video=2).
+--mm-processor-kwargs
+Overrides for multimodal input processing (JSON format).
+--disable-mm-preprocessor-cache
+Disable caching of multi-modal preprocessor/mapper (not recommended).
+"""
+def get_vllm_docs() -> str:
+    """
+    Get the vLLM engine arguments documentation.
+    Returns:
+        str: Complete vLLM engine arguments documentation
+    """
+    return VLLM_ENGINE_ARGS_DOC
+def get_common_parameters_summary() -> str:
+    """
+    Get a summary of the most commonly used vLLM parameters.
+    Returns:
+        str: Summary of key vLLM parameters
+    """
+    return """
+    ## Most Common vLLM Parameters:
+    **Performance:**
+    - --gpu-memory-utilization: Fraction of GPU memory to use (0.0-1.0, default: 0.9)
+    - --max-model-len: Maximum context length
+    - --max-num-seqs: Maximum sequences per iteration
+    - --max-num-batched-tokens: Maximum batched tokens per iteration
+    - --enable-prefix-caching: Enable prefix caching (recommended)
+    - --enable-chunked-prefill: For long contexts (>8192 tokens)
+    **Model Configuration:**
+    - --dtype: Data type (auto, half, float16, bfloat16, float32)
+    - --kv-cache-dtype: KV cache type (auto, fp8, fp16, bf16)
+    - --quantization: Quantization method (fp8, awq, gptq, etc.)
+    **Distributed:**
+    - --tensor-parallel-size: Number of GPUs for tensor parallelism
+    - --pipeline-parallel-size: Number of pipeline stages
+    **Server:**
+    - --host: Server host address (default: 0.0.0.0)
+    - --port: Server port (default: 8000)
+    """

Compute_MCP/api_data_structure.py ADDED Viewed

	@@ -0,0 +1,398 @@

+import logging
+from typing import List, Optional, Dict, Any, Union
+from pydantic import BaseModel, Field
+from typing import Any
+from enum import Enum
+from constant import Constants
+import requests
+# Enum for instance status
+class InstanceStatus(Enum):
+    """Lifecycle states of a compute instance, each bound to an integer code."""
+    # HiveComputeAPI converts the raw "status" value of an instance payload with
+    # InstanceStatus(<value>), so these integers must match what the API sends.
+    CREATED = 0
+    DEPLOYED = 1
+    STARTING = 2
+    RUNNING = 3
+    ERRORED = 4
+    TERMINATING = 5
+    TERMINATED = 6
+    STOPPING = 7
+    STOPPED = 8
+class Timestamp(BaseModel):
+    """Point in time expressed as two integers, `seconds` and `nanos`."""
+    # NOTE(review): presumably seconds since the Unix epoch plus a nanosecond
+    # remainder (protobuf-style timestamp) — confirm against the API.
+    seconds: int
+    nanos: int
+# GPU model
+class GPUInfo(BaseModel):
+    """One GPU attached to an instance, identified only by its model name."""
+    model: str
+# Port info
+class PortInfo(BaseModel):
+    """Mapping of one exposed port: protocol, container-side port and node-side port."""
+    protocol: str
+    container_port: int
+    node_port: int
+class InstanceSpending(BaseModel):
+    """Cost figures for one instance: its hourly price and the total spent so far."""
+    # Declared before InstanceInfo because InstanceInfo.spending is annotated with it.
+    # NOTE(review): the currency/unit of the two amounts is not stated in this file.
+    instance_id: str
+    hourly_price: float
+    total_spend: float
+# InstanceInfo for GET method
+class InstanceInfo(BaseModel):
+    """
+    Description of one compute instance as parsed from the API's instance JSON.
+    Every field is optional and defaults to None, so partial payloads still load.
+    """
+    # Identity
+    id: Optional[str] = None
+    deployment_id: Optional[str] = None
+    name: Optional[str] = None
+    user_id: Optional[str] = None
+    container_image: Optional[str] = None
+    # State; status_string is filled from status.name in __init__ when missing
+    status: Optional[InstanceStatus] = None
+    status_string: Optional[str] = None
+    additional_info: Optional[str] = None
+    type: Optional[int] = None
+    # Lifecycle timestamps
+    created_at: Optional[Timestamp] = None
+    updated_at: Optional[Timestamp] = None
+    ready_at: Optional[Timestamp] = None
+    stopped_at: Optional[Timestamp] = None
+    # Resources (NOTE(review): units are not stated in this file — confirm with the API)
+    cpu: Optional[int] = None
+    memory: Optional[int] = None
+    gpu: Optional[List[GPUInfo]] = None
+    disk: Optional[int] = None
+    bandwidth: Optional[int] = None
+    # Access and placement
+    ssh_key_id: Optional[str] = None
+    location: Optional[str] = None
+    ports: Optional[Dict[str, PortInfo]] = None
+    hive_environment_variables: Optional[Dict[str, Any]] = None
+    environment_variables: Optional[Dict[str, Any]] = None
+    runtime: Optional[int] = None
+    # Cost data; attached separately by HiveComputeAPI.get_all_instances
+    spending: Optional[InstanceSpending] = None
+    def __init__(self, **data):
+        """Validate the fields, then derive status_string from status if it was not given."""
+        super().__init__(**data)
+        if self.status_string is None and isinstance(self.status, InstanceStatus):
+            self.status_string = self.status.name
+# Spending info model
+class InstanceSpending(BaseModel):
+    instance_id: str
+    hourly_price: float
+    total_spend: float
+# Configuration mappings based on the UI tables
+GPU_CONFIGS = {
+    "1x RTX 4090": {
+        "gpu": ["RTX 4090"],
+        "cpu": 8,
+        "memory": 48,
+        "disk": 250,
+        "bandwidth": 1000
+    },
+    "2x RTX 4090": {
+        "gpu": ["RTX 4090", "RTX 4090"],
+        "cpu": 16,
+        "memory": 96,
+        "disk": 500,
+        "bandwidth": 1000
+    },
+    "4x RTX 4090": {
+        "gpu": ["RTX 4090", "RTX 4090", "RTX 4090", "RTX 4090"],
+        "cpu": 32,
+        "memory": 192,
+        "disk": 1000,
+        "bandwidth": 1000
+    },
+    "8x RTX 4090": {
+        "gpu": ["RTX 4090", "RTX 4090", "RTX 4090", "RTX 4090",
+                "RTX 4090", "RTX 4090", "RTX 4090", "RTX 4090"],
+        "cpu": 64,
+        "memory": 384,
+        "disk": 2000,
+        "bandwidth": 1000
+    },
+    "1x RTX 5090": {
+        "gpu": ["RTX 5090"],
+        "cpu": 8,
+        "memory": 48,
+        "disk": 250,
+        "bandwidth": 1000
+    },
+    "2x RTX 5090": {
+        "gpu": ["RTX 5090", "RTX 5090"],
+        "cpu": 16,
+        "memory": 96,
+        "disk": 500,
+        "bandwidth": 1000
+    },
+    "4x RTX 5090": {
+        "gpu": ["RTX 5090", "RTX 5090", "RTX 5090", "RTX 5090"],
+        "cpu": 32,
+        "memory": 192,
+        "disk": 1000,
+        "bandwidth": 1000
+    },
+    "8x RTX 5090": {
+        "gpu": ["RTX 5090", "RTX 5090", "RTX 5090", "RTX 5090",
+                "RTX 5090", "RTX 5090", "RTX 5090", "RTX 5090"],
+        "cpu": 64,
+        "memory": 384,
+        "disk": 2000,
+        "bandwidth": 1000
+    }
+}
+VCPU_CONFIGS = {
+    "2vCPU": {
+        "gpu": [],
+        "cpu": 2,
+        "memory": 4,
+        "disk": 50,
+        "bandwidth": 250
+    },
+    "4vCPU": {
+        "gpu": [],
+        "cpu": 4,
+        "memory": 8,
+        "disk": 100,
+        "bandwidth": 250
+    },
+    "8vCPU": {
+        "gpu": [],
+        "cpu": 8,
+        "memory": 16,
+        "disk": 200,
+        "bandwidth": 500
+    },
+    "16vCPU": {
+        "gpu": [],
+        "cpu": 16,
+        "memory": 32,
+        "disk": 400,
+        "bandwidth": 1000
+    },
+    "32vCPU": {
+        "gpu": [],
+        "cpu": 32,
+        "memory": 64,
+        "disk": 800,
+        "bandwidth": 1000
+    }
+}
+# Location-GPU validation map (using API format - lowercase)
+# Maps each deployable location to the GPU models offered there.
+# HiveComputeAPI.create_instance uses it twice: the keys are the set of valid
+# locations, and for GPU presets the first GPU model of the chosen
+# configuration must appear in the list of the requested location.
+LOCATION_GPU_MAP = {
+    "france": ["RTX 4090"],
+    "uae": ["RTX 4090"],
+    "texas": ["RTX 5090"],
+    "uae-2": ["RTX 5090"]
+}
+class HiveComputeAPI:
+    """
+    A wrapper class that provides methods to interact with the Hive Compute API.
+    """
+    def __init__(self, base_url: str = Constants.HIVE_COMPUTE_BASE_API_URL, token: str = Constants.HIVE_COMPUTE_DEFAULT_API_TOKEN):
+        """
+        Initializes the HiveComputeAPI handler.
+        Args:
+            base_url (str): The base URL of the Hive Compute API.
+            token (str): The authentication token for the Hive Compute API.
+        Note: The ModelRouter will automatically refresh the map of served models upon initialization.
+        """
+        self.base_url = base_url.strip("/")
+        self.token = token
+        self.logger = logging.getLogger(__name__)
+    def __fetch_instance_structure(self, instance_json) -> InstanceInfo:
+        """
+        Fetches the structure of an instance from the API.
+        Returns:
+            InstanceInfo: An InstanceInfo object representing the structure of an instance.
+        """
+        # Ensure instance_json is a dict
+        if not isinstance(instance_json, dict):
+            return {}
+        # Convert only problematic fields
+        if "status" in instance_json and not isinstance(instance_json["status"], InstanceStatus):
+            try:
+                instance_json["status"] = InstanceStatus(instance_json["status"])
+            except Exception:
+                instance_json["status"] = InstanceStatus.CREATED
+        for field in ["created_at", "updated_at", "ready_at", "stopped_at"]:
+            value = instance_json.get(field)
+            if isinstance(value, dict):
+                instance_json[field] = Timestamp(**value)
+            else:
+                instance_json[field] = None
+        if "gpu" in instance_json:
+            instance_json["gpu"] = [GPUInfo(**gpu) for gpu in instance_json.get("gpu", []) if isinstance(gpu, dict)]
+        if "ports" in instance_json:
+            instance_json["ports"] = {k: PortInfo(**v) for k, v in instance_json.get("ports", {}).items() if isinstance(v, dict)}
+        return InstanceInfo(**instance_json)
+    def get_all_instances(self) -> List[InstanceInfo]:
+        """
+        Fetches all compute instances for the authenticated user.
+        Returns:
+            List[InstanceInfo]: A list of InstanceInfo objects representing the user's compute instances.
+        """
+        try:
+            response = requests.get(f"{self.base_url}/instances", headers={
+                "Authorization": f"Bearer {self.token}"
+            })
+            response.raise_for_status()
+            response_json = response.json()
+            spending_map = response_json.get("spending", {})
+            instances = []
+            for inst in response_json.get("instances", []):
+                inst_struct = self.__fetch_instance_structure(inst)
+                spend = spending_map.get(inst.get("id"))
+                if spend:
+                    inst_struct.spending = InstanceSpending(**spend)
+                instances.append(InstanceInfo.model_validate(inst_struct))
+            return instances
+        except requests.RequestException as e:
+            self.logger.error(f"Failed to fetch instances: {e}")
+            return []
+    def create_instance(
+        self,
+        name: str = "default",
+        location: str = "uae",  # Changed default to API format
+        config: str = "1x RTX 4090",
+        container_image: str = "Dockerfile.vulkan",
+        tcp_ports: Optional[List[int]] = None,
+        https_ports: Optional[List[int]] = None,
+        udp_ports: Optional[List[int]] = None,
+        launch_jupyter_notebook: bool = False,
+        instance_type: int = 0,
+        custom_config: Optional[Dict[str, Any]] = None
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Creates a new compute instance using predefined configurations or custom settings.
+        Args:
+            name (str): Name of the instance. Defaults to "default".
+            location (str): Location where the instance will be deployed. Defaults to "uae".
+                        Valid locations: france, uae, texas, uae-2
+            config (str): Predefined configuration. Options:
+                        GPU configs: "1x RTX 4090", "2x RTX 4090", "4x RTX 4090", "8x RTX 4090",
+                                    "1x RTX 5090", "2x RTX 5090", "4x RTX 5090", "8x RTX 5090"
+                        vCPU configs: "2vCPU", "4vCPU", "8vCPU", "16vCPU", "32vCPU"
+                        Defaults to "1x RTX 4090".
+            container_image (str): Docker container image to use. Defaults to "Dockerfile.vulkan".
+            tcp_ports (List[int], optional): List of TCP ports to expose.
+            https_ports (List[int], optional): List of HTTPS ports to expose.
+            udp_ports (List[int], optional): List of UDP ports to expose.
+            launch_jupyter_notebook (bool): Whether to launch Jupyter notebook. Defaults to False.
+            instance_type (int): Type of instance. Defaults to 0.
+            custom_config (Dict[str, Any], optional): Custom configuration to override defaults.
+                                                    Keys: cpu, memory, disk, bandwidth, gpu
+        Returns:
+            Optional[Dict[str, Any]]: A dictionary with 'id' and 'status' keys if successful, None otherwise.
+        Raises:
+            ValueError: If configuration is invalid or GPU type not available in location.
+        """
+        # Combine all configs
+        ALL_CONFIGS = {**GPU_CONFIGS, **VCPU_CONFIGS}
+        # Validate configuration
+        if config not in ALL_CONFIGS:
+            available_configs = list(ALL_CONFIGS.keys())
+            raise ValueError(
+                f"Invalid config: {config}. Available configs: {available_configs}"
+            )
+        # Get base configuration
+        instance_config = ALL_CONFIGS[config].copy()
+        # Apply custom config if provided
+        if custom_config:
+            instance_config.update(custom_config)
+        # Validate location
+        if location not in LOCATION_GPU_MAP:
+            raise ValueError(
+                f"Invalid location: {location}. Valid locations: {list(LOCATION_GPU_MAP.keys())}"
+            )
+        # Validate GPU type for location (only if GPU instance)
+        if instance_config["gpu"]:  # If not empty (i.e., GPU instance)
+            gpu_type = instance_config["gpu"][0]  # Get the GPU model
+            if gpu_type not in LOCATION_GPU_MAP[location]:
+                raise ValueError(
+                    f"GPU type '{gpu_type}' not available in location '{location}'. "
+                    f"Available GPUs: {LOCATION_GPU_MAP[location]}"
+                )
+        # Build the payload - exact format matching the API request
+        payload = {
+            "bandwidth": instance_config["bandwidth"],
+            "container_image": container_image,
+            "cpu": instance_config["cpu"],
+            "disk": instance_config["disk"],
+            "gpu": instance_config["gpu"],
+            "https_ports": https_ports if https_ports is not None else [8888],
+            "launch_jupyter_notebook": launch_jupyter_notebook,
+            "location": location,
+            "memory": instance_config["memory"],
+            "name": name,
+            "tcp_ports": tcp_ports if tcp_ports is not None else [],
+            "type": instance_type,
+            "udp_ports": udp_ports if udp_ports is not None else []
+        }
+        # Log the payload for debugging
+        self.logger.info(f"Creating instance with payload: {payload}")
+        try:
+            response = requests.post(
+                f"{self.base_url}/instances/instance",
+                headers={
+                    "Authorization": f"Bearer {self.token}",
+                    "Content-Type": "application/json"
+                },
+                json=payload
+            )
+            # Log response details for debugging
+            self.logger.info(f"Response status code: {response.status_code}")
+            if response.status_code != 200:
+                self.logger.error(f"Response body: {response.text}")
+            response.raise_for_status()
+            response_data = response.json()
+            instance_data = response_data.get("instance", {})
+            return {
+                "id": instance_data.get("id"),
+                "status": instance_data.get("status")
+            }
+        except requests.RequestException as e:
+            self.logger.error(f"Failed to create instance: {e}")
+            if hasattr(e, 'response') and e.response is not None:
+                self.logger.error(f"Response content: {e.response.text}")
+            return None
+    def get_available_locations(self, gpu_type: Optional[str] = None) -> List[str]:
+        """
+        Get available locations, optionally filtered by GPU type.
+        Args:
+            gpu_type (str, optional): GPU model to filter locations by.
+        Returns:
+            List[str]: List of available locations.
+        """
+        if gpu_type:
+            return [loc for loc, gpus in LOCATION_GPU_MAP.items() if gpu_type in gpus]
+        return list(LOCATION_GPU_MAP.keys())

Compute_MCP/main.py ADDED Viewed

	@@ -0,0 +1,16 @@

+# This allows importing modules from the top-level project directory
+import os
+import sys
+sys.path.append("/home/hivenet")
+"""
+FastMCP HiveCompute Server
+A FastMCP service that performs basic CRUD operations on the 'Compute with Hivenet' platform.
+"""
+from tools import mcp
+if __name__ == "__main__":
+    print("🚀 Compute with Hivenet MCP Server starting ...")
+    mcp.run(transport='stdio')

Compute_MCP/tools.py ADDED Viewed

	@@ -0,0 +1,96 @@

+from api_data_structure import HiveComputeAPI
+from utils import logger, create_success_response, handle_exception
+from typing import Dict, Any
+from fastmcp import FastMCP
+from constant import Constants
+# Shared FastMCP server instance; the @mcp.tool() functions below register on it.
+mcp = FastMCP(
+    name="Compute with Hivenet MCP"
+)
+@mcp.tool()
+async def create_compute_instance(name: str = "default", location: str = "uae", config: str = "1x RTX 4090") -> Dict[str, Any]:
+    """
+    Create a new compute instance with the specified configuration.
+    Args:
+        name: Name of the instance. Defaults to "default".
+        location: Location where the instance will be deployed. Defaults to "uae".
+                  Valid locations: france, uae, texas, uae-2
+        config: Predefined configuration. Defaults to "1x RTX 4090".
+                GPU configs: "1x RTX 4090", "2x RTX 4090", "4x RTX 4090", "8x RTX 4090",
+                            "1x RTX 5090", "2x RTX 5090", "4x RTX 5090", "8x RTX 5090"
+                vCPU configs: "2vCPU", "4vCPU", "8vCPU", "16vCPU", "32vCPU"
+    Returns:
+        Dict containing the created instance information.
+    """
+    token = Constants.HIVE_COMPUTE_DEFAULT_API_TOKEN
+    api_handler = HiveComputeAPI(token=token)
+    try:
+        logger.info(f"Creating compute instance: name={name}, location={location}, config={config}")
+        result = api_handler.create_instance(name=name, location=location, config=config)
+        if result is None:
+            logger.error(f"Failed to create instance - API returned None")
+            return {
+                "status": "error",
+                "error": "Failed to create instance. Check API logs for details."
+            }
+        logger.info(f"Successfully created instance: {result.id if hasattr(result, 'id') else 'unknown'}")
+        return create_success_response(result)
+    except Exception as e:
+        logger.error(f"Exception creating instance: {str(e)}")
+        return handle_exception(e, "create_compute_instance")
+@mcp.tool()
+async def list_all_compute_instances(category: str = None) -> Dict[str, Any]:
+    """
+    List all instances belonging to the user, organized into 4 categories: RUNNING, STOPPED, ERRORED, and TERMINATED.
+    Shows basic information about each instance, including ID, name, status, location, spending, and resource allocation.
+    Spending information includes hourly price and total in EUR spent so far.
+    Args:
+        category: Optional filter to return only instances from a specific category.
+                 Valid values: "RUNNING", "STOPPED", "ERRORED", "TERMINATED".
+                 If not provided, returns all categories.
+    Returns:
+        Dict containing instances. If category is specified, returns only instances from that category.
+        If category is not specified, returns all instances organized by status categories.
+    """
+    token = Constants.HIVE_COMPUTE_DEFAULT_API_TOKEN
+    api_handler = HiveComputeAPI(token=token)
+    try:
+        logger.info(f"Listing all compute instances for token: {token}, category filter: {category}")
+        all_instances = api_handler.get_all_instances()
+        # Categorize instances into 4 groups
+        categorized = {
+            "RUNNING": [],
+            "STOPPED": [],
+            "ERRORED": [],
+            "TERMINATED": []
+        }
+        for inst in all_instances:
+            status = inst.status_string
+            # Map statuses to categories
+            if status in ["RUNNING"]:
+                categorized["RUNNING"].append(inst)
+            elif status in ["STOPPED"]:
+                categorized["STOPPED"].append(inst)
+            elif status == "ERRORED":
+                categorized["ERRORED"].append(inst)
+            elif status in ["TERMINATED"]:
+                categorized["TERMINATED"].append(inst)
+        # If category filter is specified, return only that category
+        if category and category.upper() in categorized:
+            return create_success_response(categorized[category.upper()])
+        # Otherwise return all categories
+        return create_success_response(categorized)
+    except Exception as e:
+        return handle_exception(e, "list_all_compute_instances")

Compute_MCP/utils.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import logging
+from typing import Dict, Any
+# Configure logging: INFO level, "[timestamp][LEVEL] - message" line format
+logging.basicConfig(
+    level=logging.INFO,
+    format='[%(asctime)s][%(levelname)s] - %(message)s'
+)
+# Module-level logger used by the helpers below
+logger = logging.getLogger(__name__)
+def create_success_response(result: Any) -> Dict[str, Any]:
+    """Helper to create a standardized success response."""
+    return {
+        "status": "success",
+        "result": result
+    }
+def handle_exception(e: Exception, operation: str) -> Dict[str, Any]:
+    """Helper to standardize error responses."""
+    logger.exception(f"Error during {operation}: {e}")
+    return {
+        "status": "error",
+        "message": str(e),
+        "operation": operation
+    }

Dockerfile ADDED Viewed

	@@ -0,0 +1,29 @@

+# Use the full (non-slim) Python 3.12 image on Debian bookworm
+FROM python:3.12-bookworm
+# Copy uv binaries from official distroless image
+# (the dependency install below still goes through pip, not uv)
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+WORKDIR /app
+# Copy project dependency files
+#COPY pyproject.toml ./
+#COPY uv.lock ./
+# Install dependencies only (no project code)
+COPY requirements.txt /home/temp/
+RUN pip install --no-cache-dir -r /home/temp/requirements.txt
+# Copy the project into /home/hivenet, the path the Python entry points append to sys.path
+COPY . /home/hivenet/
+WORKDIR /home/hivenet/
+# Copy logo files to root directory for Gradio interface
+# (best effort: a missing file is ignored thanks to "|| true")
+RUN cp ComputeAgent/hivenet.jpg . 2>/dev/null || true
+RUN cp ComputeAgent/ComputeAgent.png . 2>/dev/null || true
+# Install the launcher script and make it executable
+RUN cp run.sh /usr/bin/
+RUN chmod +x /usr/bin/run.sh
+# Run all the applications
+# Port 7860: Gradio Web Interface
+# Port 8000: ComputeAgent API
+# MCP uses stdio (no port needed)
+EXPOSE 7860 8000
+CMD ["/usr/bin/run.sh"]

Gradio_interface.py ADDED Viewed

	@@ -0,0 +1,1374 @@

+"""
+Enhanced Gradio Interface for ComputeAgent with Tool Approval Support
+This interface supports BOTH capacity approval and tool approval with full
+modification capabilities.
+Features:
+- Capacity approval (existing)
+- Tool approval (NEW)
+- Tool argument modification (NEW)
+- Re-reasoning requests (NEW)
+- Batch tool operations (NEW)
+Author: ComputeAgent Team
+"""
+# This allows importing modules from the top-level project directory
+import os
+import sys
+sys.path.append("/home/hivenet")
+import asyncio
+import gradio as gr
+import httpx
+import logging
+from typing import Optional, Dict, Any, List, Tuple
+from datetime import datetime
+import json
+import base64
+from pathlib import Path
+# Root logging at INFO; this module logs under the "ComputeAgent-UI" name
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("ComputeAgent-UI")
+# FastAPI configuration
+API_BASE_URL = "http://localhost:8000"
+# Passed as the timeout of the shared httpx.AsyncClient
+API_TIMEOUT = 300.0
+# GPU/Location configuration: location name -> GPU models offered for the dropdown
+# NOTE(review): these keys are capitalised ("France", "UAE-1"), while the MCP tool
+# docstring lists lowercase locations ("france", "uae") - confirm which form the backend expects.
+LOCATION_GPU_MAP = {
+    "France": ["RTX 4090"],
+    "UAE-1": ["RTX 4090"],
+    "Texas": ["RTX 5090"],
+    "UAE-2": ["RTX 5090"]
+}
+# Load and encode logo
+def get_logo_base64(filename):
+    """Load a logo and convert to base64 for embedding in HTML."""
+    try:
+        logo_path = Path(__file__).parent / filename
+        with open(logo_path, "rb") as f:
+            logo_bytes = f.read()
+        return base64.b64encode(logo_bytes).decode()
+    except Exception as e:
+        logger.warning(f"Could not load logo {filename}: {e}")
+        return None
+# Encoded once at import time; each value is None when its file could not be loaded.
+HIVENET_LOGO_BASE64 = get_logo_base64("hivenet.jpg")
+COMPUTEAGENT_LOGO_BASE64 = get_logo_base64("ComputeAgent.png")
+class ComputeAgentClient:
+    """Client for interacting with ComputeAgent FastAPI backend."""
+    def __init__(self, base_url: str = API_BASE_URL):
+        self.base_url = base_url
+        self.client = httpx.AsyncClient(timeout=API_TIMEOUT)
+    async def send_query(
+        self,
+        query: str,
+        user_id: str = "demo_user",
+        session_id: str = "demo_session"
+    ) -> Dict[str, Any]:
+        """Send query to FastAPI backend."""
+        try:
+            response = await self.client.post(
+                f"{self.base_url}/api/compute/query",
+                json={
+                    "query": query,
+                    "user_id": user_id,
+                    "session_id": session_id
+                }
+            )
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            logger.error(f"❌ Error sending query: {e}")
+            return {"success": False, "error": str(e)}
+    async def continue_execution(
+        self,
+        thread_id: str,
+        user_input: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Continue execution after interrupt."""
+        try:
+            response = await self.client.post(
+                f"{self.base_url}/api/compute/continue/{thread_id}",
+                json=user_input
+            )
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            logger.error(f"❌ Error continuing: {e}")
+            return {"success": False, "error": str(e)}
+    async def approve_tools(
+        self,
+        thread_id: str,
+        decision: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Approve/reject/modify tools."""
+        try:
+            response = await self.client.post(
+                f"{self.base_url}/api/compute/approve-tools",
+                json={
+                    "thread_id": thread_id,
+                    **decision
+                }
+            )
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            logger.error(f"❌ Error with tool approval: {e}")
+            return {"success": False, "error": str(e)}
+class ComputeAgentInterface:
+    """Enhanced Gradio interface with tool approval."""
+    def __init__(self):
+        self.client = ComputeAgentClient()
+        self.current_thread_id = None
+        self.current_interrupt_data = None
+        self.approval_type = None  # "capacity" or "tool"
+        self.selected_tools = set()  # For tool selection
+        self.tool_modifications = {}  # For tool argument mods
+        self.stats = {"total": 0, "successful": 0}
+        logger.info("🚀 ComputeAgent UI initialized with tool approval support")
+    def update_gpu_options(self, location: str):
+        """Update GPU dropdown based on location."""
+        gpus = LOCATION_GPU_MAP.get(location, [])
+        return gr.update(choices=gpus, value=gpus[0] if gpus else None)
+    def get_stats_display(self) -> str:
+        """Format stats display."""
+        success_rate = (self.stats["successful"] / max(1, self.stats["total"])) * 100
+        return f"""**📊 Session Statistics**
+Total Requests: {self.stats["total"]}
+Success Rate: {success_rate:.1f}%"""
+    async def process_query(
+        self,
+        message: str,
+        history: List,
+        user_id: str,
+        session_id: str
+    ):
+        """Process query through FastAPI."""
+        if not message.strip():
+            yield (
+                history, "",
+                gr.update(visible=False),  # capacity_approval_panel
+                gr.update(visible=False),  # capacity_param_panel
+                gr.update(visible=False),  # tool_approval_panel
+                gr.update(visible=False),  # tool_list_panel
+                self.get_stats_display()
+            )
+            return
+        user_id = user_id.strip() or "demo_user"
+        session_id = session_id.strip() or f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        # Add user message
+        history.append([message, "🤖 **Processing...**"])
+        yield (
+            history, "",
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            self.get_stats_display()
+        )
+        try:
+            # Send to API
+            result = await self.client.send_query(message, user_id, session_id)
+            if not result.get("success"):
+                error_msg = f"❌ **Error:** {result.get('error', 'Unknown error')}"
+                history[-1][1] = error_msg
+                yield (
+                    history, "",
+                    gr.update(visible=False),
+                    gr.update(visible=False),
+                    gr.update(visible=False),
+                    gr.update(visible=False),
+                    self.get_stats_display()
+                )
+                return
+            # Check if waiting for approval (interrupt)
+            if result.get("state") == "waiting_for_input":
+                self.current_thread_id = result.get("thread_id")
+                self.current_interrupt_data = result.get("interrupt_data", {})
+                # Determine approval type
+                if "tool_calls" in self.current_interrupt_data:
+                    # Tool approval
+                    self.approval_type = "tool"
+                    formatted_response = self._format_tool_approval(self.current_interrupt_data)
+                    history[-1][1] = formatted_response
+                    yield (
+                        history, "",
+                        gr.update(visible=False),  # capacity panels hidden
+                        gr.update(visible=False),
+                        gr.update(visible=True),   # tool approval visible
+                        gr.update(visible=True),   # tool list visible
+                        self.get_stats_display()
+                    )
+                else:
+                    # Capacity approval
+                    self.approval_type = "capacity"
+                    formatted_response = self.current_interrupt_data.get(
+                        "formatted_response",
+                        self._format_basic_capacity(self.current_interrupt_data)
+                    )
+                    history[-1][1] = formatted_response
+                    yield (
+                        history, "",
+                        gr.update(visible=True),   # capacity approval visible
+                        gr.update(visible=False),
+                        gr.update(visible=False),  # tool panels hidden
+                        gr.update(visible=False),
+                        self.get_stats_display()
+                    )
+                return
+            # Normal completion
+            response_text = result.get("response", "Request completed")
+            history[-1][1] = response_text
+            self.stats["total"] += 1
+            self.stats["successful"] += 1
+            yield (
+                history, "",
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                self.get_stats_display()
+            )
+        except Exception as e:
+            logger.error(f"❌ Error: {e}", exc_info=True)
+            history[-1][1] = f"❌ **Error:** {str(e)}"
+            yield (
+                history, "",
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                self.get_stats_display()
+            )
+    def _format_tool_approval(self, interrupt_data: Dict[str, Any]) -> str:
+        """Format tool approval request for display."""
+        tool_calls = interrupt_data.get("tool_calls", [])
+        query = interrupt_data.get("query", "")
+        if not tool_calls:
+            return "⚠️ No tools proposed"
+        tools_list = []
+        for i, tool in enumerate(tool_calls):
+            tool_name = tool.get("name", "unknown")
+            tool_args = json.dumps(tool.get("args", {}), indent=2)
+            tool_desc = tool.get("description", "No description")
+            tools_list.append(f"""
+**Tool {i+1}: {tool_name}**
+- Description: {tool_desc}
+- Arguments:
+```json
+{tool_args}
+```
+""")
+        tools_text = "\n".join(tools_list)
+        return f"""# 🔧 **Tool Approval Required**
+**Query:** {query}
+**Proposed Tools ({len(tool_calls)}):**
+{tools_text}
+⚠️ **Please review and approve, modify, or request re-reasoning.**
+"""
+    def _format_basic_capacity(self, interrupt_data: Dict[str, Any]) -> str:
+        """Basic capacity formatting if formatted_response not available."""
+        model_name = interrupt_data.get("model_name", "Unknown")
+        memory = interrupt_data.get("estimated_gpu_memory", 0)
+        gpu_reqs = interrupt_data.get("gpu_requirements", {})
+        gpu_lines = [f"  • **{gpu}:** {count} GPU{'s' if count > 1 else ''}"
+                     for gpu, count in gpu_reqs.items()]
+        gpu_text = "\n".join(gpu_lines) if gpu_lines else "  • No requirements"
+        return f"""# 📊 **Capacity Estimation**
+**Model:** `{model_name}`
+**Estimated GPU Memory:** **{memory:.2f} GB**
+**GPU Requirements:**
+{gpu_text}
+⚠️ **Please review and approve or modify the configuration.**
+"""
+    def build_tool_checkboxes(self):
+        """Build checkbox UI for tool selection."""
+        if not self.current_interrupt_data or "tool_calls" not in self.current_interrupt_data:
+            return []
+        tool_calls = self.current_interrupt_data.get("tool_calls", [])
+        # Return list of tool names with indices
+        return [
+            f"[{i}] {tool.get('name', 'unknown')}: {json.dumps(tool.get('args', {}))}"
+            for i, tool in enumerate(tool_calls)
+        ]
+    # ========================================================================
+    # CAPACITY APPROVAL HANDLERS
+    # ========================================================================
+    async def approve_capacity(self, history: List, user_id: str, session_id: str):
+        """Handle capacity approval."""
+        if not self.current_thread_id or self.approval_type != "capacity":
+            history.append([None, "⚠️ No pending capacity approval"])
+            yield (
+                history,
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                self.get_stats_display()
+            )
+            return
+        history.append(["✅ **Approved Capacity**", "🚀 **Continuing deployment...**"])
+        yield (
+            history,
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            self.get_stats_display()
+        )
+        try:
+            approval_input = {
+                "capacity_approved": True,
+                "custom_config": {},
+                "needs_re_estimation": False
+            }
+            result = await self.client.continue_execution(
+                self.current_thread_id,
+                approval_input
+            )
+            # Check if there's another interrupt (e.g., tool approval after capacity approval)
+            if result.get("state") == "waiting_for_input":
+                self.current_interrupt_data = result.get("interrupt_data", {})
+                # Determine approval type
+                if "tool_calls" in self.current_interrupt_data:
+                    # Tool approval needed
+                    self.approval_type = "tool"
+                    formatted_response = self._format_tool_approval(self.current_interrupt_data)
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=False),  # capacity panels hidden
+                        gr.update(visible=False),
+                        gr.update(visible=True),   # tool approval visible
+                        gr.update(visible=True),   # tool list visible
+                        self.get_stats_display()
+                    )
+                else:
+                    # Another capacity approval (re-estimation)
+                    self.approval_type = "capacity"
+                    formatted_response = self.current_interrupt_data.get(
+                        "formatted_response",
+                        self._format_basic_capacity(self.current_interrupt_data)
+                    )
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=True),   # capacity approval visible
+                        gr.update(visible=False),
+                        gr.update(visible=False),  # tool panels hidden
+                        gr.update(visible=False),
+                        self.get_stats_display()
+                    )
+                return
+            # Normal completion
+            if result.get("success"):
+                response = result.get("response", "Deployment completed")
+                history[-1][1] = f"✅ **{response}**"
+                self.stats["total"] += 1
+                self.stats["successful"] += 1
+            else:
+                history[-1][1] = f"❌ **Error:** {result.get('error', 'Unknown error')}"
+            self._clear_approval_state()
+            yield (
+                history,
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                self.get_stats_display()
+            )
+        except Exception as e:
+            logger.error(f"❌ Approval error: {e}")
+            history[-1][1] = f"❌ **Error:** {str(e)}"
+            yield (
+                history,
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                self.get_stats_display()
+            )
+    async def reject_capacity(self, history: List, user_id: str, session_id: str):
+        """Handle capacity rejection."""
+        if not self.current_thread_id or self.approval_type != "capacity":
+            return self._no_approval_response(history)
+        history.append(["❌ **Rejected Capacity**", "Deployment cancelled"])
+        rejection_input = {
+            "capacity_approved": False,
+            "custom_config": {},
+            "needs_re_estimation": False
+        }
+        await self.client.continue_execution(self.current_thread_id, rejection_input)
+        self._clear_approval_state()
+        return (
+            history,
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            self.get_stats_display()
+        )
+    async def apply_capacity_modifications(
+        self,
+        history: List,
+        user_id: str,
+        session_id: str,
+        max_model_len: int,
+        max_num_seqs: int,
+        max_batched_tokens: int,
+        kv_cache_dtype: str,
+        gpu_util: float,
+        location: str,
+        gpu_type: str
+    ):
+        """Apply capacity modifications and re-estimate."""
+        if not self.current_thread_id or self.approval_type != "capacity":
+            history.append([None, "⚠️ No pending capacity approval"])
+            yield self._all_hidden_response(history)
+            return
+        history.append(["🔧 **Re-estimating with new parameters...**", "⏳ **Please wait...**"])
+        yield self._all_hidden_response(history)
+        try:
+            custom_config = {
+                "GPU_type": gpu_type,
+                "location": location,
+                "max_model_len": int(max_model_len),
+                "max_num_seqs": int(max_num_seqs),
+                "max_num_batched_tokens": int(max_batched_tokens),
+                "kv_cache_dtype": kv_cache_dtype,
+                "gpu_memory_utilization": float(gpu_util)
+            }
+            re_estimate_input = {
+                "capacity_approved": None,
+                "custom_config": custom_config,
+                "needs_re_estimation": True
+            }
+            result = await self.client.continue_execution(
+                self.current_thread_id,
+                re_estimate_input
+            )
+            # Check if still waiting for input (could be capacity or tool approval)
+            if result.get("state") == "waiting_for_input":
+                self.current_interrupt_data = result.get("interrupt_data", {})
+                # Determine approval type
+                if "tool_calls" in self.current_interrupt_data:
+                    # Tool approval needed after re-estimation
+                    self.approval_type = "tool"
+                    formatted_response = self._format_tool_approval(self.current_interrupt_data)
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=False),  # capacity panels hidden
+                        gr.update(visible=False),
+                        gr.update(visible=True),   # tool approval visible
+                        gr.update(visible=True),   # tool list visible
+                        self.get_stats_display()
+                    )
+                else:
+                    # Another capacity approval (re-estimation result)
+                    self.approval_type = "capacity"
+                    formatted_response = self.current_interrupt_data.get(
+                        "formatted_response",
+                        self._format_basic_capacity(self.current_interrupt_data)
+                    )
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=True),   # Show capacity approval
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        self.get_stats_display()
+                    )
+            else:
+                # Completed without further interrupts
+                response = result.get("response", "Re-estimation completed")
+                history[-1][1] = f"✅ **{response}**"
+                self._clear_approval_state()
+                yield self._all_hidden_response(history)
+        except Exception as e:
+            logger.error(f"❌ Re-estimation error: {e}")
+            history[-1][1] = f"❌ **Error:** {str(e)}"
+            yield self._all_hidden_response(history)
+    # ========================================================================
+    # TOOL APPROVAL HANDLERS
+    # ========================================================================
+    async def approve_all_tools(self, history: List, user_id: str, session_id: str):
+        """Approve all tools."""
+        if not self.current_thread_id or self.approval_type != "tool":
+            history.append([None, "⚠️ No pending tool approval"])
+            yield self._all_hidden_response(history)
+            return
+        history.append(["✅ **Approved All Tools**", "⚡ **Executing tools...**"])
+        yield self._all_hidden_response(history)
+        try:
+            result = await self.client.approve_tools(
+                self.current_thread_id,
+                {"action": "approve_all"}
+            )
+            # Check if there's another interrupt (agent proposing more tools)
+            if result.get("state") == "waiting_for_input":
+                self.current_interrupt_data = result.get("interrupt_data", {})
+                # Determine approval type
+                if "tool_calls" in self.current_interrupt_data:
+                    # More tools proposed
+                    self.approval_type = "tool"
+                    formatted_response = self._format_tool_approval(self.current_interrupt_data)
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=True),   # tool approval visible
+                        gr.update(visible=True),   # tool list visible
+                        self.get_stats_display()
+                    )
+                else:
+                    # Unexpected capacity approval
+                    self.approval_type = "capacity"
+                    formatted_response = self.current_interrupt_data.get(
+                        "formatted_response",
+                        self._format_basic_capacity(self.current_interrupt_data)
+                    )
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=True),   # capacity approval visible
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        self.get_stats_display()
+                    )
+                return
+            # Normal completion
+            if result.get("success"):
+                response = result.get("response", "Tools executed successfully")
+                history[-1][1] = f"✅ **{response}**"
+                self.stats["total"] += 1
+                self.stats["successful"] += 1
+            else:
+                history[-1][1] = f"❌ **Error:** {result.get('error', 'Unknown error')}"
+            self._clear_approval_state()
+            yield self._all_hidden_response(history)
+        except Exception as e:
+            logger.error(f"❌ Tool approval error: {e}")
+            history[-1][1] = f"❌ **Error:** {str(e)}"
+            yield self._all_hidden_response(history)
+    async def reject_all_tools(self, history: List, user_id: str, session_id: str):
+        """Reject all tools."""
+        if not self.current_thread_id or self.approval_type != "tool":
+            return self._no_approval_response(history)
+        history.append(["❌ **Rejected All Tools**", "Generating response without tools..."])
+        try:
+            result = await self.client.approve_tools(
+                self.current_thread_id,
+                {"action": "reject_all"}
+            )
+            if result.get("success"):
+                response = result.get("response", "Completed without tools")
+                history[-1][1] = f"✅ **{response}**"
+            else:
+                history[-1][1] = f"❌ **Error:** {result.get('error', 'Unknown error')}"
+            self._clear_approval_state()
+        except Exception as e:
+            logger.error(f"❌ Tool rejection error: {e}")
+            history[-1][1] = f"❌ **Error:** {str(e)}"
+        return self._all_hidden_response(history)
+    async def approve_selected_tools(
+        self,
+        history: List,
+        user_id: str,
+        session_id: str,
+        selected_indices: str
+    ):
+        """Approve selected tools by indices."""
+        if not self.current_thread_id or self.approval_type != "tool":
+            history.append([None, "⚠️ No pending tool approval"])
+            yield self._all_hidden_response(history)
+            return
+        # Parse indices (convert from 1-based to 0-based)
+        try:
+            # User enters 1-based indices (1,2,3), convert to 0-based (0,1,2)
+            indices = [int(i.strip()) - 1 for i in selected_indices.split(",") if i.strip()]
+            # Validate indices are non-negative
+            if any(idx < 0 for idx in indices):
+                history.append([None, "❌ Tool indices must be positive numbers (starting from 1). Example: 1,2,3"])
+                yield self._all_hidden_response(history)
+                return
+        except:
+            history.append([None, "❌ Invalid indices format. Use: 1,2,3 (starting from 1)"])
+            yield self._all_hidden_response(history)
+            return
+        history.append([
+            f"✅ **Approved Tools: {indices}**",
+            "⚡ **Executing selected tools...**"
+        ])
+        yield self._all_hidden_response(history)
+        try:
+            result = await self.client.approve_tools(
+                self.current_thread_id,
+                {
+                    "action": "approve_selected",
+                    "tool_indices": indices
+                }
+            )
+            # Check if there's another interrupt
+            if result.get("state") == "waiting_for_input":
+                self.current_interrupt_data = result.get("interrupt_data", {})
+                if "tool_calls" in self.current_interrupt_data:
+                    self.approval_type = "tool"
+                    formatted_response = self._format_tool_approval(self.current_interrupt_data)
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=True),
+                        gr.update(visible=True),
+                        self.get_stats_display()
+                    )
+                else:
+                    self.approval_type = "capacity"
+                    formatted_response = self.current_interrupt_data.get(
+                        "formatted_response",
+                        self._format_basic_capacity(self.current_interrupt_data)
+                    )
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=True),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        self.get_stats_display()
+                    )
+                return
+            # Normal completion
+            if result.get("success"):
+                response = result.get("response", "Selected tools executed")
+                history[-1][1] = f"✅ **{response}**"
+                self.stats["total"] += 1
+                self.stats["successful"] += 1
+            else:
+                history[-1][1] = f"❌ **Error:** {result.get('error', 'Unknown error')}"
+            self._clear_approval_state()
+            yield self._all_hidden_response(history)
+        except Exception as e:
+            logger.error(f"❌ Tool approval error: {e}")
+            history[-1][1] = f"❌ **Error:** {str(e)}"
+            yield self._all_hidden_response(history)
+    async def request_re_reasoning(
+        self,
+        history: List,
+        user_id: str,
+        session_id: str,
+        feedback: str
+    ):
+        """Request agent re-reasoning with feedback."""
+        if not self.current_thread_id or self.approval_type != "tool":
+            history.append([None, "⚠️ No pending tool approval"])
+            yield self._all_hidden_response(history)
+            return
+        if not feedback.strip():
+            history.append([None, "❌ Please provide feedback for re-reasoning"])
+            yield self._all_hidden_response(history)
+            return
+        history.append([
+            f"🔄 **Re-reasoning Request:** {feedback}",
+            "🤔 **Agent reconsidering approach...**"
+        ])
+        yield self._all_hidden_response(history)
+        try:
+            result = await self.client.approve_tools(
+                self.current_thread_id,
+                {
+                    "action": "request_re_reasoning",
+                    "feedback": feedback
+                }
+            )
+            # Should get new tool proposals
+            if result.get("state") == "waiting_for_input":
+                self.current_interrupt_data = result.get("interrupt_data", {})
+                formatted_response = self._format_tool_approval(self.current_interrupt_data)
+                history[-1][1] = formatted_response
+                yield (
+                    history,
+                    gr.update(visible=False),
+                    gr.update(visible=False),
+                    gr.update(visible=True),   # tool approval visible
+                    gr.update(visible=True),   # tool list visible
+                    self.get_stats_display()
+                )
+            else:
+                response = result.get("response", "Re-reasoning completed")
+                history[-1][1] = f"✅ **{response}**"
+                self._clear_approval_state()
+                yield self._all_hidden_response(history)
+        except Exception as e:
+            logger.error(f"❌ Re-reasoning error: {e}")
+            history[-1][1] = f"❌ **Error:** {str(e)}"
+            yield self._all_hidden_response(history)
+    async def modify_tool_args(
+        self,
+        history: List,
+        user_id: str,
+        session_id: str,
+        tool_index: int,
+        new_args_json: str
+    ):
+        """Modify tool arguments and approve."""
+        if not self.current_thread_id or self.approval_type != "tool":
+            history.append([None, "⚠️ No pending tool approval"])
+            yield self._all_hidden_response(history)
+            return
+        # Parse new arguments
+        try:
+            new_args = json.loads(new_args_json)
+        except:
+            history.append([None, "❌ Invalid JSON format for arguments"])
+            yield self._all_hidden_response(history)
+            return
+        history.append([
+            f"🔧 **Modified Tool {tool_index}**",
+            "⚡ **Executing with new arguments...**"
+        ])
+        yield self._all_hidden_response(history)
+        # Convert from 1-based to 0-based index for backend
+        backend_index = tool_index - 1
+        if backend_index < 0:
+            history.append([None, "❌ Tool index must be positive (starting from 1)"])
+            yield self._all_hidden_response(history)
+            return
+        try:
+            result = await self.client.approve_tools(
+                self.current_thread_id,
+                {
+                    "action": "modify_and_approve",
+                    "modifications": [
+                        {
+                            "tool_index": backend_index,
+                            "new_args": new_args,
+                            "approve": True
+                        }
+                    ]
+                }
+            )
+            # Check if there's another interrupt
+            if result.get("state") == "waiting_for_input":
+                self.current_interrupt_data = result.get("interrupt_data", {})
+                if "tool_calls" in self.current_interrupt_data:
+                    self.approval_type = "tool"
+                    formatted_response = self._format_tool_approval(self.current_interrupt_data)
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=True),
+                        gr.update(visible=True),
+                        self.get_stats_display()
+                    )
+                else:
+                    self.approval_type = "capacity"
+                    formatted_response = self.current_interrupt_data.get(
+                        "formatted_response",
+                        self._format_basic_capacity(self.current_interrupt_data)
+                    )
+                    history[-1][1] = formatted_response
+                    yield (
+                        history,
+                        gr.update(visible=True),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        self.get_stats_display()
+                    )
+                return
+            # Normal completion
+            if result.get("success"):
+                response = result.get("response", "Tool executed with modifications")
+                history[-1][1] = f"✅ **{response}**"
+                self.stats["total"] += 1
+                self.stats["successful"] += 1
+            else:
+                history[-1][1] = f"❌ **Error:** {result.get('error', 'Unknown error')}"
+            self._clear_approval_state()
+            yield self._all_hidden_response(history)
+        except Exception as e:
+            logger.error(f"❌ Modification error: {e}")
+            history[-1][1] = f"❌ **Error:** {str(e)}"
+            yield self._all_hidden_response(history)
+    # ========================================================================
+    # HELPER METHODS
+    # ========================================================================
+    def _clear_approval_state(self):
+        """Clear all approval state."""
+        self.current_thread_id = None
+        self.current_interrupt_data = None
+        self.approval_type = None
+        self.selected_tools = set()
+        self.tool_modifications = {}
+    def _all_hidden_response(self, history):
+        """Return response with all panels hidden."""
+        return (
+            history,
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            self.get_stats_display()
+        )
+    def _no_approval_response(self, history):
+        """Return response for no pending approval."""
+        history.append([None, "⚠️ No pending approval"])
+        return self._all_hidden_response(history)
+    def show_capacity_modify_dialog(self):
+        """Show capacity parameter modification dialog."""
+        if not self.current_interrupt_data:
+            return (
+                gr.update(visible=True),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                2048, 256, 2048, "auto", 0.9, "France", "RTX 4090"
+            )
+        model_info = self.current_interrupt_data.get("model_info", {})
+        return (
+            gr.update(visible=False),  # Hide capacity approval
+            gr.update(visible=True),   # Show capacity param
+            gr.update(visible=False),  # Hide tool approval
+            gr.update(visible=False),  # Hide tool list
+            model_info.get("max_model_len", 2048),
+            model_info.get("max_num_seqs", 256),
+            model_info.get("max_num_batched_tokens", 2048),
+            model_info.get("kv_cache_dtype", "auto"),
+            model_info.get("gpu_memory_utilization", 0.9),
+            model_info.get("location", "France"),
+            model_info.get("GPU_type", "RTX 4090")
+        )
+    def cancel_capacity_modify(self):
+        """Cancel capacity modification."""
+        return (
+            gr.update(visible=True),   # Show capacity approval
+            gr.update(visible=False),  # Hide capacity param
+            gr.update(visible=False),
+            gr.update(visible=False)
+        )
+    def clear_chat(self, user_id: str, session_id: str):
+        """Clear chat history."""
+        self._clear_approval_state()
+        return (
+            [],  # Empty history
+            "",  # Clear input
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            self.get_stats_display()
+        )
+    def new_session(self, user_id: str):
+        """Generate new session ID."""
+        new_session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        self._clear_approval_state()
+        return new_session_id
+# Single module-level interface instance; its bound methods are used as the
+# Gradio event handlers below, so pending-approval state lives on this object.
+agent_interface = ComputeAgentInterface()
+# Create Gradio theme matching HiveNet brand colors
+def create_theme():
+    return gr.themes.Soft(
+        primary_hue="orange",
+        secondary_hue="stone",
+        neutral_hue="slate",
+        font=gr.themes.GoogleFont("Inter")
+    ).set(
+        body_background_fill="#1a1a1a",
+        body_background_fill_dark="#0d0d0d",
+        button_primary_background_fill="#d97706",
+        button_primary_background_fill_hover="#ea580c",
+        button_primary_text_color="#ffffff",
+        block_background_fill="#262626",
+        block_border_color="#404040",
+        input_background_fill="#1f1f1f",
+        slider_color="#d97706",
+    )
+# Create interface: header, chat column with the approval panels, sidebar,
+# and (inside the same Blocks context) the event wiring for every control.
+with gr.Blocks(
+    title="ComputeAgent - Enhanced with Tool Approval",
+    theme=create_theme(),
+    css="""
+    .gradio-container {
+        max-width: 100% !important;
+    }
+    .header-box {
+        background: linear-gradient(135deg, #d97706 0%, #ea580c 50%, #dc2626 100%);
+        color: white;
+        padding: 20px;
+        border-radius: 10px;
+        margin-bottom: 20px;
+        position: relative;
+        overflow: hidden;
+    }
+    .header-box::before {
+        content: '';
+        position: absolute;
+        top: 0;
+        left: 0;
+        right: 0;
+        bottom: 0;
+        background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 600 600'%3E%3Cfilter id='noise'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='3' /%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23noise)' opacity='0.05' /%3E%3C/svg%3E");
+        pointer-events: none;
+    }
+    .tool-box {
+        background: rgba(41, 37, 36, 0.5);
+        border: 2px solid #57534e;
+        border-radius: 8px;
+        padding: 15px;
+        margin: 10px 0;
+    }
+    /* Make chatbot fill available height dynamically */
+    .chatbot {
+        height: calc(100vh - 750px) !important;
+        max-height: calc(100vh - 750px) !important;
+    }
+    """
+) as demo:
+    # Header
+    # Logos are inlined as base64 data URIs; each falls back to an empty string
+    # when its *_LOGO_BASE64 constant is falsy.
+    hivenet_logo_html = f'<img src="data:image/jpeg;base64,{HIVENET_LOGO_BASE64}" alt="HiveNet Logo" style="height: 80px; width: auto; object-fit: contain;">' if HIVENET_LOGO_BASE64 else ''
+    computeagent_logo_html = f'<img src="data:image/png;base64,{COMPUTEAGENT_LOGO_BASE64}" alt="ComputeAgent Logo" style="height: 60px; width: auto; object-fit: contain; margin-right: 15px;">' if COMPUTEAGENT_LOGO_BASE64 else ''
+    gr.HTML(f"""
+    <div class="header-box" style="display: flex; justify-content: space-between; align-items: center;">
+        <div style="display: flex; align-items: center;">
+            {computeagent_logo_html}
+            <div>
+                <h1 style="margin: 0; font-size: 2.5em;">ComputeAgent</h1>
+                <p style="margin: 10px 0 0 0; opacity: 0.9;">
+                    Hivenet AI-Powered Deployment using MCP of Compute by Hivenet
+                </p>
+            </div>
+        </div>
+        <div>
+            {hivenet_logo_html}
+        </div>
+    </div>
+    """)
+    with gr.Row():
+        # Main column (scale 11) next to the narrow sidebar column (scale 1).
+        with gr.Column(scale=11):
+            # Chat interface
+            chatbot = gr.Chatbot(
+                label="Agent Conversation",
+                height=900,
+                show_copy_button=True,
+                elem_classes=["chatbot"]
+            )
+            with gr.Row():
+                msg = gr.Textbox(
+                    placeholder="Deploy meta-llama/Llama-3.1-70B or ask: What are the latest AI developments?",
+                    scale=5,
+                    show_label=False
+                )
+                send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
+            # ================================================================
+            # CAPACITY APPROVAL PANEL
+            # ================================================================
+            # Starts hidden; handlers toggle it through gr.update(visible=...).
+            with gr.Row(visible=False) as capacity_approval_panel:
+                capacity_approve_btn = gr.Button("✅ Approve", variant="primary", scale=1)
+                capacity_modify_btn = gr.Button("🔧 Modify", variant="secondary", scale=1)
+                capacity_reject_btn = gr.Button("❌ Reject", variant="stop", scale=1)
+            # Capacity parameter modification panel
+            # Initial field values here match the fallbacks returned by
+            # show_capacity_modify_dialog.
+            with gr.Column(visible=False) as capacity_param_panel:
+                gr.Markdown("## 🔧 Capacity Parameter Optimization")
+                with gr.Row():
+                    with gr.Column():
+                        max_model_len = gr.Number(
+                            label="Context Length",
+                            value=2048,
+                            minimum=1
+                        )
+                        max_num_seqs = gr.Number(
+                            label="Max Sequences",
+                            value=256,
+                            minimum=1
+                        )
+                    with gr.Column():
+                        max_batched_tokens = gr.Number(
+                            label="Batch Size",
+                            value=2048,
+                            minimum=1
+                        )
+                        kv_cache_dtype = gr.Dropdown(
+                            choices=["auto", "float32", "float16", "bfloat16", "fp8"],
+                            value="auto",
+                            label="KV Cache Type"
+                        )
+                    with gr.Column():
+                        gpu_util = gr.Slider(
+                            minimum=0.1,
+                            maximum=1.0,
+                            value=0.9,
+                            step=0.05,
+                            label="GPU Utilization"
+                        )
+                        location = gr.Dropdown(
+                            choices=list(LOCATION_GPU_MAP.keys()),
+                            value="France",
+                            label="Location"
+                        )
+                        # GPU choices start from the "France" entry of the map;
+                        # the location.change handler below refreshes them.
+                        gpu_type = gr.Dropdown(
+                            choices=LOCATION_GPU_MAP["France"],
+                            value="RTX 4090",
+                            label="GPU Type"
+                        )
+                with gr.Row():
+                    capacity_apply_btn = gr.Button("🔄 Re-estimate", variant="primary", scale=2)
+                    capacity_cancel_btn = gr.Button("↩️ Back", variant="secondary", scale=1)
+            # ================================================================
+            # TOOL APPROVAL PANEL
+            # ================================================================
+            # Both tool panels start hidden and are shown together by the
+            # tool-approval handlers.
+            with gr.Row(visible=False) as tool_approval_panel:
+                tool_approve_all_btn = gr.Button("✅ Approve All", variant="primary", scale=1)
+                tool_reject_all_btn = gr.Button("❌ Reject All", variant="stop", scale=1)
+            with gr.Column(visible=False) as tool_list_panel:
+                gr.Markdown("### 🔧 Tool Actions")
+                with gr.Tab("Selective Approval"):
+                    # Indices are entered 1-based; approve_selected_tools
+                    # converts them to 0-based for the backend.
+                    tool_indices_input = gr.Textbox(
+                        label="Tool Indices to Approve (comma-separated)",
+                        placeholder="1,2,3",
+                        info="Enter indices of tools to approve (e.g., '1,3' to approve Tool 1 and Tool 3)"
+                    )
+                    tool_approve_selected_btn = gr.Button("✅ Approve Selected", variant="primary")
+                with gr.Tab("Modify Arguments"):
+                    tool_index_input = gr.Number(
+                        label="Tool Index",
+                        value=1,
+                        minimum=1,
+                        precision=0,
+                        info="Enter tool number (e.g., 1 for Tool 1)"
+                    )
+                    tool_args_input = gr.TextArea(
+                        label="New Arguments (JSON)",
+                        placeholder='{"query": "modified search query"}',
+                        lines=5
+                    )
+                    tool_modify_btn = gr.Button("🔧 Modify & Approve", variant="secondary")
+                with gr.Tab("Re-Reasoning"):
+                    feedback_input = gr.TextArea(
+                        label="Feedback for Agent",
+                        placeholder="Please search for academic papers instead of news articles...",
+                        lines=4
+                    )
+                    re_reasoning_btn = gr.Button("🔄 Request Re-Reasoning", variant="secondary")
+        # Sidebar
+        with gr.Column(scale=1):
+            gr.Markdown("## Control Panel")
+            with gr.Group():
+                gr.Markdown("### User Session")
+                user_id = gr.Textbox(
+                    label="User ID",
+                    value="demo_user"
+                )
+                # Default is computed once, when this Textbox is constructed.
+                # It uses '%m%d_%H%M', whereas new_session uses '%Y%m%d_%H%M%S'.
+                session_id = gr.Textbox(
+                    label="Session ID",
+                    value=f"session_{datetime.now().strftime('%m%d_%H%M')}"
+                )
+            with gr.Group():
+                stats_display = gr.Markdown("### Statistics\nNo requests yet")
+            with gr.Group():
+                gr.Markdown("### Management")
+                clear_btn = gr.Button("Clear History", variant="secondary")
+                new_session_btn = gr.Button("New Session", variant="secondary")
+            gr.Markdown("""
+            ## Examples
+            **Model Deployment:**
+            - Deploy meta-llama/Llama-3.1-8B
+            - Deploy openai/gpt-oss-20b
+            **Tool Usage:**
+            - Search for latest AI developments
+            - Calculate 25 * 34 + 128
+            - What's the weather in Paris?
+            """)
+    # Update GPU options when location changes
+    location.change(
+        fn=agent_interface.update_gpu_options,
+        inputs=[location],
+        outputs=[gpu_type]
+    )
+    # ========================================================================
+    # EVENT HANDLERS
+    # ========================================================================
+    # Most approval handlers produce a 6-item result matched positionally to:
+    # chatbot, capacity_approval_panel, capacity_param_panel,
+    # tool_approval_panel, tool_list_panel, stats_display.
+    # Query submission
+    # Enter in the textbox and the Send button share one handler; its outputs
+    # also include msg (process_query is defined outside this section —
+    # presumably it clears the input; confirm there).
+    msg.submit(
+        agent_interface.process_query,
+        inputs=[msg, chatbot, user_id, session_id],
+        outputs=[chatbot, msg, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    send_btn.click(
+        agent_interface.process_query,
+        inputs=[msg, chatbot, user_id, session_id],
+        outputs=[chatbot, msg, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    # Capacity approval handlers
+    capacity_approve_btn.click(
+        agent_interface.approve_capacity,
+        inputs=[chatbot, user_id, session_id],
+        outputs=[chatbot, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    capacity_reject_btn.click(
+        agent_interface.reject_capacity,
+        inputs=[chatbot, user_id, session_id],
+        outputs=[chatbot, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    # Outputs follow the 11-item tuple from show_capacity_modify_dialog:
+    # four panel updates, then the seven parameter field values.
+    capacity_modify_btn.click(
+        agent_interface.show_capacity_modify_dialog,
+        outputs=[capacity_approval_panel, capacity_param_panel, tool_approval_panel,
+                tool_list_panel, max_model_len, max_num_seqs, max_batched_tokens,
+                kv_cache_dtype, gpu_util, location, gpu_type]
+    )
+    # cancel_capacity_modify returns only the four panel updates.
+    capacity_cancel_btn.click(
+        agent_interface.cancel_capacity_modify,
+        outputs=[capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel]
+    )
+    capacity_apply_btn.click(
+        agent_interface.apply_capacity_modifications,
+        inputs=[chatbot, user_id, session_id, max_model_len, max_num_seqs,
+               max_batched_tokens, kv_cache_dtype, gpu_util, location, gpu_type],
+        outputs=[chatbot, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    # Tool approval handlers
+    tool_approve_all_btn.click(
+        agent_interface.approve_all_tools,
+        inputs=[chatbot, user_id, session_id],
+        outputs=[chatbot, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    tool_reject_all_btn.click(
+        agent_interface.reject_all_tools,
+        inputs=[chatbot, user_id, session_id],
+        outputs=[chatbot, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    tool_approve_selected_btn.click(
+        agent_interface.approve_selected_tools,
+        inputs=[chatbot, user_id, session_id, tool_indices_input],
+        outputs=[chatbot, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    tool_modify_btn.click(
+        agent_interface.modify_tool_args,
+        inputs=[chatbot, user_id, session_id, tool_index_input, tool_args_input],
+        outputs=[chatbot, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    re_reasoning_btn.click(
+        agent_interface.request_re_reasoning,
+        inputs=[chatbot, user_id, session_id, feedback_input],
+        outputs=[chatbot, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    # Management handlers
+    # clear_chat returns 7 items: empty history, empty input text, four hidden
+    # panel updates and the stats display.
+    clear_btn.click(
+        agent_interface.clear_chat,
+        inputs=[user_id, session_id],
+        outputs=[chatbot, msg, capacity_approval_panel, capacity_param_panel,
+                tool_approval_panel, tool_list_panel, stats_display]
+    )
+    # new_session returns only the new ID, which replaces the Session ID textbox.
+    new_session_btn.click(
+        agent_interface.new_session,
+        inputs=[user_id],
+        outputs=[session_id]
+    )
+if __name__ == "__main__":
+    logger.info("🚀 Starting Enhanced ComputeAgent Gradio Interface")
+    logger.info(f"📡 Connecting to FastAPI at: {API_BASE_URL}")
+    logger.info("✨ Features: Capacity Approval + Tool Approval")
+    logger.info("🌐 Interface will be available at: http://localhost:7860")
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: Hivenet ComputeAgent
-emoji: 🦀
-colorFrom: red
-colorTo: red
 sdk: docker
 pinned: false
 license: apache-2.0
@@ -10,3 +10,11 @@ short_description: AI-Powered Deployment using MCP of Compute by Hivenet
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Hivenet
+emoji: 🔥
+colorFrom: blue
+colorTo: indigo
 sdk: docker
 pinned: false
 license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## 👥 Team
+**Team Name:** Hivenet AI Team
+**Team Members:**
+- **Igor Carrara** - [@carraraig](https://huggingface.co/carraraig) - AI Scientist
+- **Mamoutou Diarra** - [@mdiarra](https://huggingface.co/mdiarra) - AI Scientist

constant.py ADDED Viewed

	@@ -0,0 +1,195 @@

+"""Constant Module"""
+import os
+from enum import StrEnum
+from dotenv import load_dotenv
+load_dotenv()
+class Constants(StrEnum):
+    """Constants used in application."""
+    # ============================================================================
+    # COMPUTE CONFIGURATION
+    # ============================================================================
+    HIVE_COMPUTE_BASE_API_URL = os.environ.get("HIVE_COMPUTE_BASE_API_URL", "https://api.hivecompute.ai")
+    HIVE_COMPUTE_DEFAULT_API_TOKEN = os.environ.get("HIVE_COMPUTE_DEFAULT_API_TOKEN", "")
+    # ============================================================================
+    # MODEL CONFIGURATION
+    # ============================================================================
+    # Model Router configuration for AI model management
+    MODEL_ROUTER_TOKEN = os.getenv("MODEL_ROUTER_TOKEN", "your-model-router-token")
+    MODEL_ROUTER_HOST = os.getenv("MODEL_ROUTER_HOST", "localhost")
+    MODEL_ROUTER_PORT = os.getenv("MODEL_ROUTER_PORT", "8080")
+    # Default model configurations
+    DEFAULT_LLM_NAME = os.getenv("DEFAULT_LLM_NAME", "openai/gpt-oss-20b")
+    DEFAULT_LLM_FC = os.getenv("DEFAULT_LLM_FC", "Qwen/Qwen3-14B-FP8")
+    HF_TOKEN = os.getenv("HF_TOKEN", "your-huggingface-token")
+    # ============================================================================
+    # ENVIRONMENT AND PROCESSING CONFIGURATION
+    # ============================================================================
+    # Environment setting (production vs development)
+    ENV = os.environ.get("PRODUCTION", "False").lower()
+    PRODUCTION = "" if ENV == "false" else "true"
+    # Human approval setting for tool execution
+    HUMAN_APPROVAL = os.environ.get("HUMAN_APPROVAL", "True").lower()
+    HUMAN_APPROVAL_CAPACITY = os.environ.get("HUMAN_APPROVAL_CAPACITY", "True").lower()
+    # ============================================================================
+    # AI SYSTEM PROMPTS AND BEHAVIOR CONFIGURATION
+    # ============================================================================
+    # General System Prompt - Applied to all AI interactions
+    GENERAL_SYSTEM_PROMPT = r"""
+    <behavior_instructions>
+    ComputeAgent is Hive’s AI assistant for the Hivenet.com ecosystem.
+    It supports users—called *Hivers*—by providing accurate, professional, and context-aware answers across factual and creative topics that align with Hive’s ethical and community standards.
+    ComputeAgent always:
+    - Remains **professional, accurate, and relevant**.
+    - Uses **Hive data and documentation** when available.
+    - Prioritizes **user intent**, clarifying ambiguity when needed.
+    - Balances **conciseness and completeness**.
+     - **Cites sources** when providing information from specific documents or external resources.
+    </behavior_instructions>
+    <priority_rules>
+    ComputeAgent follows all behavior instructions in this prompt.
+    When multiple instructions may conflict:
+    1. **Safety and compliance** come first.
+    2. **Accuracy and source citation** come second.
+    3. **User tone and formatting** come third.
+    If factual info comes from a verifiable source, ComputeAgent MUST cite it, even if the user doesn’t ask.
+    </priority_rules>
+    <source_citation>
+    ComputeAgent must include **inline citations** for all statements derived from the provided knowledge pieces.
+    ### Citation Rules
+    - Use the **`id` field** from the knowledge pieces as your citation.
+    - Format citations as `[citationId]` placed **between sentences or phrases** that use that knowledge.
+    - Citations should **never appear on a separate line** or as a sub-bullet in lists.
+    - Always cite **every piece of knowledge you use**; skipping a source is considered incorrect.
+    - Never invent or alter citation IDs.
+    - Do not include URLs, parentheses, or prefixes like “Source:”.
+    ### Examples
+    ✅ Correct:
+    - “The capital of Chile is Santiago de Chile[1], and the population is 7 million people[3].”
+    - “Caco loves oranges more than apples[1], and his full name is Joaquín Ossandón Stanke[2].”
+    - “Caco's nickname, cacoos, comes from his initials[3].”
+    ❌ Incorrect:
+    - “The capital of Chile is Santiago de Chile.” (missing citation)
+    - “Caco loves oranges[1] and apples.” (forgot to cite full statement)
+    - “Caco's full name[2](https://...)” (URL included)
+    - Citation on a new line or nested bullet:
+        - Users manage passwords.
+            - [12]
+    ### Style Rules
+    - Integrate citations naturally inline; they should feel like part of the sentence.
+    - Multiple citations in the same sentence can be separated by commas: `[1], [2]`.
+    - Only cite knowledge actually used in the answer.
+    - Ensure readability: citations should feel like part of the sentence.
+    </source_citation>
+    <general_hive_info>
+    Created by Hive for the Hivenet community.
+    Current date: {{currentDateTime}}.
+    HiveGPT can discuss Hive, Hivenet.com, and Hive’s products or community, but lacks access to non-public business details.
+    For pricing, limits, or account issues, refer users to [https://hivenet.com](https://hivenet.com).
+    HiveGPT can also help users craft better prompts, examples, or task formats.
+    See Hive’s documentation portal for more guidance.
+    </general_hive_info>
+    <refusal_handling>
+    HiveGPT must refuse requests that are:
+    - Harmful, illegal, or unethical.
+    - Discriminatory, violent, or harassing.
+    - Related to hacking, exploitation, or malicious code.
+    - About private/confidential data without consent.
+    - Fictional or persuasive content attributed to real people.
+    It may discuss educational or technical topics safely, focusing on ethical concepts.
+    If a request seems unsafe or unclear, HiveGPT politely declines and may redirect to a safe alternative.
+    Always respond respectfully, even to frustration or hostility.
+    </refusal_handling>
+    <tone>
+    HiveGPT maintains a **professional, clear, and approachable** tone—warm for casual chats but never overly informal.
+    Writing guidelines:
+    - Use **paragraphs** over lists unless structure improves clarity.
+    - Keep **formatting minimal and clean**.
+    - Be **concise** for simple questions, **thorough** for complex ones.
+    - Use examples or analogies when useful.
+    - No emojis unless the user does first.
+    - Mirror the user’s tone within professional limits.
+    - Avoid emotes, roleplay asterisks, and unnecessary profanity (respond calmly if user uses it).
+    </tone>
+    <response_quality_standards>
+    HiveGPT produces **expert, long-form answers** that are:
+    - **Comprehensive** – logically structured and detailed.
+    - **Professional** – authoritative and precise.
+    - **Engaging** – conversational yet clear.
+    - **Self-contained** – understandable without extra context.
+    - **Natural** – integrate context seamlessly (avoid meta phrases like “Based on the provided context”).
+    Every fact-based statement must include citations where relevant, especially if the content can be verified externally.
+    Balance clarity, depth, and readability in every response.
+    </response_quality_standards>
+    <user_wellbeing>
+    HiveGPT promotes **safe, balanced, and supportive** communication.
+    Avoid encouraging self-harm, addiction, or misinformation.
+    In sensitive topics (e.g., health, psychology), give factual, responsible info and suggest consulting professionals.
+    If emotional distress is detected, respond with compassion and encourage seeking real support.
+    Never reinforce harmful or delusional beliefs; guide gently toward reality.
+    </user_wellbeing>
+    <formatting_rules>
+    Use markdown-style formatting for clarity and consistency.
+    ### Document Structure
+    - **Title (`##`)**: Main topic.
+    Example: `## How to Reset Your Hive Password`
+    - **Subtitle (`###`)** (optional): Brief intro.
+    - **Horizontal Rule (`---`)**: Separate major sections.
+    ### Text Formatting
+    - **Bold (`**bold**`)** – Key terms/actions.
+    - *Italic (`*italic*`)* – Light emphasis or technical terms.
+    - [Links](URL) – Cite resources.
+    ### Lists & Steps
+    - **Bullets (`-`)** – For unordered points.
+    - **Numbers (`1.`)** – For steps or sequences.
+    ### Code
+    Use fenced blocks for syntax:
+    ```python
+    def hello():
+        print("Hello, Hive!")
+    ### Advanced Formatting
+    - Inline code (\code`) – Short technical refs.
+    - Task lists (- [ ]) – To-dos or checklists.
+    - Blockquotes (>) – Notes or tips.
+    - Headings (#–######) – Nested structure.
+    </formatting_rules>
+    """

logging_setup.py ADDED Viewed

	@@ -0,0 +1,73 @@

+# -*- coding: utf-8 -*-
+import asyncio
+import logging
+import os
+import sys
+import coloredlogs
+from loguru import logger
+# On Windows (os.name == "nt"), switch asyncio to the selector event loop policy.
+if os.name == "nt":
+    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+# Replace Loguru's existing handlers with a single stderr sink at ERROR level.
+logger.remove()
+logger.add(sys.stderr, level=logging.ERROR)
+class InterceptHandler(logging.Handler):
+    """Intercept standard logging messages toward your Loguru sinks.
+    Code was taken from https://github.com/Delgan/loguru#entirely-compatible-with-standard-logging
+    """
+    loggers = {}
+    def emit(self, record):
+        # Get corresponding Loguru level if it exists
+        """
+        Intercept a standard logging message and log it through Loguru.
+        :param record: standard logging record
+        :type record: logging.LogRecord
+        """
+        try:
+            level = logger.level(record.levelname).name
+        except ValueError:
+            level = record.levelno
+        # Find caller from where originated the logged message
+        frame, depth = sys._getframe(2), 2
+        while frame.f_code.co_filename == logging.__file__:
+            frame = frame.f_back
+            depth += 1
+        if record.name not in self.loggers:
+            self.loggers[record.name] = logger.bind(name=record.name)
+        self.loggers[record.name].opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
+coloredlogs.DEFAULT_LEVEL_STYLES = {
+    **coloredlogs.DEFAULT_LEVEL_STYLES,
+    "critical": {"background": "red"},
+    "debug": coloredlogs.DEFAULT_LEVEL_STYLES["info"],
+}
+log_level = logging.DEBUG if os.environ.get("PRODUCTION", False) == "DEBUG" else logging.INFO
+if isinstance(log_level, str):
+    log_level = logging.INFO
+format_string = "%(asctime)s | %(name)s | %(levelname)s | %(message)s"
+coloredlogs.install(stream=sys.stdout, level=log_level, fmt=format_string)
+logging.basicConfig(level=log_level, format=format_string)
+logging.getLogger().addHandler(InterceptHandler(level=log_level))
+logging.getLogger("github.Requester").setLevel(logging.WARNING)
+logging.getLogger("multipart").setLevel(logging.WARNING)
+logging.getLogger("openai").setLevel(logging.INFO)
+logging.getLogger("PIL").setLevel(logging.WARNING)
+logging.getLogger("urllib3").setLevel(logging.WARNING)
+logging.getLogger("websockets").setLevel(logging.WARNING)
+logging.getLogger("werkzeug").setLevel(logging.WARNING)
+logging.getLogger('pdfminer').setLevel(logging.ERROR)

requirements.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+# MCP
+fastapi
+uvicorn[standard]
+fastmcp>=2.12.3
+httpx
+pydantic
+# Compute Agent
+coloredlogs>=15.0.1
+# NOTE(review): both `dotenv` and `python-dotenv` are listed; the code uses
+# `from dotenv import load_dotenv`, which python-dotenv provides — confirm `dotenv` is needed.
+dotenv>=0.9.9
+gradio==5.49.1
+gradio-client==1.13.3
+langchain>=1.0.7
+langchain-core>=1.0.5
+langchain-mcp-adapters>=0.1.13
+langchain-openai>=1.0.3
+langgraph>=1.0.3
+# Imported directly by logging_setup.py (`from loguru import logger`)
+loguru
+python-dotenv>=1.2.1
+requests>=2.32.5
+transformers>=4.57.1
+aiohttp

run.sh ADDED Viewed

	@@ -0,0 +1,21 @@

+#!/bin/bash
+# With stdio transport, MCP server is spawned by the MCP client automatically
+# No need to start it separately
+AGENT_DIR="/home/hivenet/ComputeAgent"
+AGENT_PID="/home/hivenet/agent.pid"
+CURR_DIR=$(pwd)
+GRADIO_DIR=${CURR_DIR}
+# Start Compute Agent (MCP client will spawn MCP server via stdio)
+cd ${AGENT_DIR}
+python main.py & echo $! > ${AGENT_PID}
+sleep 5
+# Start Gradio Web Server
+cd ${GRADIO_DIR}
+python Gradio_interface.py
+# Cleanup on exit
+pkill -F ${AGENT_PID}
+rm ${AGENT_PID}