commit 8b361316cc
8 changed files with 461 additions and 0 deletions
@@ -0,0 +1,2 @@
.claude
venv
@@ -0,0 +1,77 @@
# AI model at the Edge with MCP support

This is a Proof of Concept of a generative AI model (LLM) at the edge, with MCP server support.

The idea is for the client to ask the AI model about the weather in Paris. The weather data is provided by an MCP server.

There are three components in this project:

- the vLLM server, serving the Qwen/Qwen3-8B model;
- the Python client, calling vLLM;
- the MCP server, serving hardcoded weather data for a city over stdio.

The MCP server has to be declared to the AI model and is called by the client whenever the AI model requests it (a sketch of that declaration is shown below).
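
For reference, the client converts each MCP tool into the OpenAI function-calling format before sending it to vLLM (see `mcp_tools_to_openai_format` in `client/mcp_client.py`). A rough sketch of what the resulting `get_weather` declaration looks like (the exact parameter schema is generated by FastMCP from the tool's signature and may differ slightly):

```python
# Sketch of the tool declaration the client passes to vLLM's chat completions API.
# The real schema comes from the MCP server's inputSchema and may include extra fields.
get_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a specific city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}
```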

## Prerequisites

- Python 3.8 or higher
- 8GB+ RAM (for the model)
- Internet connection (for the first-time model download)
- Podman with GPU access (`start_server.sh` runs vLLM in a container with `--device nvidia.com/gpu=all`)
- A Hugging Face token exported as `HF_TOKEN` (used by `start_server.sh` when downloading the model)

## Step 1: Setup

```bash
./setup.sh
```

Wait for all dependencies to install.
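
Optionally, you can verify that the packages from `requirements.txt` installed correctly. A quick check to run inside the virtual environment:

```python
# Print the installed version of each dependency listed in requirements.txt.
from importlib.metadata import version

for pkg in ("vllm", "openai", "fastmcp", "mcp", "huggingface-hub"):
    print(pkg, version(pkg))
```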

## Step 2: Start the vLLM Server

Open a terminal and run:

```bash
./start_server.sh
```

Wait for the model to download and the server to start. You'll see:

```
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000
```

**Keep this terminal open!**
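
Optionally, you can confirm the API is reachable before moving on. A minimal check with the OpenAI Python client, assuming the defaults from `start_server.sh` (port 8000, served model name `qwen`):

```python
# List the models served by the local vLLM endpoint; expect a single entry named "qwen".
from openai import OpenAI

client = OpenAI(api_key="EMPTY", base_url="http://127.0.0.1:8000/v1")
for model in client.models.list():
    print(model.id)
```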

## Step 3: Run the Client

Open a **new terminal** and run:

```bash
./run_demo.sh
```

You should see the client:

1. Connect to the MCP weather server
2. Ask about the weather in Paris
3. Let the LLM call the `get_weather` tool
4. Receive a natural language response with the weather data

You can also pass a custom question, for example: `./run_demo.sh "What's the weather like in Tokyo?"`

## Example Output

```
Connected to MCP server. Available tools:
  - get_weather: Get the current weather for a specific city.

User: What's the weather like in Paris?

Assistant wants to call 1 tool(s):

  Calling tool: get_weather
  Arguments: {'city': 'Paris'}
  Result: {"city": "Paris", "temperature": 15, ...}
```
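
The demo script wraps `client/mcp_client.py`, but the client class can also be driven directly. A minimal sketch, assuming it runs from the repository root with the virtual environment active and the vLLM server already up (the import path is illustrative and may need adjusting to the package layout):

```python
# Programmatic use of MCPWeatherClient (sketch): connect to the MCP weather
# server over stdio, then ask the model a question that triggers the tool.
import asyncio

from client.mcp_client import MCPWeatherClient  # illustrative import path

async def demo() -> None:
    client = MCPWeatherClient()  # defaults: http://127.0.0.1:8000/v1, model "qwen"
    async with client.connect_to_mcp("mcp-server/weather_server.py"):
        await client.chat("What's the weather like in Tokyo?")

asyncio.run(demo())
```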

## Authors

- Claude Code
- Nicolas Massé
@@ -0,0 +1,217 @@
#!/usr/bin/env python3
"""
MCP Client - Integrates vLLM with MCP weather server
"""

import asyncio
import json
import os
from typing import Optional
from contextlib import asynccontextmanager

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from openai import OpenAI


class MCPWeatherClient:
    """Client that connects vLLM to the MCP weather server"""

    def __init__(self, vllm_url: str = "http://127.0.0.1:8000/v1", model: str = "qwen"):
        self.vllm_url = vllm_url
        self.model = model
        self.openai_client = OpenAI(
            api_key="EMPTY",  # vLLM doesn't need an API key
            base_url=vllm_url
        )
        self.mcp_session: Optional[ClientSession] = None
        self.available_tools = []

    @asynccontextmanager
    async def connect_to_mcp(self, server_script_path: str):
        """Connect to the MCP server via stdio"""
        server_params = StdioServerParameters(
            command="python",
            args=[server_script_path],
            env=None
        )

        async with stdio_client(server_params) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                self.mcp_session = session

                # List available tools
                tools_list = await session.list_tools()
                print("\nConnected to MCP server. Available tools:")
                for tool in tools_list.tools:
                    print(f"  - {tool.name}: {tool.description}")
                    self.available_tools.append(tool)

                yield session

    def mcp_tools_to_openai_format(self):
        """Convert MCP tools to the OpenAI function calling format"""
        openai_tools = []

        for tool in self.available_tools:
            # Convert the MCP tool schema to the OpenAI format
            tool_def = {
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description or "",
                    "parameters": tool.inputSchema if tool.inputSchema else {
                        "type": "object",
                        "properties": {},
                        "required": []
                    }
                }
            }
            openai_tools.append(tool_def)

        return openai_tools

    async def call_mcp_tool(self, tool_name: str, arguments: dict):
        """Call an MCP tool and return the result"""
        if not self.mcp_session:
            raise RuntimeError("MCP session not initialized")

        result = await self.mcp_session.call_tool(tool_name, arguments)
        return result

    async def chat(self, user_message: str, max_iterations: int = 5):
        """
        Run a chat interaction with the model, handling tool calls via MCP

        Args:
            user_message: The user's question/message
            max_iterations: Maximum number of turns to prevent infinite loops
        """
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant that can get weather information for cities. When asked about weather, use the get_weather tool."
            },
            {
                "role": "user",
                "content": user_message
            }
        ]

        tools = self.mcp_tools_to_openai_format()
        print(f"\nUser: {user_message}\n")

        for iteration in range(max_iterations):
            # Call the model
            response = self.openai_client.chat.completions.create(
                model=self.model,
                messages=messages,
                tools=tools if tools else None,
                tool_choice="auto" if tools else None
            )

            assistant_message = response.choices[0].message

            # Add the assistant response to the conversation history
            messages.append({
                "role": "assistant",
                "content": assistant_message.content,
                "tool_calls": [
                    {
                        "id": tc.id,
                        "type": tc.type,
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments
                        }
                    }
                    for tc in (assistant_message.tool_calls or [])
                ]
            })

            # Check if the model wants to call tools
            if assistant_message.tool_calls:
                print(f"Assistant wants to call {len(assistant_message.tool_calls)} tool(s):\n")

                # Process each tool call
                for tool_call in assistant_message.tool_calls:
                    function_name = tool_call.function.name
                    function_args = json.loads(tool_call.function.arguments)

                    print(f"  Calling tool: {function_name}")
                    print(f"  Arguments: {function_args}")

                    # Call the MCP tool
                    mcp_result = await self.call_mcp_tool(function_name, function_args)

                    # Extract content from the MCP result
                    if mcp_result.content:
                        # Handle different content types
                        result_text = ""
                        for content in mcp_result.content:
                            if hasattr(content, 'text'):
                                result_text += content.text
                            else:
                                result_text += str(content)

                        print(f"  Result: {result_text}\n")

                        # Add the tool result to the messages
                        messages.append({
                            "role": "tool",
                            "tool_call_id": tool_call.id,
                            "content": result_text
                        })
                    else:
                        print("  Result: No content returned\n")
                        messages.append({
                            "role": "tool",
                            "tool_call_id": tool_call.id,
                            "content": "No result"
                        })

                # Continue the loop to get the final response
                continue

            # No tool calls, this is the final response
            if assistant_message.content:
                print(f"Assistant: {assistant_message.content}\n")
                return assistant_message.content
            else:
                print("Assistant: (no response)")
                return None

        print("\nReached maximum iterations")
        return None


async def main():
    """Main function to run the client"""
    import sys

    # Get the MCP server script path
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    mcp_server_path = os.path.join(project_root, "mcp-server", "weather_server.py")

    if not os.path.exists(mcp_server_path):
        print(f"Error: MCP server script not found at {mcp_server_path}")
        sys.exit(1)

    # Create the client
    client = MCPWeatherClient()

    # Connect to the MCP server and run the chat
    async with client.connect_to_mcp(mcp_server_path):
        # Example question about the weather
        question = "What's the weather like in Paris?"

        if len(sys.argv) > 1:
            question = " ".join(sys.argv[1:])

        await client.chat(question)


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
"""
MCP Weather Server - Provides hardcoded weather data for cities via stdio
"""

from fastmcp import FastMCP

# Initialize the MCP server
mcp = FastMCP("weather-server")

# Hardcoded weather data
WEATHER_DATA = {
    "paris": {
        "city": "Paris",
        "temperature": 15,
        "condition": "Partly cloudy",
        "humidity": 65,
        "wind_speed": 12
    },
    "london": {
        "city": "London",
        "temperature": 12,
        "condition": "Rainy",
        "humidity": 80,
        "wind_speed": 18
    },
    "new york": {
        "city": "New York",
        "temperature": 20,
        "condition": "Sunny",
        "humidity": 55,
        "wind_speed": 10
    },
    "tokyo": {
        "city": "Tokyo",
        "temperature": 18,
        "condition": "Clear",
        "humidity": 60,
        "wind_speed": 8
    }
}


@mcp.tool()
def get_weather(city: str) -> dict:
    """
    Get the current weather for a specific city.

    Args:
        city: The name of the city to get weather for

    Returns:
        A dictionary containing weather information including temperature,
        condition, humidity, and wind speed
    """
    city_lower = city.lower()

    if city_lower in WEATHER_DATA:
        return WEATHER_DATA[city_lower]
    else:
        return {
            "city": city,
            "error": f"Weather data not available for {city}",
            "available_cities": list(WEATHER_DATA.keys())
        }


if __name__ == "__main__":
    # Run the MCP server using stdio transport
    mcp.run(transport="stdio")
@@ -0,0 +1,5 @@
vllm>=0.6.0
openai>=1.0.0
fastmcp>=0.1.0
mcp>=1.0.0
huggingface-hub>=0.20.0
@@ -0,0 +1,35 @@
#!/bin/bash

# Demo script to run the MCP-enabled LLM client
# Make sure the vLLM server is running first!

cd "$(dirname "$0")"

# Activate the virtual environment if it exists
if [ -d "venv" ]; then
    source venv/bin/activate
fi

# Check if the vLLM server is running
if ! curl -s http://127.0.0.1:8000/health > /dev/null 2>&1; then
    echo "⚠️  Warning: vLLM server doesn't seem to be running on port 8000"
    echo "Please start it first with: ./start_server.sh"
    echo ""
    read -p "Continue anyway? (y/n) " -n 1 -r
    echo
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
        exit 1
    fi
fi

# Run the client
echo "Starting MCP client..."
echo ""

if [ $# -eq 0 ]; then
    # Default question
    python client/mcp_client.py "What's the weather like in Paris?"
else
    # Custom question
    python client/mcp_client.py "$@"
fi
@@ -0,0 +1,30 @@
#!/bin/bash

set -e

# Check for Python
if ! command -v python3 &> /dev/null; then
    echo "❌ Python 3 is not installed"
    exit 1
fi

PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
echo "✓ Python version: $PYTHON_VERSION"

# Create the virtual environment
if [ ! -d "venv" ]; then
    echo "📦 Creating the virtual environment..."
    python3 -m venv venv
fi

# Activate the environment
source venv/bin/activate

# Install dependencies
echo "📥 Installing dependencies..."
pip install --upgrade pip
pip install -r requirements.txt

echo ""
echo "✅ Installation complete!"
echo ""
@@ -0,0 +1,25 @@
#!/bin/bash

set -Eeuo pipefail

# Start the vLLM server with the Qwen model
# This script starts the vLLM OpenAI-compatible API server

MODEL="${1:-Qwen/Qwen3-8B}"
HOST="${2:-127.0.0.1}"
PORT="${3:-8000}"

echo "Starting vLLM server..."
echo "Model: $MODEL"
echo "Host: $HOST"
echo "Port: $PORT"
echo "Hugging Face Token: $HF_TOKEN"
echo ""
echo "Note: This will download the model if not already cached."
echo "Press Ctrl+C to stop the server."
echo ""

mkdir -p ~/.cache/vllm

# see https://qwen.readthedocs.io/en/latest/framework/function_call.html#vllm
podman run --name vllm --rm \
    --device nvidia.com/gpu=all \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    -v ~/.cache/vllm:/root/.cache/vllm \
    --env "HF_TOKEN=$HF_TOKEN" \
    -p "$HOST:$PORT:8000" \
    --ipc=host \
    docker.io/vllm/vllm-openai:latest \
    "$MODEL" \
    --gpu-memory-utilization 0.95 \
    --max-model-len 16384 \
    --served-model-name qwen \
    --enable-auto-tool-choice \
    --tool-call-parser hermes \
    --reasoning-parser deepseek_r1