commit 8b361316cca929dabfcfd0730ba608089ca61f8f
Author: Nicolas Massé
Date:   Mon Jan 5 15:34:25 2026 -0500

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..37696d5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.claude
+venv
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b81da4f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,80 @@
+# AI model at the Edge with MCP support
+
+This is a Proof of Concept of a generative AI model (LLM) at the edge, with MCP server support.
+
+The idea is for the client to ask the AI model about the weather in Paris.
+The weather data is provided by an MCP server.
+
+There are three components in this project:
+
+- the vLLM server, serving the Qwen/Qwen3-8B model.
+- the Python client, calling vLLM.
+- the MCP server, serving the weather for a city over stdio (responses are hardcoded).
+
+The MCP server's tools are declared to the AI model, and the client calls the MCP server whenever the model requests a tool.
+
+## Prerequisites
+
+- Python 3.10 or higher (for the client and the MCP server)
+- Podman with NVIDIA GPU support (the vLLM server runs in a container)
+- An NVIDIA GPU with enough memory for the Qwen/Qwen3-8B model
+- A Hugging Face token exported as `HF_TOKEN`
+- Internet connection (for the first-time model download)
+
+## Step 1: Setup
+
+```bash
+./setup.sh
+```
+
+Wait for all dependencies to install.
+
+## Step 2: Start the vLLM Server
+
+Open a terminal and run:
+
+```bash
+./start_server.sh
+```
+
+Wait for the model to download and the server to start. You'll see:
+```
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://127.0.0.1:8000
+```
+
+**Keep this terminal open!**
+
+## Step 3: Run the Client
+
+Open a **new terminal** and run:
+
+```bash
+./run_demo.sh
+```
+
+You should see the client:
+1. Connect to the MCP weather server
+2. Ask about the weather in Paris
+3. Call the `get_weather` tool when the LLM requests it
+4. Print a natural language response with the weather data
+
+## Example Output
+
+```
+Connected to MCP server. Available tools:
+  - get_weather: Get the current weather for a specific city.
+
+User: What's the weather like in Paris?
+
+Assistant wants to call 1 tool(s):
+
+  Calling tool: get_weather
+  Arguments: {'city': 'Paris'}
+  Result: {"city": "Paris", "temperature": 15, ...}
+```
+
+## Authors
+
+- Claude Code
+- Nicolas Massé
diff --git a/client/mcp_client.py b/client/mcp_client.py
new file mode 100755
index 0000000..c42143e
--- /dev/null
+++ b/client/mcp_client.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+"""
+MCP Client - Integrates vLLM with MCP weather server
+"""
+
+import asyncio
+import json
+import os
+from typing import Optional
+from contextlib import asynccontextmanager
+
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+from openai import OpenAI
+
+
+class MCPWeatherClient:
+    """Client that connects vLLM to MCP weather server"""
+
+    def __init__(self, vllm_url: str = "http://127.0.0.1:8000/v1", model: str = "qwen"):
+        self.vllm_url = vllm_url
+        self.model = model
+        self.openai_client = OpenAI(
+            api_key="EMPTY",  # vLLM doesn't need API key
+            base_url=vllm_url
+        )
+        self.mcp_session: Optional[ClientSession] = None
+        self.available_tools = []
+
+    @asynccontextmanager
+    async def connect_to_mcp(self, server_script_path: str):
+        """Connect to MCP server via stdio"""
+        server_params = StdioServerParameters(
+            command="python",
+            args=[server_script_path],
+            env=None
+        )
+
+        async with stdio_client(server_params) as (read, write):
+            async with ClientSession(read, write) as session:
+                await session.initialize()
+                self.mcp_session = session
+
+                # List available tools
+                tools_list = await session.list_tools()
+                print("\nConnected to MCP server. Available tools:")
+                for tool in tools_list.tools:
+                    print(f"  - {tool.name}: {tool.description}")
+                    self.available_tools.append(tool)
+
+                yield session
+
+    def mcp_tools_to_openai_format(self):
+        """Convert MCP tools to OpenAI function calling format"""
+        openai_tools = []
+
+        for tool in self.available_tools:
+            # Convert MCP tool schema to OpenAI format
+            tool_def = {
+                "type": "function",
+                "function": {
+                    "name": tool.name,
+                    "description": tool.description or "",
+                    "parameters": tool.inputSchema if tool.inputSchema else {
+                        "type": "object",
+                        "properties": {},
+                        "required": []
+                    }
+                }
+            }
+            openai_tools.append(tool_def)
+
+        return openai_tools
+
+    async def call_mcp_tool(self, tool_name: str, arguments: dict):
+        """Call an MCP tool and return the result"""
+        if not self.mcp_session:
+            raise RuntimeError("MCP session not initialized")
+
+        result = await self.mcp_session.call_tool(tool_name, arguments)
+        return result
+
+    async def chat(self, user_message: str, max_iterations: int = 5):
+        """
+        Run a chat interaction with the model, handling tool calls via MCP
+
+        Args:
+            user_message: The user's question/message
+            max_iterations: Maximum number of turns to prevent infinite loops
+        """
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant that can get weather information for cities. When asked about weather, use the get_weather tool."
+            },
+            {
+                "role": "user",
+                "content": user_message
+            }
+        ]
+
+        tools = self.mcp_tools_to_openai_format()
+        print(f"\nUser: {user_message}\n")
+
+        for iteration in range(max_iterations):
+            # Call the model
+            response = self.openai_client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                tools=tools if tools else None,
+                tool_choice="auto" if tools else None
+            )
+
+            assistant_message = response.choices[0].message
+
+            # Add assistant response to messages
+            messages.append({
+                "role": "assistant",
+                "content": assistant_message.content,
+                "tool_calls": [
+                    {
+                        "id": tc.id,
+                        "type": tc.type,
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments
+                        }
+                    }
+                    for tc in (assistant_message.tool_calls or [])
+                ]
+            })
+
+            # Check if model wants to call tools
+            if assistant_message.tool_calls:
+                print(f"Assistant wants to call {len(assistant_message.tool_calls)} tool(s):\n")
+
+                # Process each tool call
+                for tool_call in assistant_message.tool_calls:
+                    function_name = tool_call.function.name
+                    function_args = json.loads(tool_call.function.arguments)
+
+                    print(f"  Calling tool: {function_name}")
+                    print(f"  Arguments: {function_args}")
+
+                    # Call the MCP tool
+                    mcp_result = await self.call_mcp_tool(function_name, function_args)
+
+                    # Extract content from MCP result
+                    if mcp_result.content:
+                        # Handle different content types
+                        result_text = ""
+                        for content in mcp_result.content:
+                            if hasattr(content, 'text'):
+                                result_text += content.text
+                            else:
+                                result_text += str(content)
+
+                        print(f"  Result: {result_text}\n")
+
+                        # Add tool result to messages
+                        messages.append({
+                            "role": "tool",
+                            "tool_call_id": tool_call.id,
+                            "content": result_text
+                        })
+                    else:
+                        print("  Result: No content returned\n")
+                        messages.append({
+                            "role": "tool",
+                            "tool_call_id": tool_call.id,
+                            "content": "No result"
+                        })
+
+                # Continue the loop to get final response
+                continue
+
+            # No tool calls, this is the final response
+            if assistant_message.content:
+                print(f"Assistant: {assistant_message.content}\n")
+                return assistant_message.content
+            else:
+                print("Assistant: (no response)")
+                return None
+
+        print("\nReached maximum iterations")
+        return None
+
+
+async def main():
+    """Main function to run the client"""
+    import sys
+
+    # Get the MCP server script path
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    project_root = os.path.dirname(script_dir)
+    mcp_server_path = os.path.join(project_root, "mcp-server", "weather_server.py")
+
+    if not os.path.exists(mcp_server_path):
+        print(f"Error: MCP server script not found at {mcp_server_path}")
+        sys.exit(1)
+
+    # Create client
+    client = MCPWeatherClient()
+
+    # Connect to MCP server and run chat
+    async with client.connect_to_mcp(mcp_server_path):
+        # Example question about weather
+        question = "What's the weather like in Paris?"
+
+        if len(sys.argv) > 1:
+            question = " ".join(sys.argv[1:])
+
+        await client.chat(question)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/mcp-server/weather_server.py b/mcp-server/weather_server.py
new file mode 100755
index 0000000..1ddae17
--- /dev/null
+++ b/mcp-server/weather_server.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""
+MCP Weather Server - Provides hardcoded weather data for cities via stdio
+"""
+
+from fastmcp import FastMCP
+
+# Initialize MCP server
+mcp = FastMCP("weather-server")
+
+# Hardcoded weather data
+WEATHER_DATA = {
+    "paris": {
+        "city": "Paris",
+        "temperature": 15,
+        "condition": "Partly cloudy",
+        "humidity": 65,
+        "wind_speed": 12
+    },
+    "london": {
+        "city": "London",
+        "temperature": 12,
+        "condition": "Rainy",
+        "humidity": 80,
+        "wind_speed": 18
+    },
+    "new york": {
+        "city": "New York",
+        "temperature": 20,
+        "condition": "Sunny",
+        "humidity": 55,
+        "wind_speed": 10
+    },
+    "tokyo": {
+        "city": "Tokyo",
+        "temperature": 18,
+        "condition": "Clear",
+        "humidity": 60,
+        "wind_speed": 8
+    }
+}
+
+
+@mcp.tool()
+def get_weather(city: str) -> dict:
+    """
+    Get the current weather for a specific city.
+
+    Args:
+        city: The name of the city to get weather for
+
+    Returns:
+        A dictionary containing weather information including temperature,
+        condition, humidity, and wind speed
+    """
+    city_lower = city.lower()
+
+    if city_lower in WEATHER_DATA:
+        return WEATHER_DATA[city_lower]
+    else:
+        return {
+            "city": city,
+            "error": f"Weather data not available for {city}",
+            "available_cities": list(WEATHER_DATA.keys())
+        }
+
+
+if __name__ == "__main__":
+    # Run the MCP server using stdio transport
+    mcp.run(transport="stdio")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..630e2b0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+vllm>=0.6.0
+openai>=1.0.0
+fastmcp>=0.1.0
+mcp>=1.0.0
+huggingface-hub>=0.20.0
diff --git a/run_demo.sh b/run_demo.sh
new file mode 100755
index 0000000..2fcb778
--- /dev/null
+++ b/run_demo.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Demo script to run the MCP-enabled LLM client
+# Make sure the vLLM server is running first!
+
+cd "$(dirname "$0")"
+
+# Activate virtual environment if it exists
+if [ -d "venv" ]; then
+    source venv/bin/activate
+fi
+
+# Check if vLLM server is running
+if ! curl -s http://127.0.0.1:8000/health > /dev/null 2>&1; then
+    echo "⚠️ Warning: vLLM server doesn't seem to be running on port 8000"
+    echo "Please start it first with: ./start_server.sh"
+    echo ""
+    read -p "Continue anyway? (y/n) " -n 1 -r
+    echo
+    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+        exit 1
+    fi
+fi
+
+# Run the client
+echo "Starting MCP client..."
+echo ""
+
+if [ $# -eq 0 ]; then
+    # Default question
+    python client/mcp_client.py "What's the weather like in Paris?"
+else
+    # Custom question
+    python client/mcp_client.py "$@"
+fi
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..07d985c
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+set -e
+
+# Check that Python is installed
+if ! command -v python3 &> /dev/null; then
+    echo "❌ Python 3 is not installed"
+    exit 1
+fi
+
+PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
+echo "✓ Python version: $PYTHON_VERSION"
+
+# Create the virtual environment
+if [ ! -d "venv" ]; then
+    echo "📦 Creating the virtual environment..."
+    python3 -m venv venv
+fi
+
+# Activate the virtual environment
+source venv/bin/activate
+
+# Install dependencies
+echo "📥 Installing dependencies..."
+pip install --upgrade pip
+pip install -r requirements.txt
+
+echo ""
+echo "✅ Installation complete!"
+echo ""
diff --git a/start_server.sh b/start_server.sh
new file mode 100755
index 0000000..d2dbd64
--- /dev/null
+++ b/start_server.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+set -Eeuo pipefail
+
+# Start vLLM server with Qwen model
+# This script starts the vLLM OpenAI-compatible API server
+
+MODEL="${1:-Qwen/Qwen3-8B}"
+HOST="${2:-127.0.0.1}"
+PORT="${3:-8000}"
+
+echo "Starting vLLM server..."
+echo "Model: $MODEL"
+echo "Host: $HOST"
+echo "Port: $PORT"
+echo "Hugging Face Token: ${HF_TOKEN:+(set)}"  # avoid printing the secret itself
+echo ""
+echo "Note: This will download the model if not already cached."
+echo "Press Ctrl+C to stop the server."
+echo ""
+
+mkdir -p ~/.cache/vllm
+
+# see https://qwen.readthedocs.io/en/latest/framework/function_call.html#vllm
+podman run --name vllm --rm \
+    --device nvidia.com/gpu=all \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    -v ~/.cache/vllm:/root/.cache/vllm \
+    --env "HF_TOKEN=$HF_TOKEN" \
+    -p "$HOST:$PORT:8000" \
+    --ipc=host \
+    docker.io/vllm/vllm-openai:latest \
+    "$MODEL" \
+    --gpu-memory-utilization 0.95 \
+    --max-model-len 16384 \
+    --served-model-name qwen \
+    --enable-auto-tool-choice \
+    --tool-call-parser hermes \
+    --reasoning-parser deepseek_r1
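Once `start_server.sh` reports the server is ready, the setup can be sanity-checked directly against the OpenAI-compatible API before running the full demo. This is a minimal sketch assuming the script defaults (host `127.0.0.1`, port `8000`, served model name `qwen`); `/v1/models` and `/v1/chat/completions` are the standard vLLM OpenAI-compatible routes.

```bash
# List the served models; the response should include the "qwen" alias
curl -s http://127.0.0.1:8000/v1/models

# Send a minimal chat completion to the server (no tools involved)
curl -s http://127.0.0.1:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen", "messages": [{"role": "user", "content": "Say hello"}]}'

# run_demo.sh forwards its arguments to the client; Tokyo is one of the
# cities hardcoded in mcp-server/weather_server.py
./run_demo.sh "What's the weather like in Tokyo?"
```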