commit 8b361316cca929dabfcfd0730ba608089ca61f8f
Author: Nicolas Massé
Date:   Mon Jan 5 15:34:25 2026 -0500

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..37696d5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.claude
+venv
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b81da4f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,80 @@
+# AI model at the Edge with MCP support
+
+This is a Proof of Concept of a generative AI model (LLM) at the edge, with MCP server support.
+
+The idea is for the client to ask the AI model about the weather in Paris.
+The weather data is provided by an MCP server.
+
+There are three components in this project:
+
+- the vLLM server, serving the Qwen/Qwen3-8B model.
+- the Python client, calling vLLM.
+- the MCP server, serving the weather for a city over stdio (responses are hardcoded).
+
+The MCP server's tools are declared to the AI model, and the client calls the MCP server whenever the model requests a tool.
+
+## Prerequisites
+
+- Python 3.10 or higher (for the client and the MCP server)
+- Podman with NVIDIA GPU support (the vLLM server runs in a container)
+- An NVIDIA GPU with enough memory for the Qwen/Qwen3-8B model
+- A Hugging Face token exported as `HF_TOKEN`
+- Internet connection (for the first-time model download)
+
+## Step 1: Setup
+
+```bash
+./setup.sh
+```
+
+Wait for all dependencies to install.
+
+## Step 2: Start the vLLM Server
+
+Open a terminal and run:
+
+```bash
+./start_server.sh
+```
+
+Wait for the model to download and the server to start. You'll see:
+```
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://127.0.0.1:8000
+```
+
+**Keep this terminal open!**
+
+## Step 3: Run the Client
+
+Open a **new terminal** and run:
+
+```bash
+./run_demo.sh
+```
+
+You should see the client:
+1. Connect to the MCP weather server
+2. Ask about the weather in Paris
+3. Call the `get_weather` tool when the LLM requests it
+4. Print a natural language response with the weather data
+
+## Example Output
+
+```
+Connected to MCP server. Available tools:
+  - get_weather: Get the current weather for a specific city.
+
+User: What's the weather like in Paris?
+
+Assistant wants to call 1 tool(s):
+
+  Calling tool: get_weather
+  Arguments: {'city': 'Paris'}
+  Result: {"city": "Paris", "temperature": 15, ...}
+```
+
+## Authors
+
+- Claude Code
+- Nicolas Massé
diff --git a/client/mcp_client.py b/client/mcp_client.py
new file mode 100755
index 0000000..c42143e
--- /dev/null
+++ b/client/mcp_client.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+"""
+MCP Client - Integrates vLLM with MCP weather server
+"""
+
+import asyncio
+import json
+import os
+from typing import Optional
+from contextlib import asynccontextmanager
+
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+from openai import OpenAI
+
+
+class MCPWeatherClient:
+    """Client that connects vLLM to MCP weather server"""
+
+    def __init__(self, vllm_url: str = "http://127.0.0.1:8000/v1", model: str = "qwen"):
+        self.vllm_url = vllm_url
+        self.model = model
+        self.openai_client = OpenAI(
+            api_key="EMPTY",  # vLLM doesn't need API key
+            base_url=vllm_url
+        )
+        self.mcp_session: Optional[ClientSession] = None
+        self.available_tools = []
+
+    @asynccontextmanager
+    async def connect_to_mcp(self, server_script_path: str):
+        """Connect to MCP server via stdio"""
+        server_params = StdioServerParameters(
+            command="python",
+            args=[server_script_path],
+            env=None
+        )
+
+        async with stdio_client(server_params) as (read, write):
+            async with ClientSession(read, write) as session:
+                await session.initialize()
+                self.mcp_session = session
+
+                # List available tools
+                tools_list = await session.list_tools()
+                print("\nConnected to MCP server. Available tools:")
+                for tool in tools_list.tools:
+                    print(f"  - {tool.name}: {tool.description}")
+                    self.available_tools.append(tool)
+
+                yield session
+
+    def mcp_tools_to_openai_format(self):
+        """Convert MCP tools to OpenAI function calling format"""
+        openai_tools = []
+
+        for tool in self.available_tools:
+            # Convert MCP tool schema to OpenAI format
+            tool_def = {
+                "type": "function",
+                "function": {
+                    "name": tool.name,
+                    "description": tool.description or "",
+                    "parameters": tool.inputSchema if tool.inputSchema else {
+                        "type": "object",
+                        "properties": {},
+                        "required": []
+                    }
+                }
+            }
+            openai_tools.append(tool_def)
+
+        return openai_tools
+
+    async def call_mcp_tool(self, tool_name: str, arguments: dict):
+        """Call an MCP tool and return the result"""
+        if not self.mcp_session:
+            raise RuntimeError("MCP session not initialized")
+
+        result = await self.mcp_session.call_tool(tool_name, arguments)
+        return result
+
+    async def chat(self, user_message: str, max_iterations: int = 5):
+        """
+        Run a chat interaction with the model, handling tool calls via MCP
+
+        Args:
+            user_message: The user's question/message
+            max_iterations: Maximum number of turns to prevent infinite loops
+        """
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant that can get weather information for cities. When asked about weather, use the get_weather tool."
+            },
+            {
+                "role": "user",
+                "content": user_message
+            }
+        ]
+
+        tools = self.mcp_tools_to_openai_format()
+        print(f"\nUser: {user_message}\n")
+
+        for iteration in range(max_iterations):
+            # Call the model
+            response = self.openai_client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                tools=tools if tools else None,
+                tool_choice="auto" if tools else None
+            )
+
+            assistant_message = response.choices[0].message
+
+            # Add assistant response to messages
+            messages.append({
+                "role": "assistant",
+                "content": assistant_message.content,
+                "tool_calls": [
+                    {
+                        "id": tc.id,
+                        "type": tc.type,
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments
+                        }
+                    }
+                    for tc in (assistant_message.tool_calls or [])
+                ]
+            })
+
+            # Check if model wants to call tools
+            if assistant_message.tool_calls:
+                print(f"Assistant wants to call {len(assistant_message.tool_calls)} tool(s):\n")
+
+                # Process each tool call
+                for tool_call in assistant_message.tool_calls:
+                    function_name = tool_call.function.name
+                    function_args = json.loads(tool_call.function.arguments)
+
+                    print(f"  Calling tool: {function_name}")
+                    print(f"  Arguments: {function_args}")
+
+                    # Call the MCP tool
+                    mcp_result = await self.call_mcp_tool(function_name, function_args)
+
+                    # Extract content from MCP result
+                    if mcp_result.content:
+                        # Handle different content types
+                        result_text = ""
+                        for content in mcp_result.content:
+                            if hasattr(content, 'text'):
+                                result_text += content.text
+                            else:
+                                result_text += str(content)
+
+                        print(f"  Result: {result_text}\n")
+
+                        # Add tool result to messages
+                        messages.append({
+                            "role": "tool",
+                            "tool_call_id": tool_call.id,
+                            "content": result_text
+                        })
+                    else:
+                        print("  Result: No content returned\n")
+                        messages.append({
+                            "role": "tool",
+                            "tool_call_id": tool_call.id,
+                            "content": "No result"
+                        })
+
+                # Continue the loop to get final response
+                continue
+
+            # No tool calls, this is the final response
+            if assistant_message.content:
+                print(f"Assistant: {assistant_message.content}\n")
+                return assistant_message.content
+            else:
+                print("Assistant: (no response)")
+                return None
+
+        print("\nReached maximum iterations")
+        return None
+
+
+async def main():
+    """Main function to run the client"""
+    import sys
+
+    # Get the MCP server script path
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    project_root = os.path.dirname(script_dir)
+    mcp_server_path = os.path.join(project_root, "mcp-server", "weather_server.py")
+
+    if not os.path.exists(mcp_server_path):
+        print(f"Error: MCP server script not found at {mcp_server_path}")
+        sys.exit(1)
+
+    # Create client
+    client = MCPWeatherClient()
+
+    # Connect to MCP server and run chat
+    async with client.connect_to_mcp(mcp_server_path):
+        # Example question about weather
+        question = "What's the weather like in Paris?"
+
+        if len(sys.argv) > 1:
+            question = " ".join(sys.argv[1:])
+
+        await client.chat(question)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/mcp-server/weather_server.py b/mcp-server/weather_server.py
new file mode 100755
index 0000000..1ddae17
--- /dev/null
+++ b/mcp-server/weather_server.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""
+MCP Weather Server - Provides hardcoded weather data for cities via stdio
+"""
+
+from fastmcp import FastMCP
+
+# Initialize MCP server
+mcp = FastMCP("weather-server")
+
+# Hardcoded weather data
+WEATHER_DATA = {
+    "paris": {
+        "city": "Paris",
+        "temperature": 15,
+        "condition": "Partly cloudy",
+        "humidity": 65,
+        "wind_speed": 12
+    },
+    "london": {
+        "city": "London",
+        "temperature": 12,
+        "condition": "Rainy",
+        "humidity": 80,
+        "wind_speed": 18
+    },
+    "new york": {
+        "city": "New York",
+        "temperature": 20,
+        "condition": "Sunny",
+        "humidity": 55,
+        "wind_speed": 10
+    },
+    "tokyo": {
+        "city": "Tokyo",
+        "temperature": 18,
+        "condition": "Clear",
+        "humidity": 60,
+        "wind_speed": 8
+    }
+}
+
+
+@mcp.tool()
+def get_weather(city: str) -> dict:
+    """
+    Get the current weather for a specific city.
+
+    Args:
+        city: The name of the city to get weather for
+
+    Returns:
+        A dictionary containing weather information including temperature,
+        condition, humidity, and wind speed
+    """
+    city_lower = city.lower()
+
+    if city_lower in WEATHER_DATA:
+        return WEATHER_DATA[city_lower]
+    else:
+        return {
+            "city": city,
+            "error": f"Weather data not available for {city}",
+            "available_cities": list(WEATHER_DATA.keys())
+        }
+
+
+if __name__ == "__main__":
+    # Run the MCP server using stdio transport
+    mcp.run(transport="stdio")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..630e2b0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+vllm>=0.6.0
+openai>=1.0.0
+fastmcp>=0.1.0
+mcp>=1.0.0
+huggingface-hub>=0.20.0
diff --git a/run_demo.sh b/run_demo.sh
new file mode 100755
index 0000000..2fcb778
--- /dev/null
+++ b/run_demo.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Demo script to run the MCP-enabled LLM client
+# Make sure the vLLM server is running first!
+
+cd "$(dirname "$0")"
+
+# Activate virtual environment if it exists
+if [ -d "venv" ]; then
+    source venv/bin/activate
+fi
+
+# Check if vLLM server is running
+if ! curl -s http://127.0.0.1:8000/health > /dev/null 2>&1; then
+    echo "⚠️ Warning: vLLM server doesn't seem to be running on port 8000"
+    echo "Please start it first with: ./start_server.sh"
+    echo ""
+    read -p "Continue anyway? (y/n) " -n 1 -r
+    echo
+    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+        exit 1
+    fi
+fi
+
+# Run the client
+echo "Starting MCP client..."
+echo ""
+
+if [ $# -eq 0 ]; then
+    # Default question
+    python client/mcp_client.py "What's the weather like in Paris?"
+else
+    # Custom question
+    python client/mcp_client.py "$@"
+fi
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..07d985c
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+set -e
+
+# Check that Python is installed
+if ! command -v python3 &> /dev/null; then
+    echo "❌ Python 3 is not installed"
+    exit 1
+fi
+
+PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
+echo "✓ Python version: $PYTHON_VERSION"
+
+# Create the virtual environment
+if [ ! -d "venv" ]; then
+    echo "📦 Creating the virtual environment..."
+    python3 -m venv venv
+fi
+
+# Activate the virtual environment
+source venv/bin/activate
+
+# Install dependencies
+echo "📥 Installing dependencies..."
+pip install --upgrade pip
+pip install -r requirements.txt
+
+echo ""
+echo "✅ Installation complete!"
+echo ""
diff --git a/start_server.sh b/start_server.sh
new file mode 100755
index 0000000..d2dbd64
--- /dev/null
+++ b/start_server.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+set -Eeuo pipefail
+
+# Start vLLM server with Qwen model
+# This script starts the vLLM OpenAI-compatible API server
+
+MODEL="${1:-Qwen/Qwen3-8B}"
+HOST="${2:-127.0.0.1}"
+PORT="${3:-8000}"
+
+echo "Starting vLLM server..."
+echo "Model: $MODEL"
+echo "Host: $HOST"
+echo "Port: $PORT"
+echo "Hugging Face Token: ${HF_TOKEN:+(set)}"  # avoid printing the secret itself
+echo ""
+echo "Note: This will download the model if not already cached."
+echo "Press Ctrl+C to stop the server."
+echo ""
+
+mkdir -p ~/.cache/vllm
+
+# see https://qwen.readthedocs.io/en/latest/framework/function_call.html#vllm
+podman run --name vllm --rm \
+    --device nvidia.com/gpu=all \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    -v ~/.cache/vllm:/root/.cache/vllm \
+    --env "HF_TOKEN=$HF_TOKEN" \
+    -p "$HOST:$PORT:8000" \
+    --ipc=host \
+    docker.io/vllm/vllm-openai:latest \
+    "$MODEL" \
+    --gpu-memory-utilization 0.95 \
+    --max-model-len 16384 \
+    --served-model-name qwen \
+    --enable-auto-tool-choice \
+    --tool-call-parser hermes \
+    --reasoning-parser deepseek_r1
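Once `start_server.sh` reports the server is ready, the setup can be sanity-checked directly against the OpenAI-compatible API before running the full demo. This is a minimal sketch assuming the script defaults (host `127.0.0.1`, port `8000`, served model name `qwen`); `/v1/models` and `/v1/chat/completions` are the standard vLLM OpenAI-compatible routes.

```bash
# List the served models; the response should include the "qwen" alias
curl -s http://127.0.0.1:8000/v1/models

# Send a minimal chat completion to the server (no tools involved)
curl -s http://127.0.0.1:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen", "messages": [{"role": "user", "content": "Say hello"}]}'

# run_demo.sh forwards its arguments to the client; Tokyo is one of the
# cities hardcoded in mcp-server/weather_server.py
./run_demo.sh "What's the weather like in Tokyo?"
```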