#!/usr/bin/env python3
"""
MCP Extractor

Extract MCP tool definitions from dss-mcp-server.py.
"""

import ast
import logging
import re
from pathlib import Path
from typing import Any, Dict, List, Optional

from .base_generator import DocGenerator

logger = logging.getLogger(__name__)

# Tool(name="...", description="...") -- each string must be closed by the
# same quote that opened it, so an apostrophe inside a double-quoted
# description no longer truncates it. Triple-quoted descriptions are accepted.
_TOOL_PATTERN = re.compile(
    r'Tool\s*\(\s*'
    r'name\s*=\s*(?P<name_quote>["\'])'
    r'(?P<name>(?:\\.|(?!(?P=name_quote))[^\\])+)(?P=name_quote)'
    r'\s*,\s*'
    r'description\s*=\s*(?P<desc_quote>"""|\'\'\'|["\'])'
    r'(?P<description>(?:\\.|(?!(?P=desc_quote))[^\\])+)(?P=desc_quote)',
    re.DOTALL,
)

# Dispatch branches: both `if name == "x":` and `elif name == "x":`.
_HANDLER_PATTERN = re.compile(
    r'\b(?:el)?if\s+name\s*==\s*["\']([^"\']+)["\']\s*:'
)

# inputSchema={ ... } and the "properties": { ... } entry inside it.
_SCHEMA_START_PATTERN = re.compile(r'inputSchema\s*=\s*\{')
_PROPERTIES_START_PATTERN = re.compile(r'(["\'])properties\1\s*:\s*\{')

# Minimal tokenizer used for bracket matching: comments and string literals
# are consumed as single tokens so brackets inside them are never counted.
# Triple-quoted alternatives must come before the single-quoted ones.
_TOKEN_PATTERN = re.compile(
    r'(?P<comment>#[^\n]*)'
    r'|(?P<string>"""(?:\\.|[^\\])*?"""'
    r"|'''(?:\\.|[^\\])*?'''"
    r'|"(?:\\.|[^"\\\n])*"'
    r"|'(?:\\.|[^'\\\n])*')"
    r'|(?P<open>[(\[{])'
    r'|(?P<close>[)\]}])',
    re.DOTALL,
)

# A string literal is a dict key when a colon follows it.
_KEY_SUFFIX_PATTERN = re.compile(r'\s*:')

# Window used when the Tool(...) call cannot be bracket-matched.
_FALLBACK_BLOCK_CHARS = 2000

# Ordered (category, keywords) rules; the first rule with a keyword contained
# in the tool name wins, so the order is part of the behavior.
_CATEGORY_RULES = (
    ("project_management", ("project", "create")),
    ("figma_integration", ("figma",)),
    ("token_ingestion", ("token", "extract")),
    ("analysis", ("analyze", "audit")),
    ("storybook", ("storybook",)),
    ("browser_tools", ("devtools", "browser")),
    ("context_compiler", ("context", "resolve", "compiler")),
)
_DEFAULT_CATEGORY = "utilities"


class MCPExtractor(DocGenerator):
    """
    Extract MCP tool definitions from dss-mcp-server.py.

    Extracts:
    - Tool names and descriptions
    - Input parameters and schemas
    - Tool handlers
    - Tool categories

    Extraction is regex/bracket-matching based and therefore best effort:
    it reads the source as text and never imports or executes it.
    """

    def extract(self, source_path: Path) -> Dict[str, Any]:
        """
        Extract MCP tools from dss-mcp-server.py.

        Args:
            source_path: Path to dss-mcp-server.py

        Returns:
            Dictionary with keys "source_file", "tools", "total_tools"
            and "categories".

        Raises:
            OSError: If the source file cannot be read.
            UnicodeDecodeError: If the source file is not valid UTF-8.
        """
        logger.info("Extracting MCP tools from %s", source_path)

        # Decode explicitly as UTF-8 instead of the platform default encoding.
        source_code = Path(source_path).read_text(encoding="utf-8")

        # Extract tool definitions (Tool objects)
        tools = self._extract_tool_definitions(source_code)

        # Extract tool handlers (if/elif name == "tool_name" blocks)
        handlers = self._extract_tool_handlers(source_code)

        # Match tools with handlers
        for tool in tools:
            handler = handlers.get(tool["name"])
            if handler is not None:
                tool["handler"] = handler

        return {
            "source_file": str(source_path),
            "tools": tools,
            "total_tools": len(tools),
            "categories": self._categorize_tools(tools),
        }

    @staticmethod
    def _find_closing_bracket(text: str, open_index: int) -> Optional[int]:
        """
        Find the bracket that closes the one at ``open_index``.

        Brackets inside string literals and comments are ignored.

        Args:
            text: Source text to scan
            open_index: Index of an opening ``(``, ``[`` or ``{`` in text

        Returns:
            Index of the matching closing bracket, or None if the text ends
            before the bracket is closed.
        """
        depth = 0
        for token in _TOKEN_PATTERN.finditer(text, open_index):
            kind = token.lastgroup
            if kind == "open":
                depth += 1
            elif kind == "close":
                depth -= 1
                if depth <= 0:
                    # depth < 0 means open_index was not an opening bracket.
                    return token.start() if depth == 0 else None
        return None

    @staticmethod
    def _top_level_keys(text: str, open_index: int) -> List[str]:
        """
        Collect the string keys of the dict literal opening at ``open_index``.

        Only keys of the outermost dict are returned; keys of nested dicts
        (e.g. "type" / "description" of an individual property) are skipped.

        Args:
            text: Source text to scan
            open_index: Index of the dict's opening ``{`` in text

        Returns:
            Key names in source order (quotes stripped, escapes left as-is).
        """
        keys = []
        depth = 0
        for token in _TOKEN_PATTERN.finditer(text, open_index):
            kind = token.lastgroup
            if kind == "open":
                depth += 1
            elif kind == "close":
                depth -= 1
                if depth <= 0:
                    break
            elif (
                kind == "string"
                and depth == 1
                and _KEY_SUFFIX_PATTERN.match(text, token.end())
            ):
                literal = token.group()
                quote_len = 3 if literal[:3] in ('"""', "'''") else 1
                keys.append(literal[quote_len:-quote_len])
        return keys

    def _extract_tool_definitions(self, source_code: str) -> List[Dict[str, Any]]:
        """
        Extract Tool() object definitions from source code.

        Matches ``Tool(name="...", description="...", ...)`` calls where
        ``name`` and ``description`` are the first two keyword arguments.

        Args:
            source_code: Full source code

        Returns:
            List of tool dictionaries with keys "name", "description",
            "input_schema" and "category", in source order.
        """
        tools = []

        for match in _TOOL_PATTERN.finditer(source_code):
            tool_name = match.group("name")
            tool_start = match.start()

            # Bound the block by the call's own closing parenthesis so that
            # large schemas are not truncated and the next Tool(...) is never
            # scanned by mistake.
            open_paren = source_code.index("(", tool_start)
            close_paren = self._find_closing_bracket(source_code, open_paren)
            if close_paren is None:
                # Unbalanced source: fall back to a fixed-size window.
                block_end = tool_start + _FALLBACK_BLOCK_CHARS
            else:
                block_end = close_paren + 1
            tool_block = source_code[tool_start:block_end]

            tools.append({
                "name": tool_name,
                "description": match.group("description"),
                "input_schema": self._extract_input_schema(tool_block),
                "category": self._infer_category(tool_name),
            })

        return tools

    def _extract_input_schema(self, tool_block: str) -> Dict[str, Any]:
        """
        Extract inputSchema from Tool() definition.

        Only parameter names are recovered; every parameter is reported with
        the placeholder type "string" regardless of its declared type.

        Args:
            tool_block: Code block containing Tool() definition

        Returns:
            ``{"type": "object", "properties": {name: {"type": "string"}}}``
            or an empty dict when no inputSchema / properties / parameter
            names are found.
        """
        schema_match = _SCHEMA_START_PATTERN.search(tool_block)
        if not schema_match:
            return {}

        # Restrict the search to the inputSchema dict itself.
        schema_open = schema_match.end() - 1
        schema_close = self._find_closing_bracket(tool_block, schema_open)
        if schema_close is None:
            schema_text = tool_block[schema_open:]
        else:
            schema_text = tool_block[schema_open:schema_close + 1]

        properties_match = _PROPERTIES_START_PATTERN.search(schema_text)
        if not properties_match:
            return {}

        param_names = self._top_level_keys(
            schema_text, properties_match.end() - 1
        )
        if not param_names:
            return {}

        return {
            "type": "object",
            "properties": {name: {"type": "string"} for name in param_names},
        }

    def _extract_tool_handlers(self, source_code: str) -> Dict[str, Dict[str, Any]]:
        """
        Extract tool handler locations from the name-dispatch chain.

        Matches both ``if name == "tool":`` and ``elif name == "tool":`` so
        the first branch of the chain is included. If a name occurs more
        than once, the last occurrence wins.

        Args:
            source_code: Full source code

        Returns:
            Dictionary mapping tool name to
            ``{"line_number": <1-based line>, "implemented": True}``
        """
        handlers = {}

        for match in _HANDLER_PATTERN.finditer(source_code):
            # Count newlines before the match without copying the prefix.
            line_number = source_code.count("\n", 0, match.start()) + 1
            handlers[match.group(1)] = {
                "line_number": line_number,
                "implemented": True,
            }

        return handlers

    def _infer_category(self, tool_name: str) -> str:
        """
        Infer tool category from name.

        The name is checked against keyword rules in a fixed order and the
        first rule with a keyword contained in the name decides the category.

        Args:
            tool_name: Tool name

        Returns:
            Category string ("utilities" when no rule matches)
        """
        for category, keywords in _CATEGORY_RULES:
            if any(keyword in tool_name for keyword in keywords):
                return category
        return _DEFAULT_CATEGORY

    def _categorize_tools(self, tools: List[Dict[str, Any]]) -> Dict[str, List[str]]:
        """
        Group tools by category.

        Args:
            tools: List of tool dictionaries (each with "category" and "name")

        Returns:
            Dictionary mapping category to tool names, in first-seen order
        """
        categories: Dict[str, List[str]] = {}
        for tool in tools:
            categories.setdefault(tool["category"], []).append(tool["name"])
        return categories

    def transform(self, extracted_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Transform extracted MCP data to .knowledge/mcp-tools.json schema.

        When an existing mcp-tools.json is available, its other keys are kept
        and only the extracted fields are overwritten; otherwise a fresh
        document is created.

        Args:
            extracted_data: Raw extracted tool data (as returned by extract)

        Returns:
            Transformed data for knowledge base
        """
        # Read existing mcp-tools.json.
        # NOTE(review): read_existing_target, project_root and metadata come
        # from the base class; presumably a falsy value means "no existing
        # file" -- confirm against DocGenerator.
        target_path = self.project_root / ".knowledge" / "mcp-tools.json"
        existing = self.read_existing_target(target_path)

        formatted_tools = self._format_tools(extracted_data["tools"])

        if existing:
            # Merge: preserve manual sections, update extracted tools
            result = existing.copy()
            result["tools"] = formatted_tools
            result["total_tools"] = extracted_data["total_tools"]
            result["categories"] = extracted_data["categories"]
            result["last_updated"] = self.metadata["generated_at"]
            return result

        # Create new structure
        return {
            "$schema": "dss-knowledge-v1",
            "type": "mcp_tools",
            "version": "1.0.0",
            "last_updated": self.metadata["generated_at"],
            "architecture": "MCP-first - All work via MCP tools, no REST endpoints",
            "tools": formatted_tools,
            "total_tools": extracted_data["total_tools"],
            "categories": extracted_data["categories"],
        }

    def _format_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Format tools for knowledge base schema.

        Args:
            tools: Raw tool data

        Returns:
            Formatted tool list; each entry has "name", "description",
            "category", "parameters" and, when a handler was matched,
            "handler_line".
        """
        formatted = []

        for tool in tools:
            formatted_tool = {
                "name": tool["name"],
                "description": tool["description"],
                "category": tool["category"],
                "parameters": list(tool["input_schema"].get("properties", {})),
            }

            if "handler" in tool:
                formatted_tool["handler_line"] = tool["handler"]["line_number"]

            formatted.append(formatted_tool)

        return formatted