dss/.dss/doc-sync/generators/mcp_extractor.py
Initial commit: Clean DSS implementation (commit 276ed71f31, Digital Production Factory)
Migrated from design-system-swarm with fresh git history.
Old project history preserved in /home/overbits/apps/design-system-swarm

Core components:
- MCP Server (Python FastAPI with mcp 1.23.1)
- Claude Plugin (agents, commands, skills, strategies, hooks, core)
- DSS Backend (dss-mvp1 - token translation, Figma sync)
- Admin UI (Node.js/React)
- Server (Node.js/Express)
- Storybook integration (dss-mvp1/.storybook)

Self-contained configuration (see the path sketch after this list):
- All paths relative or use DSS_BASE_PATH=/home/overbits/dss
- PYTHONPATH configured for dss-mvp1 and dss-claude-plugin
- .env file with all configuration
- Claude plugin uses ${CLAUDE_PLUGIN_ROOT} for portability
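
A rough sketch of the path convention described above; apart from DSS_BASE_PATH and its documented default, the variable names and the location of dss-mcp-server.py are illustrative assumptions:

    import os
    from pathlib import Path

    # Resolve the installation root, falling back to the documented default.
    base_path = Path(os.environ.get("DSS_BASE_PATH", "/home/overbits/dss"))
    mcp_server_source = base_path / "dss-mcp-server.py"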

Migration completed: $(date)
🤖 Clean migration with full functionality preserved

#!/usr/bin/env python3
"""
MCP Extractor

Extract MCP tool definitions from dss-mcp-server.py.
"""

import ast
import re
from pathlib import Path
from typing import Dict, List, Any, Optional
import logging

from .base_generator import DocGenerator

logger = logging.getLogger(__name__)


class MCPExtractor(DocGenerator):
    """
    Extract MCP tool definitions from dss-mcp-server.py.

    Extracts:
    - Tool names and descriptions
    - Input parameters and schemas
    - Tool handlers
    - Tool categories
    """
    def extract(self, source_path: Path) -> Dict[str, Any]:
        """
        Extract MCP tools from dss-mcp-server.py.

        Args:
            source_path: Path to dss-mcp-server.py

        Returns:
            Dictionary with extracted tool data
        """
        logger.info(f"Extracting MCP tools from {source_path}")

        with open(source_path, 'r') as f:
            source_code = f.read()

        # Extract tool definitions (Tool objects)
        tools = self._extract_tool_definitions(source_code)

        # Extract tool handlers (elif name == "tool_name" blocks)
        handlers = self._extract_tool_handlers(source_code)

        # Match tools with handlers
        for tool in tools:
            if tool["name"] in handlers:
                tool["handler"] = handlers[tool["name"]]

        return {
            "source_file": str(source_path),
            "tools": tools,
            "total_tools": len(tools),
            "categories": self._categorize_tools(tools)
        }
    def _extract_tool_definitions(self, source_code: str) -> List[Dict[str, Any]]:
        """
        Extract Tool() object definitions from source code.

        Args:
            source_code: Full source code

        Returns:
            List of tool dictionaries
        """
        tools = []

        # Pattern: Tool(name="...", description="...", inputSchema={...})
        tool_pattern = re.compile(
            r'Tool\s*\(\s*name\s*=\s*["\']([^"\']+)["\']\s*,\s*description\s*=\s*["\']([^"\']+)["\']',
            re.MULTILINE | re.DOTALL
        )
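        # Illustrative only: the pattern above targets definitions shaped roughly like
        #     Tool(
        #         name="some_tool",
        #         description="What the tool does",
        #         inputSchema={"type": "object", "properties": {...}},
        #     )
        # Descriptions built via concatenation or f-strings will not match this
        # best-effort pattern.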
        for match in tool_pattern.finditer(source_code):
            tool_name = match.group(1)
            tool_description = match.group(2)

            # Extract input schema (complex, best effort)
            tool_start = match.start()
            tool_block = source_code[tool_start:tool_start + 2000]

            # Find inputSchema
            input_schema = self._extract_input_schema(tool_block)

            tools.append({
                "name": tool_name,
                "description": tool_description,
                "input_schema": input_schema,
                "category": self._infer_category(tool_name)
            })

        return tools
    def _extract_input_schema(self, tool_block: str) -> Dict[str, Any]:
        """
        Extract inputSchema from Tool() definition.

        Args:
            tool_block: Code block containing Tool() definition

        Returns:
            Input schema dictionary (best effort)
        """
        # Look for inputSchema={...}
        schema_match = re.search(r'inputSchema\s*=\s*\{', tool_block)
        if not schema_match:
            return {}

        # This is complex - just extract parameter names for now
        properties_match = re.search(
            r'"properties"\s*:\s*\{([^}]+)\}',
            tool_block,
            re.DOTALL
        )
        if properties_match:
            properties_block = properties_match.group(1)
            # Extract parameter names (keys in properties)
            param_names = re.findall(r'"([^"]+)"\s*:', properties_block)
            return {
                "type": "object",
                "properties": {name: {"type": "string"} for name in param_names}
            }

        return {}
    def _extract_tool_handlers(self, source_code: str) -> Dict[str, Dict[str, Any]]:
        """
        Extract tool handler code from call_tool() function.

        Args:
            source_code: Full source code

        Returns:
            Dictionary mapping tool name to handler info
        """
        handlers = {}

        # Pattern: elif name == "tool_name":
        handler_pattern = re.compile(
            r'elif\s+name\s*==\s*["\']([^"\']+)["\']:',
            re.MULTILINE
        )
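        # Illustrative only: this assumes call_tool() dispatches through a chain like
        #     elif name == "some_tool":
        #         ...
        # An initial plain `if` branch or a dict-based dispatch would not be matched.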
        for match in handler_pattern.finditer(source_code):
            tool_name = match.group(1)
            line_number = source_code[:match.start()].count('\n') + 1
            handlers[tool_name] = {
                "line_number": line_number,
                "implemented": True
            }

        return handlers
    def _infer_category(self, tool_name: str) -> str:
        """
        Infer tool category from name.

        Args:
            tool_name: Tool name

        Returns:
            Category string
        """
        if "project" in tool_name or "create" in tool_name:
            return "project_management"
        elif "figma" in tool_name:
            return "figma_integration"
        elif "token" in tool_name or "extract" in tool_name:
            return "token_ingestion"
        elif "analyze" in tool_name or "audit" in tool_name:
            return "analysis"
        elif "storybook" in tool_name:
            return "storybook"
        elif "devtools" in tool_name or "browser" in tool_name:
            return "browser_tools"
        elif "context" in tool_name or "resolve" in tool_name or "compiler" in tool_name:
            return "context_compiler"
        else:
            return "utilities"
    def _categorize_tools(self, tools: List[Dict[str, Any]]) -> Dict[str, List[str]]:
        """
        Group tools by category.

        Args:
            tools: List of tool dictionaries

        Returns:
            Dictionary mapping category to tool names
        """
        categories = {}
        for tool in tools:
            category = tool["category"]
            if category not in categories:
                categories[category] = []
            categories[category].append(tool["name"])
        return categories
    def transform(self, extracted_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Transform extracted MCP data to .knowledge/mcp-tools.json schema.

        Args:
            extracted_data: Raw extracted tool data

        Returns:
            Transformed data for knowledge base
        """
        # Read existing mcp-tools.json
        target_path = self.project_root / ".knowledge" / "mcp-tools.json"
        existing = self.read_existing_target(target_path)

        if existing:
            # Merge: preserve manual sections, update extracted tools
            result = existing.copy()
            result["tools"] = self._format_tools(extracted_data["tools"])
            result["total_tools"] = extracted_data["total_tools"]
            result["categories"] = extracted_data["categories"]
            result["last_updated"] = self.metadata["generated_at"]
        else:
            # Create new structure
            result = {
                "$schema": "dss-knowledge-v1",
                "type": "mcp_tools",
                "version": "1.0.0",
                "last_updated": self.metadata["generated_at"],
                "architecture": "MCP-first - All work via MCP tools, no REST endpoints",
                "tools": self._format_tools(extracted_data["tools"]),
                "total_tools": extracted_data["total_tools"],
                "categories": extracted_data["categories"]
            }

        return result
    def _format_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Format tools for knowledge base schema.

        Args:
            tools: Raw tool data

        Returns:
            Formatted tool list
        """
        formatted = []
        for tool in tools:
            formatted_tool = {
                "name": tool["name"],
                "description": tool["description"],
                "category": tool["category"],
                "parameters": list(tool["input_schema"].get("properties", {}).keys())
            }
            if "handler" in tool:
                formatted_tool["handler_line"] = tool["handler"]["line_number"]
            formatted.append(formatted_tool)
        return formatted
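

# ---------------------------------------------------------------------------
# Minimal usage sketch, not part of the doc-sync pipeline. How MCPExtractor is
# constructed depends on DocGenerator.__init__ (not shown in this file); the
# argument-free construction below is an assumption for illustration only.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import json
    import sys

    logging.basicConfig(level=logging.INFO)

    # Path to dss-mcp-server.py, taken from the command line for this sketch.
    server_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("dss-mcp-server.py")

    extractor = MCPExtractor()  # assumed signature; adjust to DocGenerator's constructor
    extracted = extractor.extract(server_path)
    print(json.dumps(extracted, indent=2))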