Files
dss/tools/api/ingestion_parser.py
Digital Production Factory 276ed71f31 Initial commit: Clean DSS implementation
Migrated from design-system-swarm with fresh git history.
Old project history preserved in /home/overbits/apps/design-system-swarm

Core components:
- MCP Server (Python FastAPI with mcp 1.23.1)
- Claude Plugin (agents, commands, skills, strategies, hooks, core)
- DSS Backend (dss-mvp1 - token translation, Figma sync)
- Admin UI (Node.js/React)
- Server (Node.js/Express)
- Storybook integration (dss-mvp1/.storybook)

Self-contained configuration:
- All paths relative or use DSS_BASE_PATH=/home/overbits/dss
- PYTHONPATH configured for dss-mvp1 and dss-claude-plugin
- .env file with all configuration
- Claude plugin uses ${CLAUDE_PLUGIN_ROOT} for portability

Migration completed: $(date)
🤖 Clean migration with full functionality preserved
2025-12-09 18:45:48 -03:00

418 lines
15 KiB
Python

"""
Natural Language Parser for Design System Ingestion.
This module parses natural language prompts to understand:
- Intent (ingest, search, compare, etc.)
- Design system names
- Alternative sources (Figma URLs, images, etc.)
- Configuration options
"""
import re
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any, Tuple
from enum import Enum
from design_system_registry import (
find_design_system,
search_design_systems,
get_alternative_ingestion_options,
DesignSystemInfo,
)
class IngestionIntent(Enum):
    """Kinds of request a user can make about design systems.

    The member value is the string that ends up in serialized results
    (``ParsedIngestionPrompt.to_dict`` emits ``intent.value``).
    """

    # Add/import a design system.
    INGEST = "ingest"
    # Search for design systems.
    SEARCH = "search"
    # List available/known systems.
    LIST = "list"
    # Get info about one specific system.
    INFO = "info"
    # Compare design systems with each other.
    COMPARE = "compare"
    # Configure ingestion settings.
    CONFIGURE = "configure"
    # Ask for help with ingestion.
    HELP = "help"
    # Has no entry in INTENT_PATTERNS, so pattern scoring never selects it.
    UNKNOWN = "unknown"
class SourceType(Enum):
    """Categories of ingestion source that can be detected in a prompt.

    The member value is the string emitted under ``"type"`` when a parsed
    source is serialized by ``ParsedIngestionPrompt.to_dict``.
    """

    # A name resolved through the design system registry.
    DESIGN_SYSTEM_NAME = "design_system_name"
    # An npm package reference such as "@scope/name".
    NPM_PACKAGE = "npm_package"
    # A figma.com file/design/community link.
    FIGMA_URL = "figma_url"
    # A github.com link.
    GITHUB_URL = "github_url"
    # A link to a .css/.scss/.sass resource.
    CSS_URL = "css_url"
    # A link to a png/jpg/jpeg/gif/webp/svg resource.
    IMAGE_URL = "image_url"
    TEXT_DESCRIPTION = "text_description"
@dataclass
class ParsedSource:
    """A single source reference detected in a prompt."""

    # Which category of source this is.
    source_type: SourceType
    # The text captured from the prompt (URL, package name or phrase).
    value: str
    # Detection confidence, from 0.0 to 1.0.
    confidence: float = 1.0
    # Registry entry this source resolved to; None when nothing was matched.
    matched_system: Optional[DesignSystemInfo] = None
@dataclass
class ParsedIngestionPrompt:
    """Outcome of parsing one ingestion prompt."""

    # The prompt exactly as it was received.
    original_prompt: str
    # The intent assigned to the prompt.
    intent: IngestionIntent
    # Overall confidence of the parse.
    confidence: float = 1.0
    # Sources detected in the prompt.
    sources: List[ParsedSource] = field(default_factory=list)
    # Free-form configuration options (not populated by the code visible here).
    options: Dict[str, Any] = field(default_factory=dict)
    # Human-readable hints for the user.
    suggestions: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the result for API responses.

        Enum members are replaced by their string values and a matched
        system is serialized through its own ``to_dict`` (``None`` when the
        source has no match).
        """
        serialized_sources = []
        for src in self.sources:
            system = src.matched_system
            serialized_sources.append({
                "type": src.source_type.value,
                "value": src.value,
                "confidence": src.confidence,
                "matched_system": system.to_dict() if system else None,
            })

        payload = {
            "original_prompt": self.original_prompt,
            "intent": self.intent.value,
            "confidence": self.confidence,
            "sources": serialized_sources,
            "options": self.options,
            "suggestions": self.suggestions,
        }
        return payload
# Intent detection patterns.
#
# Maps each intent to a list of regexes. detect_intent() runs every regex with
# re.findall against the lower-cased prompt and adds one point per match to
# that intent's score, so the patterns are written in lower case.
#
# Key order matters: detect_intent() picks the winner with max(), which returns
# the first key holding the highest score, so earlier intents win ties.
INTENT_PATTERNS = {
    IngestionIntent.INGEST: [
        # Imperative verbs for adding something.
        r'\b(ingest|import|add|use|install|load|get|fetch|download|setup|init|initialize)\b',
        # Request phrasings.
        r'\b(i want|i need|give me|let\'s use|can you add|please add)\b',
        r'\b(integrate|incorporate|bring in|pull in)\b',
    ],
    IngestionIntent.SEARCH: [
        r'\b(search|find|look for|looking for|discover|explore)\b',
        r'\b(what.*available|show me.*options|any.*like)\b',
    ],
    IngestionIntent.LIST: [
        # A listing word followed, anywhere later in the prompt, by a
        # catalogue word.
        r'\b(list|show|display|what|which)\b.*(design systems?|available|supported|known)\b',
        r'\b(what do you (know|have|support))\b',
    ],
    IngestionIntent.INFO: [
        r'\b(info|information|details|about|tell me about|what is)\b',
        r'\b(how does|what\'s|describe)\b',
    ],
    IngestionIntent.COMPARE: [
        # Needs a comparison word AND a later joining word in the same prompt.
        r'\b(compare|versus|vs|difference|between|or)\b.*\b(and|vs|versus|or)\b',
    ],
    IngestionIntent.CONFIGURE: [
        r'\b(configure|config|settings?|options?|customize)\b',
    ],
    IngestionIntent.HELP: [
        r'\b(help|how to|how do i|what can|guide|tutorial)\b',
    ],
}
# URL patterns.
#
# Maps a source type to a regex with a single capturing group holding the
# reference. extract_urls() applies these with re.IGNORECASE and skips the
# NPM_PACKAGE entry. Every URL pattern requires an explicit http:// or
# https:// scheme.
URL_PATTERNS = {
    # figma.com links under /file/, /design/ or /community/file/.
    SourceType.FIGMA_URL: r'(https?://(?:www\.)?figma\.com/(?:file|design|community/file)/[^\s]+)',
    # Any github.com link.
    SourceType.GITHUB_URL: r'(https?://(?:www\.)?github\.com/[^\s]+)',
    # Package name with an optional "npm:" prefix and optional "@scope/".
    SourceType.NPM_PACKAGE: r'(?:npm:)?(@?[a-z0-9][\w\-\.]*(?:/[a-z0-9][\w\-\.]*)?)',
    # Stylesheet links, with an optional query string.
    SourceType.CSS_URL: r'(https?://[^\s]+\.(?:css|scss|sass)(?:\?[^\s]*)?)',
    # Image links, with an optional query string.
    SourceType.IMAGE_URL: r'(https?://[^\s]+\.(?:png|jpg|jpeg|gif|webp|svg)(?:\?[^\s]*)?)',
}
def detect_intent(prompt: str) -> Tuple[IngestionIntent, float]:
    """Classify the intent behind ``prompt``.

    Every regex in ``INTENT_PATTERNS`` is run against the lower-cased prompt
    and each match is worth one point for its intent.

    Returns:
        ``(intent, confidence)``. When nothing matches at all the result is
        ``(IngestionIntent.INGEST, 0.5)``; otherwise the highest-scoring
        intent with a confidence of ``0.4 + 0.3 * score`` capped at 1.0.
    """
    text = prompt.lower()

    # Total number of regex matches per intent.
    hits = {
        intent: sum(len(re.findall(regex, text)) for regex in regexes)
        for intent, regexes in INTENT_PATTERNS.items()
    }

    # No pattern fired anywhere: fall back to INGEST with a low confidence.
    if not any(hits.values()):
        return IngestionIntent.INGEST, 0.5

    # max() keeps the first intent on ties, i.e. INTENT_PATTERNS order.
    winner = max(hits, key=hits.get)
    strength = hits[winner]
    return winner, min(1.0, strength * 0.3 + 0.4)
# Characters that commonly follow a URL in running text ("see https://x/y.")
# but are not part of it.
_URL_TRAILING_PUNCTUATION = '.,;:!?)]}>"\''

# Figma links written without a scheme, e.g. "figma.com/file/abc123". The
# look-behind stops this from matching the tail of a schemed URL (already
# handled by URL_PATTERNS) or a different host such as "notfigma.com".
_SCHEMELESS_FIGMA_RE = re.compile(
    r'(?<![\w./:@\-])((?:www\.)?figma\.com/(?:file|design|community/file)/[^\s]+)',
    re.IGNORECASE,
)


def extract_urls(prompt: str) -> List[ParsedSource]:
    """Extract URL sources (Figma, GitHub, CSS, image) from the prompt.

    Args:
        prompt: The raw user prompt.

    Returns:
        One ``ParsedSource`` with confidence 0.95 per URL found, grouped by
        pattern in ``URL_PATTERNS`` order. Trailing sentence punctuation is
        removed from each URL. Figma links written without ``http(s)://``
        are also returned, normalized by prefixing ``https://``.
    """
    sources = []

    for source_type, pattern in URL_PATTERNS.items():
        if source_type == SourceType.NPM_PACKAGE:
            continue  # Package names are handled by extract_npm_packages.
        # Each URL pattern has one capturing group, so findall yields strings.
        for match in re.findall(pattern, prompt, re.IGNORECASE):
            sources.append(ParsedSource(
                source_type=source_type,
                # The greedy [^\s]+ also swallows a closing "." or ")".
                value=match.rstrip(_URL_TRAILING_PUNCTUATION),
                confidence=0.95,
            ))

    # The module's own help text suggests "ingest from figma.com/file/...",
    # which the schemed patterns above cannot see.
    for match in _SCHEMELESS_FIGMA_RE.findall(prompt):
        sources.append(ParsedSource(
            source_type=SourceType.FIGMA_URL,
            value="https://" + match.rstrip(_URL_TRAILING_PUNCTUATION),
            confidence=0.95,
        ))

    return sources
def extract_design_systems(prompt: str) -> List[ParsedSource]:
    """Find known design systems mentioned by name in the prompt.

    ``http(s)://`` URLs are removed first, the rest is lower-cased and split
    on whitespace, and every window of three, two and then one word is
    looked up with ``find_design_system``. Each system is reported once, for
    the first window that resolved to it.

    Returns:
        ``ParsedSource`` entries of type ``DESIGN_SYSTEM_NAME`` with
        confidence 0.9 for multi-word phrases and 0.7 for single words.
    """
    # Windows made up only of these words are never looked up.
    stop_words = {'the', 'a', 'an', 'from', 'to', 'with', 'for', 'and', 'or', 'in', 'on', 'at'}

    # Drop URLs so their fragments cannot be mistaken for system names.
    tokens = re.sub(r'https?://[^\s]+', '', prompt).lower().split()

    found = []
    # Longest windows first.
    for size in (3, 2, 1):
        for start in range(len(tokens) - size + 1):
            window = tokens[start:start + size]
            if all(token in stop_words for token in window):
                continue

            phrase = ' '.join(window)
            system = find_design_system(phrase)
            if not system:
                continue

            already_reported = any(
                prev.matched_system and prev.matched_system.id == system.id
                for prev in found
            )
            if already_reported:
                continue

            found.append(ParsedSource(
                source_type=SourceType.DESIGN_SYSTEM_NAME,
                value=phrase,
                confidence=0.7 if size == 1 else 0.9,
                matched_system=system,
            ))

    return found
# Text that must not be scanned for package names: schemed URLs, plus
# scheme-less links to the hosts this module recognises. Without this,
# "https://github.com/org/repo" yields the bogus package "github.com/org".
_NPM_URL_NOISE_RE = re.compile(
    r'https?://\S+|(?<![\w./:@\-])(?:www\.)?(?:figma|github)\.com/\S*'
)

# Ordinary words that the package pattern can match but are never packages.
_NPM_COMMON_WORDS = frozenset(['design', 'system', 'use', 'the', 'and', 'for', 'from'])


def extract_npm_packages(prompt: str) -> List[ParsedSource]:
    """Extract explicit npm package references from the prompt.

    The prompt is lower-cased and URLs are removed before scanning, matching
    what ``extract_design_systems`` does, so URL fragments are not reported
    as packages.

    Args:
        prompt: The raw user prompt.

    Returns:
        One ``ParsedSource`` of type ``NPM_PACKAGE`` (confidence 0.8) for
        every candidate that contains ``@``, ``/`` or ``-``; plain words are
        ignored because they cannot be told apart from ordinary text.
    """
    text = _NPM_URL_NOISE_RE.sub(' ', prompt.lower())

    # Match @scope/package or package-name patterns, optionally prefixed
    # with "npm:" / "npm ".
    npm_pattern = r'(?:npm[:\s]+)?(@[a-z0-9][\w\-\.]+/[\w\-\.]+|[a-z][\w\-\.]*(?:/[\w\-\.]+)?)'

    sources = []
    for match in re.findall(npm_pattern, text):
        if match in _NPM_COMMON_WORDS:
            continue
        # Only names that look like packages (scoped, sub-path or hyphenated).
        if '@' in match or '/' in match or '-' in match:
            sources.append(ParsedSource(
                source_type=SourceType.NPM_PACKAGE,
                value=match,
                confidence=0.8,
            ))
    return sources
def generate_suggestions(parsed: ParsedIngestionPrompt) -> List[str]:
    """Build user-facing hints for an already parsed prompt.

    Only the INGEST, SEARCH and HELP intents produce hints; every other
    intent yields an empty list. For INGEST the hints describe each source
    that resolved to a registry entry, or explain what to provide when no
    source was detected.
    """
    intent = parsed.intent

    if intent == IngestionIntent.SEARCH:
        return [
            "I can search npm registry for design systems",
            "Try being more specific, like 'search for material design components'",
        ]

    if intent == IngestionIntent.HELP:
        return [
            "I can ingest design systems from: npm packages, Figma, GitHub, CSS files, or images",
            "Try: 'add heroui' or 'ingest from figma.com/file/...'",
        ]

    if intent != IngestionIntent.INGEST:
        return []

    if not parsed.sources:
        return [
            "No design system detected. Try specifying a name like 'heroui', 'shadcn', or 'mui'",
            "You can also provide a Figma URL, npm package, or GitHub repository",
        ]

    hints = []
    for src in parsed.sources:
        matched = src.matched_system
        if not matched:
            continue  # Sources without a registry entry add no hint.
        hints.append(f"Found '{matched.name}' - {matched.description}")
        if matched.npm_packages:
            hints.append(f"Will install: {', '.join(matched.npm_packages)}")
        if matched.figma_community_url:
            hints.append(f"Figma kit available: {matched.figma_community_url}")
    return hints
def parse_ingestion_prompt(prompt: str) -> ParsedIngestionPrompt:
    """Parse a natural language prompt for design system ingestion.

    Steps: detect the intent, collect sources from the URL, design-system
    and npm extractors, de-duplicate them, attach suggestions, and finally
    blend the intent confidence with the best source confidence.

    Examples (name lookups depend on what the registry knows):
        "add heroui"                              -> design system name
        "import from https://figma.com/file/abc"  -> Figma URL
        "use @chakra-ui/react"                    -> npm package
        "what design systems do you support?"     -> LIST intent
    """
    intent, intent_confidence = detect_intent(prompt)

    result = ParsedIngestionPrompt(
        original_prompt=prompt,
        intent=intent,
        confidence=intent_confidence,
    )

    # Gather every candidate, in extractor order.
    candidates = [
        *extract_urls(prompt),
        *extract_design_systems(prompt),
        *extract_npm_packages(prompt),
    ]

    # Walk candidates from most to least confident so that, for a repeated
    # (type, case-insensitive value) pair, the strongest one is kept.
    claimed = set()
    deduped = []
    for candidate in sorted(candidates, key=lambda s: s.confidence, reverse=True):
        identity = (candidate.source_type, candidate.value.lower())
        if identity in claimed:
            continue
        claimed.add(identity)
        deduped.append(candidate)
    result.sources = deduped

    result.suggestions = generate_suggestions(result)

    # With at least one source, overall confidence is the mean of the intent
    # confidence and the strongest source confidence.
    if deduped:
        strongest = max(src.confidence for src in deduped)
        result.confidence = (intent_confidence + strongest) / 2

    return result
def _next_steps_for_ingest(parsed: ParsedIngestionPrompt) -> List[Dict[str, Any]]:
    """Build the follow-up actions for an INGEST prompt."""
    if not parsed.sources:
        # Nothing found - offer alternatives
        fallback = get_alternative_ingestion_options()
        return [{
            "action": "request_source",
            "alternatives": fallback["alternatives"],
            "message": "No design system detected. Please provide more details:"
        }]

    steps = []
    for src in parsed.sources:
        kind = src.source_type
        if kind == SourceType.DESIGN_SYSTEM_NAME and src.matched_system:
            matched = src.matched_system
            steps.append({
                "action": "confirm_ingestion",
                "system": matched.to_dict(),
                "message": f"Ready to ingest '{matched.name}'. Confirm to proceed?"
            })
        elif kind == SourceType.FIGMA_URL:
            steps.append({
                "action": "ingest_figma",
                "url": src.value,
                "message": "Figma URL detected. Ready to extract design tokens?"
            })
        elif kind == SourceType.NPM_PACKAGE:
            steps.append({
                "action": "search_npm",
                "package": src.value,
                "message": f"Will search npm for '{src.value}'"
            })
    return steps


def _next_steps_for_search(prompt: str) -> List[Dict[str, Any]]:
    """Build the follow-up actions for a SEARCH prompt."""
    # What is left after removing the search verbs is used as the query.
    query = re.sub(r'\b(search|find|look for)\b', '', prompt.lower()).strip()
    if not query:
        return []

    hits = search_design_systems(query)
    if hits:
        return [{
            "action": "show_search_results",
            "results": [hit.to_dict() for hit in hits],
            "message": f"Found {len(hits)} matching design systems"
        }]
    return [{
        "action": "search_npm",
        "query": query,
        "message": f"No built-in match. Will search npm for '{query}'"
    }]


def _next_steps_for_list() -> List[Dict[str, Any]]:
    """Build the follow-up action for a LIST prompt."""
    from design_system_registry import get_all_systems
    known = get_all_systems()
    return [{
        "action": "show_all_systems",
        "count": len(known),
        "categories": list(set(entry.category for entry in known)),
        "message": f"I know about {len(known)} design systems"
    }]


def _next_steps_for_info(parsed: ParsedIngestionPrompt) -> List[Dict[str, Any]]:
    """Build one ``show_info`` action per source that matched a system."""
    steps = []
    for src in parsed.sources:
        matched = src.matched_system
        if not matched:
            continue
        options = get_alternative_ingestion_options(matched)
        steps.append({
            "action": "show_info",
            "system": matched.to_dict(),
            "alternatives": options,
            "message": f"Information about {matched.name}"
        })
    return steps


def parse_and_suggest(prompt: str) -> Dict[str, Any]:
    """Parse a prompt and attach suggested next steps.

    This is the main entry point of the ingestion parser.

    Returns:
        The dictionary form of the parsed prompt plus a ``"next_steps"``
        list. Steps are produced for the INGEST, SEARCH, LIST and INFO
        intents; for any other intent the list is empty.
    """
    parsed = parse_ingestion_prompt(prompt)
    response = parsed.to_dict()

    intent = parsed.intent
    if intent == IngestionIntent.INGEST:
        next_steps = _next_steps_for_ingest(parsed)
    elif intent == IngestionIntent.SEARCH:
        next_steps = _next_steps_for_search(prompt)
    elif intent == IngestionIntent.LIST:
        next_steps = _next_steps_for_list()
    elif intent == IngestionIntent.INFO:
        next_steps = _next_steps_for_info(parsed)
    else:
        next_steps = []

    response["next_steps"] = next_steps
    return response
# Convenience wrapper for callers that only need the headline result.
def quick_parse(prompt: str) -> Tuple[Optional[DesignSystemInfo], IngestionIntent, float]:
    """Return the most likely design system together with the intent.

    Returns:
        ``(system, intent, confidence)`` where ``system`` is the matched
        system of the first parsed source that has one, or ``None`` when no
        source resolved to a system. ``parse_ingestion_prompt`` orders
        sources by descending confidence, so the first hit is the strongest.
    """
    parsed = parse_ingestion_prompt(prompt)
    top_system = next(
        (src.matched_system for src in parsed.sources if src.matched_system),
        None,
    )
    return top_system, parsed.intent, parsed.confidence