"""
Natural Language Parser for Design System Ingestion.

This module parses natural language prompts to understand:
- Intent (ingest, search, compare, etc.)
- Design system names
- Alternative sources (Figma URLs, images, etc.)
- Configuration options
"""

import re
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any, Tuple
from enum import Enum

from design_system_registry import (
    find_design_system,
    search_design_systems,
    get_alternative_ingestion_options,
    DesignSystemInfo,
)


class IngestionIntent(Enum):
    """Types of user intents for design system operations."""

    INGEST = "ingest"        # Add/import a design system
    SEARCH = "search"        # Search for design systems
    LIST = "list"            # List available/known systems
    INFO = "info"            # Get info about a specific system
    COMPARE = "compare"      # Compare design systems
    CONFIGURE = "configure"  # Configure ingestion settings
    HELP = "help"            # Help with ingestion
    UNKNOWN = "unknown"


class SourceType(Enum):
    """Types of sources detected in prompts."""

    DESIGN_SYSTEM_NAME = "design_system_name"
    NPM_PACKAGE = "npm_package"
    FIGMA_URL = "figma_url"
    GITHUB_URL = "github_url"
    CSS_URL = "css_url"
    IMAGE_URL = "image_url"
    TEXT_DESCRIPTION = "text_description"


@dataclass
class ParsedSource:
    """A detected source from the prompt."""

    source_type: SourceType
    value: str
    confidence: float = 1.0  # 0.0 to 1.0
    matched_system: Optional[DesignSystemInfo] = None


@dataclass
class ParsedIngestionPrompt:
    """Result of parsing an ingestion prompt."""

    original_prompt: str
    intent: IngestionIntent
    confidence: float = 1.0
    sources: List[ParsedSource] = field(default_factory=list)
    options: Dict[str, Any] = field(default_factory=dict)
    suggestions: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for API responses."""
        return {
            "original_prompt": self.original_prompt,
            "intent": self.intent.value,
            "confidence": self.confidence,
            "sources": [
                {
                    "type": s.source_type.value,
                    "value": s.value,
                    "confidence": s.confidence,
                    "matched_system": (
                        s.matched_system.to_dict() if s.matched_system else None
                    ),
                }
                for s in self.sources
            ],
            "options": self.options,
            "suggestions": self.suggestions,
        }


# Intent detection patterns. Every pattern is applied to the lower-cased
# prompt; each match adds one point to that intent's score.
INTENT_PATTERNS = {
    IngestionIntent.INGEST: [
        r'\b(ingest|import|add|use|install|load|get|fetch|download|setup|init|initialize)\b',
        r'\b(i want|i need|give me|let\'s use|can you add|please add)\b',
        r'\b(integrate|incorporate|bring in|pull in)\b',
    ],
    IngestionIntent.SEARCH: [
        r'\b(search|find|look for|looking for|discover|explore)\b',
        r'\b(what.*available|show me.*options|any.*like)\b',
    ],
    IngestionIntent.LIST: [
        r'\b(list|show|display|what|which)\b.*(design systems?|available|supported|known)\b',
        r'\b(what do you (know|have|support))\b',
    ],
    IngestionIntent.INFO: [
        r'\b(info|information|details|about|tell me about|what is)\b',
        r'\b(how does|what\'s|describe)\b',
    ],
    IngestionIntent.COMPARE: [
        r'\b(compare|versus|vs|difference|between|or)\b.*\b(and|vs|versus|or)\b',
    ],
    IngestionIntent.CONFIGURE: [
        r'\b(configure|config|settings?|options?|customize)\b',
    ],
    IngestionIntent.HELP: [
        r'\b(help|how to|how do i|what can|guide|tutorial)\b',
    ],
}

# URL patterns.
#
# The Figma and GitHub patterns accept either an explicit http(s) scheme or a
# scheme-less host that starts a whitespace-delimited token, because the
# documented examples for this module ("import from figma.com/file/abc123")
# omit the scheme. The `(?<!\S)` lookbehind keeps the scheme-less form from
# matching in the middle of another token (e.g. inside a different URL).
URL_PATTERNS = {
    SourceType.FIGMA_URL: r'((?:https?://|(?<!\S))(?:www\.)?figma\.com/(?:file|design|community/file)/[^\s]+)',
    SourceType.GITHUB_URL: r'((?:https?://|(?<!\S))(?:www\.)?github\.com/[^\s]+)',
    SourceType.NPM_PACKAGE: r'(?:npm:)?(@?[a-z0-9][\w\-\.]*(?:/[a-z0-9][\w\-\.]*)?)',
    SourceType.CSS_URL: r'(https?://[^\s]+\.(?:css|scss|sass)(?:\?[^\s]*)?)',
    SourceType.IMAGE_URL: r'(https?://[^\s]+\.(?:png|jpg|jpeg|gif|webp|svg)(?:\?[^\s]*)?)',
}

# Anything that must be treated as a URL rather than free text: every
# http(s) URL, plus the scheme-less Figma/GitHub forms accepted above.
# Used to blank URLs out before name and npm-package extraction.
_URL_LIKE_RE = re.compile(
    r'(?:https?://|(?<!\S)(?:www\.)?(?:figma|github)\.com/)\S+',
    re.IGNORECASE,
)

# Sentence punctuation that tends to get glued to the end of a pasted URL.
_URL_TRAILING_PUNCTUATION = '.,;:!?'

# Punctuation stripped from both ends of a word before registry lookup.
# '@', '/', '-' and inner dots are kept so names such as "@chakra-ui/react"
# or "shadcn/ui" survive intact.
_TOKEN_EDGE_PUNCTUATION = '.,;:!?"\'()[]{}<>'

# Filler words that never identify a design system on their own.
_NOISE_WORDS = frozenset(
    ['the', 'a', 'an', 'from', 'to', 'with', 'for', 'and', 'or', 'in', 'on', 'at']
)


def detect_intent(prompt: str) -> Tuple[IngestionIntent, float]:
    """
    Detect the user's intent from their prompt.

    Each intent is scored by the number of regex matches its patterns
    produce on the lower-cased prompt; the highest score wins. On a tie the
    intent listed first in INTENT_PATTERNS wins.

    Returns (intent, confidence).
    """
    prompt_lower = prompt.lower()

    # Score each intent
    intent_scores = {
        intent: sum(len(re.findall(pattern, prompt_lower)) for pattern in patterns)
        for intent, patterns in INTENT_PATTERNS.items()
    }

    # Nothing matched at all: fall back to INGEST with low confidence, on the
    # assumption that a bare prompt such as "heroui" is a request to add it.
    if not any(intent_scores.values()):
        return IngestionIntent.INGEST, 0.5

    best_intent = max(intent_scores, key=intent_scores.get)
    max_score = intent_scores[best_intent]

    # Calculate confidence based on match strength (1 hit -> 0.7, 2+ -> 1.0)
    confidence = min(1.0, max_score * 0.3 + 0.4)

    return best_intent, confidence


def extract_urls(prompt: str) -> List[ParsedSource]:
    """
    Extract URLs from the prompt.

    Every non-npm entry of URL_PATTERNS is tried, so one URL can be reported
    under more than one source type (e.g. a GitHub link to a .css file).
    Trailing sentence punctuation is removed from each match, and scheme-less
    matches are normalized to start with "https://".
    """
    sources = []

    for source_type, pattern in URL_PATTERNS.items():
        if source_type == SourceType.NPM_PACKAGE:
            continue  # Handle separately

        for match in re.findall(pattern, prompt, re.IGNORECASE):
            # "see https://github.com/org/repo." -> drop the final "."
            url = match.rstrip(_URL_TRAILING_PUNCTUATION)
            if not url.lower().startswith(('http://', 'https://')):
                url = f'https://{url}'
            sources.append(ParsedSource(
                source_type=source_type,
                value=url,
                confidence=0.95,
            ))

    return sources


def extract_design_systems(prompt: str) -> List[ParsedSource]:
    """
    Extract design system names from the prompt.

    Uses the registry to match known systems. Word n-grams of length 3, 2
    and 1 are looked up (longest first); each registry system is reported at
    most once, for the first phrase that matched it.
    """
    sources = []

    # Remove URLs first to avoid false positives
    cleaned_prompt = _URL_LIKE_RE.sub(' ', prompt)

    # Strip punctuation glued to the words ("heroui?", "mui,") so they can
    # match registry names, and drop tokens that were punctuation only.
    words = [
        token
        for token in (
            raw.strip(_TOKEN_EDGE_PUNCTUATION)
            for raw in cleaned_prompt.lower().split()
        )
        if token
    ]

    # Try different n-grams (1-3 words)
    for n in range(3, 0, -1):
        for i in range(len(words) - n + 1):
            window = words[i:i + n]

            # Skip phrases made up entirely of noise words
            if all(w in _NOISE_WORDS for w in window):
                continue

            phrase = ' '.join(window)

            # Try to find matching design system
            system = find_design_system(phrase)
            if not system:
                continue

            # Check if we already found this system
            already_found = any(
                s.matched_system and s.matched_system.id == system.id
                for s in sources
            )
            if not already_found:
                sources.append(ParsedSource(
                    source_type=SourceType.DESIGN_SYSTEM_NAME,
                    value=phrase,
                    # Multi-word matches are less likely to be accidental
                    confidence=0.9 if n > 1 else 0.7,
                    matched_system=system,
                ))

    return sources


def extract_npm_packages(prompt: str) -> List[ParsedSource]:
    """
    Extract explicit npm package references.

    URLs are removed before matching so that path fragments such as
    "github.com/org" are not reported as packages. A candidate is kept only
    if it contains '@', '/' or '-', which filters out ordinary words.
    """
    sources = []

    # Match @scope/package or package-name patterns
    # Only if they look like npm packages (not URLs or common words)
    npm_pattern = r'(?:npm[:\s]+)?(@[a-z0-9][\w\-\.]+/[\w\-\.]+|[a-z][\w\-\.]*(?:/[\w\-\.]+)?)'

    text = _URL_LIKE_RE.sub(' ', prompt).lower()

    for match in re.findall(npm_pattern, text):
        # Filter out common words that might match
        if match in ['design', 'system', 'use', 'the', 'and', 'for', 'from']:
            continue

        # Check if it looks like an npm package (has @, /, or -)
        if '@' in match or '/' in match or '-' in match:
            sources.append(ParsedSource(
                source_type=SourceType.NPM_PACKAGE,
                value=match,
                confidence=0.8,
            ))

    return sources


def generate_suggestions(parsed: ParsedIngestionPrompt) -> List[str]:
    """
    Generate helpful suggestions based on parsed prompt.

    Only the INGEST, SEARCH and HELP intents produce suggestions; every
    other intent returns an empty list.
    """
    suggestions = []

    if parsed.intent == IngestionIntent.INGEST:
        if not parsed.sources:
            suggestions.append("No design system detected. Try specifying a name like 'heroui', 'shadcn', or 'mui'")
            suggestions.append("You can also provide a Figma URL, npm package, or GitHub repository")
        else:
            for source in parsed.sources:
                if source.matched_system:
                    system = source.matched_system
                    suggestions.append(f"Found '{system.name}' - {system.description}")
                    if system.npm_packages:
                        suggestions.append(f"Will install: {', '.join(system.npm_packages)}")
                    if system.figma_community_url:
                        suggestions.append(f"Figma kit available: {system.figma_community_url}")

    elif parsed.intent == IngestionIntent.SEARCH:
        suggestions.append("I can search npm registry for design systems")
        suggestions.append("Try being more specific, like 'search for material design components'")

    elif parsed.intent == IngestionIntent.HELP:
        suggestions.append("I can ingest design systems from: npm packages, Figma, GitHub, CSS files, or images")
        suggestions.append("Try: 'add heroui' or 'ingest from figma.com/file/...'")

    return suggestions


def parse_ingestion_prompt(prompt: str) -> ParsedIngestionPrompt:
    """
    Parse a natural language prompt for design system ingestion.

    Examples:
        "add heroui" -> Detects HeroUI design system
        "ingest material ui for our project" -> Detects MUI
        "import from figma.com/file/abc123" -> Extracts Figma URL
        "use @chakra-ui/react" -> Detects npm package
        "what design systems do you support?" -> LIST intent
    """
    # Detect intent
    intent, intent_confidence = detect_intent(prompt)

    # Initialize result
    result = ParsedIngestionPrompt(
        original_prompt=prompt,
        intent=intent,
        confidence=intent_confidence,
    )

    # Extract sources
    result.sources.extend(extract_urls(prompt))
    result.sources.extend(extract_design_systems(prompt))
    result.sources.extend(extract_npm_packages(prompt))

    # Remove duplicates (prefer higher confidence). sorted() is stable, so
    # sources with equal confidence keep their extraction order.
    seen_keys = set()
    unique_sources = []
    for source in sorted(result.sources, key=lambda s: s.confidence, reverse=True):
        key = (source.source_type, source.value.lower())
        if key not in seen_keys:
            seen_keys.add(key)
            unique_sources.append(source)
    result.sources = unique_sources

    # Generate suggestions
    result.suggestions = generate_suggestions(result)

    # Adjust confidence based on source quality
    if result.sources:
        max_source_confidence = max(s.confidence for s in result.sources)
        result.confidence = (intent_confidence + max_source_confidence) / 2

    return result


def parse_and_suggest(prompt: str) -> Dict[str, Any]:
    """
    Parse a prompt and provide suggestions for next steps.

    This is the main entry point for the ingestion parser.

    Returns the dictionary form of the parsed prompt with an additional
    "next_steps" key: a list of action dictionaries, possibly empty.
    """
    parsed = parse_ingestion_prompt(prompt)
    response = parsed.to_dict()

    # Add next steps based on what was found
    next_steps = []

    if parsed.intent == IngestionIntent.INGEST:
        if parsed.sources:
            # Found something to ingest
            # NOTE(review): GitHub, CSS and image sources get no next step
            # here, so a prompt containing only those yields an empty list.
            # Confirm whether dedicated actions for them are wanted.
            for source in parsed.sources:
                if source.source_type == SourceType.DESIGN_SYSTEM_NAME and source.matched_system:
                    system = source.matched_system
                    next_steps.append({
                        "action": "confirm_ingestion",
                        "system": system.to_dict(),
                        "message": f"Ready to ingest '{system.name}'. Confirm to proceed?"
                    })
                elif source.source_type == SourceType.FIGMA_URL:
                    next_steps.append({
                        "action": "ingest_figma",
                        "url": source.value,
                        "message": "Figma URL detected. Ready to extract design tokens?"
                    })
                elif source.source_type == SourceType.NPM_PACKAGE:
                    next_steps.append({
                        "action": "search_npm",
                        "package": source.value,
                        "message": f"Will search npm for '{source.value}'"
                    })
        else:
            # Nothing found - offer alternatives
            alternatives = get_alternative_ingestion_options()
            next_steps.append({
                "action": "request_source",
                "alternatives": alternatives["alternatives"],
                "message": "No design system detected. Please provide more details:"
            })

    elif parsed.intent == IngestionIntent.SEARCH:
        # Extract search terms
        search_terms = re.sub(r'\b(search|find|look for)\b', '', prompt.lower()).strip()
        if search_terms:
            matches = search_design_systems(search_terms)
            if matches:
                next_steps.append({
                    "action": "show_search_results",
                    "results": [m.to_dict() for m in matches],
                    "message": f"Found {len(matches)} matching design systems"
                })
            else:
                next_steps.append({
                    "action": "search_npm",
                    "query": search_terms,
                    "message": f"No built-in match. Will search npm for '{search_terms}'"
                })

    elif parsed.intent == IngestionIntent.LIST:
        from design_system_registry import get_all_systems
        all_systems = get_all_systems()
        next_steps.append({
            "action": "show_all_systems",
            "count": len(all_systems),
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the response is deterministic (a plain set is not).
            "categories": list(dict.fromkeys(s.category for s in all_systems)),
            "message": f"I know about {len(all_systems)} design systems"
        })

    elif parsed.intent == IngestionIntent.INFO:
        for source in parsed.sources:
            if source.matched_system:
                system = source.matched_system
                alternatives = get_alternative_ingestion_options(system)
                next_steps.append({
                    "action": "show_info",
                    "system": system.to_dict(),
                    "alternatives": alternatives,
                    "message": f"Information about {system.name}"
                })

    response["next_steps"] = next_steps
    return response


# Convenience function for quick parsing
def quick_parse(prompt: str) -> Tuple[Optional[DesignSystemInfo], IngestionIntent, float]:
    """
    Quick parse that returns the most likely design system and intent.

    Useful for simple lookups. The design system is taken from the first
    parsed source that carries a registry match (sources are ordered by
    descending confidence), or None when no source matched the registry.
    """
    parsed = parse_ingestion_prompt(prompt)

    # Find the best design system match
    best_system = next(
        (source.matched_system for source in parsed.sources if source.matched_system),
        None,
    )

    return best_system, parsed.intent, parsed.confidence