"""
CSS Token Source

Extracts design tokens from CSS custom properties (CSS variables).
Parses :root declarations and other CSS variable definitions.
"""

import re
from bisect import bisect_right
from pathlib import Path
from typing import List, Optional, Tuple

from .base import DesignToken, TokenCollection, TokenSource, TokenType, TokenCategory


def _path_exists(candidate: str) -> bool:
    """
    Return True if ``candidate`` names an existing filesystem entry.

    Callers pass strings that may be raw CSS/HTML content rather than a
    path, so this probe must never raise: over-long names raise OSError
    on some Python versions and embedded NUL bytes raise ValueError.
    The empty string is rejected explicitly because ``Path("")`` is the
    current directory.
    """
    if not candidate:
        return False
    try:
        return Path(candidate).exists()
    except (OSError, ValueError):
        return False


def _split_top_level(text: str, separator: str) -> List[str]:
    """
    Split ``text`` on ``separator``, ignoring separators that appear
    inside parentheses/brackets or inside quoted strings.

    For input without brackets or quotes this is equivalent to
    ``text.split(separator)``.
    """
    parts: List[str] = []
    current: List[str] = []
    depth = 0
    quote = ""

    for char in text:
        if quote:
            # Inside a quoted string: only the matching quote ends it
            if char == quote:
                quote = ""
        elif char in "\"'`":
            quote = char
        elif char in "([":
            depth += 1
        elif char in ")]":
            depth = max(0, depth - 1)
        elif char == separator and depth == 0:
            parts.append("".join(current))
            current = []
            continue
        current.append(char)

    parts.append("".join(current))
    return parts


class CSSTokenSource(TokenSource):
    """
    Extract tokens from CSS files.

    Parses CSS custom properties defined in :root or other selectors.
    Supports:
    - :root { --color-primary: #3B82F6; }
    - [data-theme="dark"] { --color-primary: #60A5FA; }
    - Comments as descriptions
    """

    # Matches: [/* comment */] --var-name: value;
    # The value ends at ';' or just before a closing '}' so that the last
    # declaration of a rule may omit its semicolon.
    # NOTE: declarations inside a commented-out region are still matched.
    _VAR_PATTERN = re.compile(
        r'(/\*(?:[^*]|\*(?!/))*\*/\s*)?'  # Optional preceding comment
        r'(--[\w-]+)\s*:\s*'              # Variable name
        r'([^;{}]+)(?:;|(?=\}))',         # Value
        re.MULTILINE,
    )

    @property
    def source_type(self) -> str:
        """Identifier of this token source kind."""
        return "css"

    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from CSS file or content.

        Args:
            source: File path or CSS content string

        Returns:
            TokenCollection with extracted tokens

        Raises:
            FileNotFoundError: If source looks like a file path but the
                file does not exist.
        """
        # Determine if source is file path or content
        if self._is_file_path(source):
            file_path = Path(source)
            if not file_path.exists():
                raise FileNotFoundError(f"CSS file not found: {source}")
            content = file_path.read_text(encoding="utf-8")
            source_file = str(file_path.absolute())
        else:
            content = source
            source_file = ""

        tokens = self._parse_css(content, source_file)

        return TokenCollection(
            tokens=tokens,
            name=f"CSS Tokens from {Path(source_file).name if source_file != '' else 'inline'}",
            sources=[self._create_source_id(source_file)],
        )

    def _is_file_path(self, source: str) -> bool:
        """Check if source looks like a file path."""
        # If it contains CSS syntax, it's content
        if '{' in source or (':' in source and ';' in source):
            return False

        # If it ends with .css, it's a file
        if source.endswith('.css'):
            return True

        # If path exists, it's a file (probe must not raise on odd content)
        return _path_exists(source)

    def _parse_css(self, content: str, source_file: str) -> List[DesignToken]:
        """
        Parse CSS content and extract custom properties.

        Args:
            content: CSS text to scan.
            source_file: Path recorded on each token ("" for inline content).

        Returns:
            One DesignToken per custom property declaration, in document order.
        """
        tokens = []
        line_map = self._build_line_map(content)

        for match in self._VAR_PATTERN.finditer(content):
            comment = match.group(1)
            var_name = match.group(2)
            var_value = match.group(3).strip()

            # Report the line of the declaration itself, not of the
            # comment that may precede it.
            line_num = self._get_line_number(match.start(2), line_map)

            # Extract description from comment
            description = self._clean_comment(comment) if comment else ""

            # Get context (selector); searched from the start of the match
            # so braces inside the preceding comment are not considered.
            context = self._get_selector_context(content, match.start())

            token = DesignToken(
                name=self._normalize_var_name(var_name),
                value=var_value,
                description=description,
                source=self._create_source_id(source_file, line_num),
                source_file=source_file,
                source_line=line_num,
                original_name=var_name,
                original_value=var_value,
            )

            # Add context as tag if not :root
            if context and context != ":root":
                token.tags.append(f"context:{context}")

            tokens.append(token)

        return tokens

    def _build_line_map(self, content: str) -> List[int]:
        """
        Build a list whose i-th entry is the character offset at which
        line i+1 starts.
        """
        line_map = []
        pos = 0
        for line in content.split('\n'):
            line_map.append(pos)
            pos += len(line) + 1  # +1 for newline
        return line_map

    def _get_line_number(self, pos: int, line_map: List[int]) -> int:
        """
        Get the 1-based line number for a character position.

        ``line_map`` is sorted ascending, so the line is the count of
        line starts that are <= pos. Never returns less than 1.
        """
        return max(1, bisect_right(line_map, pos))

    def _normalize_var_name(self, var_name: str) -> str:
        """Convert CSS variable name to token name (``--a-b`` -> ``a.b``)."""
        # Remove -- prefix
        name = var_name.lstrip('-')
        # Convert kebab-case to dot notation
        return name.replace('-', '.')

    def _clean_comment(self, comment: str) -> str:
        """
        Extract text from CSS comment.

        Removes the comment delimiters and any leading asterisks on each
        line, then collapses all whitespace to single spaces.
        """
        if not comment:
            return ""

        # Remove /* and */ (including doc-style /** and **/)
        text = re.sub(r'/\*+|\*+/', '', comment)

        # Drop the decorative leading '*' of multi-line block comments
        stripped_lines = (
            line.strip().lstrip('*').strip() for line in text.splitlines()
        )

        # Clean whitespace
        return ' '.join(' '.join(stripped_lines).split())

    def _get_selector_context(self, content: str, pos: int) -> str:
        """
        Get the CSS selector context for a variable.

        Returns the text between the last '}' (or start of content) and
        the last '{' that precede ``pos``, with whitespace collapsed, or
        "" if no '{' precedes ``pos``.
        """
        # Find the opening brace before this position
        before = content[:pos]
        last_open = before.rfind('{')
        if last_open == -1:
            return ""

        # Find the selector before the brace
        selector_part = before[:last_open]

        # Get last selector (after } or start)
        last_close = selector_part.rfind('}')
        if last_close != -1:
            selector_part = selector_part[last_close + 1:]

        # Clean up, also flattening multi-line selectors
        return ' '.join(selector_part.split())


class CSSInlineExtractor:
    """
    Extract inline styles from HTML/JSX for token candidate identification.

    Finds style="" attributes and extracts values that could become tokens.
    """

    # Patterns for extracting inline styles
    STYLE_ATTR_PATTERN = re.compile(
        r'style\s*=\s*["\']([^"\']+)["\']',
        re.IGNORECASE
    )

    # JSX style object pattern
    JSX_STYLE_PATTERN = re.compile(
        r'style\s*=\s*\{\{([^}]+)\}\}',
        re.MULTILINE
    )

    # Hex colors: only the lengths CSS defines (#rgb, #rgba, #rrggbb, #rrggbbaa)
    _HEX_COLOR_PATTERN = re.compile(
        r'^#(?:[0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$'
    )

    # Functional color notations: rgb(, rgba(, hsl(, hsla(
    _COLOR_FUNCTION_PATTERN = re.compile(r'^(?:rgb|hsl)a?\s*\(')

    # Dimensions with common units
    _DIMENSION_PATTERN = re.compile(r'^\d+(\.\d+)?(px|rem|em|%)$')

    async def extract_candidates(self, source: str) -> List[Tuple[str, str, int]]:
        """
        Extract inline style values as token candidates.

        Args:
            source: Path to an HTML/JSX file, or the markup itself.

        Returns list of (property, value, line_number) tuples.
        """
        # Determine if file or content. The probe is guarded because
        # markup passed as content is not a valid path.
        if _path_exists(source) and Path(source).is_file():
            content = Path(source).read_text(encoding="utf-8")
        else:
            content = source

        candidates = []

        # Matching is done per line, so each hit carries its line number
        for line_number, line in enumerate(content.split('\n'), 1):
            declarations = []

            # Check HTML style attribute
            for match in self.STYLE_ATTR_PATTERN.finditer(line):
                declarations.extend(self._parse_style_string(match.group(1)))

            # Check JSX style object
            for match in self.JSX_STYLE_PATTERN.finditer(line):
                declarations.extend(self._parse_jsx_style(match.group(1)))

            candidates.extend(
                (prop, value, line_number)
                for prop, value in declarations
                if self._is_token_candidate(value)
            )

        return candidates

    def _parse_style_string(self, style: str) -> List[Tuple[str, str]]:
        """Parse CSS style string into property-value pairs."""
        pairs = []
        # Split on ';' outside parentheses so url(data:...;base64,...) survives
        for declaration in _split_top_level(style, ';'):
            if ':' in declaration:
                prop, value = declaration.split(':', 1)
                pairs.append((prop.strip(), value.strip()))
        return pairs

    def _parse_jsx_style(self, style: str) -> List[Tuple[str, str]]:
        """
        Parse JSX style object into property-value pairs.

        Property names are converted from camelCase to kebab-case and
        surrounding quotes are removed from names and values.
        """
        pairs = []
        # Split on ',' outside parentheses/quotes so that values such as
        # rgb(0, 0, 0) or 'Arial, sans-serif' stay in one piece.
        for part in _split_top_level(style, ','):
            if ':' in part:
                prop, value = part.split(':', 1)
                prop = prop.strip().strip('"\'')
                value = value.strip().strip('"\'')
                # Convert camelCase to kebab-case
                prop = re.sub(r'([a-z])([A-Z])', r'\1-\2', prop).lower()
                pairs.append((prop, value))
        return pairs

    def _is_token_candidate(self, value: str) -> bool:
        """
        Check if value should be extracted as a token.

        Candidates are hex colors, rgb()/rgba()/hsl()/hsla() colors and
        plain numeric dimensions in px, rem, em or %. Everything else
        (var() references, keywords such as inherit/auto/none, ...) is
        not a candidate.
        """
        value = value.strip().lower()

        # Colors are always candidates
        if self._HEX_COLOR_PATTERN.match(value):
            return True
        if self._COLOR_FUNCTION_PATTERN.match(value):
            return True

        # Dimensions with common units
        return bool(self._DIMENSION_PATTERN.match(value))