""" Base classes for token ingestion. Defines the DesignToken model following W3C Design Tokens format and the TokenSource abstract class for all ingestors. """ from abc import ABC, abstractmethod from dataclasses import dataclass, field from datetime import datetime from enum import Enum from typing import Any, Dict, List, Optional, Set import json import re class TokenType(str, Enum): """W3C Design Token types.""" COLOR = "color" DIMENSION = "dimension" FONT_FAMILY = "fontFamily" FONT_WEIGHT = "fontWeight" FONT_SIZE = "fontSize" LINE_HEIGHT = "lineHeight" LETTER_SPACING = "letterSpacing" DURATION = "duration" CUBIC_BEZIER = "cubicBezier" NUMBER = "number" STRING = "string" SHADOW = "shadow" BORDER = "border" GRADIENT = "gradient" TRANSITION = "transition" COMPOSITE = "composite" UNKNOWN = "unknown" class TokenCategory(str, Enum): """Token categories for organization.""" COLORS = "colors" SPACING = "spacing" TYPOGRAPHY = "typography" SIZING = "sizing" BORDERS = "borders" SHADOWS = "shadows" EFFECTS = "effects" MOTION = "motion" BREAKPOINTS = "breakpoints" Z_INDEX = "z-index" OPACITY = "opacity" OTHER = "other" @dataclass class DesignToken: """ W3C Design Token representation. Follows the W3C Design Tokens Community Group format with additional metadata for source tracking and enterprise use. """ # Core properties (W3C spec) name: str # e.g., "color.primary.500" value: Any # e.g., "#3B82F6" or {"r": 59, "g": 130, "b": 246} type: TokenType = TokenType.UNKNOWN description: str = "" # Source attribution source: str = "" # e.g., "figma:abc123", "css:tokens.css:12" source_file: str = "" # Original file path source_line: int = 0 # Line number in source original_name: str = "" # Name before normalization original_value: str = "" # Value before processing # Organization category: TokenCategory = TokenCategory.OTHER tags: List[str] = field(default_factory=list) group: str = "" # Logical grouping (e.g., "brand", "semantic") # State deprecated: bool = False deprecated_message: str = "" # Versioning version: str = "1.0.0" created_at: datetime = field(default_factory=datetime.now) updated_at: datetime = field(default_factory=datetime.now) # Extensions (for custom metadata) extensions: Dict[str, Any] = field(default_factory=dict) def __post_init__(self): """Normalize and validate token after creation.""" if not self.original_name: self.original_name = self.name if not self.original_value: self.original_value = str(self.value) # Auto-detect type if unknown if self.type == TokenType.UNKNOWN: self.type = self._detect_type() # Auto-detect category if other if self.category == TokenCategory.OTHER: self.category = self._detect_category() def _detect_type(self) -> TokenType: """Detect token type from value.""" value_str = str(self.value).lower().strip() # Color patterns if re.match(r'^#[0-9a-f]{3,8}$', value_str): return TokenType.COLOR if re.match(r'^rgb[a]?\s*\(', value_str): return TokenType.COLOR if re.match(r'^hsl[a]?\s*\(', value_str): return TokenType.COLOR if value_str in ('transparent', 'currentcolor', 'inherit'): return TokenType.COLOR # Dimension patterns if re.match(r'^-?\d+(\.\d+)?(px|rem|em|%|vh|vw|ch|ex|vmin|vmax)$', value_str): return TokenType.DIMENSION # Duration patterns if re.match(r'^\d+(\.\d+)?(ms|s)$', value_str): return TokenType.DURATION # Number patterns if re.match(r'^-?\d+(\.\d+)?$', value_str): return TokenType.NUMBER # Font family (contains quotes or commas) if ',' in value_str or '"' in value_str or "'" in value_str: if 'sans' in value_str or 'serif' in value_str or 'mono' in 
value_str: return TokenType.FONT_FAMILY # Font weight if value_str in ('normal', 'bold', 'lighter', 'bolder') or \ re.match(r'^[1-9]00$', value_str): return TokenType.FONT_WEIGHT # Shadow if 'shadow' in self.name.lower() or \ re.match(r'^-?\d+.*\s+-?\d+.*\s+-?\d+', value_str): return TokenType.SHADOW return TokenType.STRING def _detect_category(self) -> TokenCategory: """Detect category from token name.""" name_lower = self.name.lower() # Check name patterns patterns = { TokenCategory.COLORS: ['color', 'bg', 'background', 'text', 'border-color', 'fill', 'stroke'], TokenCategory.SPACING: ['space', 'spacing', 'gap', 'margin', 'padding', 'inset'], TokenCategory.TYPOGRAPHY: ['font', 'text', 'line-height', 'letter-spacing', 'typography'], TokenCategory.SIZING: ['size', 'width', 'height', 'min-', 'max-'], TokenCategory.BORDERS: ['border', 'radius', 'outline'], TokenCategory.SHADOWS: ['shadow', 'elevation'], TokenCategory.EFFECTS: ['blur', 'opacity', 'filter', 'backdrop'], TokenCategory.MOTION: ['transition', 'animation', 'duration', 'delay', 'timing', 'ease'], TokenCategory.BREAKPOINTS: ['breakpoint', 'screen', 'media'], TokenCategory.Z_INDEX: ['z-index', 'z-', 'layer'], } for category, keywords in patterns.items(): if any(kw in name_lower for kw in keywords): return category # Check by type if self.type == TokenType.COLOR: return TokenCategory.COLORS if self.type in (TokenType.FONT_FAMILY, TokenType.FONT_WEIGHT, TokenType.FONT_SIZE, TokenType.LINE_HEIGHT): return TokenCategory.TYPOGRAPHY if self.type == TokenType.DURATION: return TokenCategory.MOTION if self.type == TokenType.SHADOW: return TokenCategory.SHADOWS return TokenCategory.OTHER def normalize_name(self, separator: str = ".") -> str: """ Normalize token name to consistent format. Converts various formats to dot-notation: - kebab-case: color-primary-500 -> color.primary.500 - snake_case: color_primary_500 -> color.primary.500 - camelCase: colorPrimary500 -> color.primary.500 """ name = self.name # Handle camelCase name = re.sub(r'([a-z])([A-Z])', r'\1.\2', name) # Replace separators name = name.replace('-', separator) name = name.replace('_', separator) name = name.replace('/', separator) # Clean up multiple separators while separator * 2 in name: name = name.replace(separator * 2, separator) return name.lower().strip(separator) def to_css_var_name(self) -> str: """Convert to CSS custom property name.""" normalized = self.normalize_name("-") return f"--{normalized}" def to_scss_var_name(self) -> str: """Convert to SCSS variable name.""" normalized = self.normalize_name("-") return f"${normalized}" def to_js_name(self) -> str: """Convert to JavaScript object key (camelCase).""" parts = self.normalize_name(".").split(".") if not parts: return "" result = parts[0] for part in parts[1:]: result += part.capitalize() return result def to_dict(self) -> Dict[str, Any]: """Convert to dictionary (W3C format).""" result = { "$value": self.value, "$type": self.type.value, } if self.description: result["$description"] = self.description if self.extensions: result["$extensions"] = self.extensions # Add DSS metadata result["$extensions"] = result.get("$extensions", {}) result["$extensions"]["dss"] = { "source": self.source, "sourceFile": self.source_file, "sourceLine": self.source_line, "originalName": self.original_name, "category": self.category.value, "tags": self.tags, "deprecated": self.deprecated, "version": self.version, } return result def to_json(self) -> str: """Serialize to JSON.""" return json.dumps(self.to_dict(), indent=2) @dataclass 
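

# Illustrative DesignToken usage (a sketch; the token name and value are
# hypothetical, and the expected results follow from __post_init__ and the
# name-conversion helpers above):
#
#     token = DesignToken(name="color-primary-500", value="#3B82F6")
#     token.type                 # TokenType.COLOR (auto-detected)
#     token.category             # TokenCategory.COLORS (name contains "color")
#     token.to_css_var_name()    # "--color-primary-500"
#     token.to_scss_var_name()   # "$color-primary-500"
#     token.to_js_name()         # "colorPrimary500"
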
@dataclass
class TokenCollection:
    """
    Collection of design tokens with metadata.

    Represents a complete set of tokens from a single source or merged sources.
    """

    tokens: List[DesignToken] = field(default_factory=list)
    name: str = ""
    description: str = ""
    version: str = "1.0.0"
    sources: List[str] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)

    def __len__(self) -> int:
        return len(self.tokens)

    def __iter__(self):
        return iter(self.tokens)

    def __getitem__(self, key):
        if isinstance(key, int):
            return self.tokens[key]
        # Allow access by token name
        for token in self.tokens:
            if token.name == key:
                return token
        raise KeyError(f"Token '{key}' not found")

    def add(self, token: DesignToken) -> None:
        """Add a token to the collection."""
        self.tokens.append(token)

    def get(self, name: str) -> Optional[DesignToken]:
        """Get token by name."""
        for token in self.tokens:
            if token.name == name:
                return token
        return None

    def filter_by_category(self, category: TokenCategory) -> 'TokenCollection':
        """Return new collection filtered by category."""
        filtered = [t for t in self.tokens if t.category == category]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} ({category.value})",
            sources=self.sources,
        )

    def filter_by_type(self, token_type: TokenType) -> 'TokenCollection':
        """Return new collection filtered by type."""
        filtered = [t for t in self.tokens if t.type == token_type]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} ({token_type.value})",
            sources=self.sources,
        )

    def filter_by_source(self, source: str) -> 'TokenCollection':
        """Return new collection filtered by source."""
        filtered = [t for t in self.tokens if source in t.source]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} (from {source})",
            sources=[source],
        )

    def get_categories(self) -> Set[TokenCategory]:
        """Get all unique categories in collection."""
        return {t.category for t in self.tokens}

    def get_types(self) -> Set[TokenType]:
        """Get all unique types in collection."""
        return {t.type for t in self.tokens}

    def get_duplicates(self) -> Dict[str, List[DesignToken]]:
        """Find tokens with duplicate names."""
        seen: Dict[str, List[DesignToken]] = {}
        for token in self.tokens:
            if token.name not in seen:
                seen[token.name] = []
            seen[token.name].append(token)
        return {k: v for k, v in seen.items() if len(v) > 1}

    def to_css(self) -> str:
        """Export as CSS custom properties."""
        lines = [":root {"]
        for token in sorted(self.tokens, key=lambda t: t.name):
            var_name = token.to_css_var_name()
            if token.description:
                lines.append(f"  /* {token.description} */")
            lines.append(f"  {var_name}: {token.value};")
        lines.append("}")
        return "\n".join(lines)

    def to_scss(self) -> str:
        """Export as SCSS variables."""
        lines = []
        for token in sorted(self.tokens, key=lambda t: t.name):
            var_name = token.to_scss_var_name()
            if token.description:
                lines.append(f"// {token.description}")
            lines.append(f"{var_name}: {token.value};")
        return "\n".join(lines)

    def to_json(self) -> str:
        """Export as W3C Design Tokens JSON."""
        result: Dict[str, Any] = {}
        for token in self.tokens:
            parts = token.normalize_name().split(".")
            current = result
            for part in parts[:-1]:
                if part not in current:
                    current[part] = {}
                current = current[part]
            current[parts[-1]] = token.to_dict()
        return json.dumps(result, indent=2)

    def to_typescript(self) -> str:
        """Export as TypeScript constants."""
        lines = ["export const tokens = {"]
        for token in sorted(self.tokens, key=lambda t: t.name):
            js_name = token.to_js_name()
            # json.dumps quotes strings (escaping embedded quotes) and
            # renders numbers and dicts as valid TS literals
            value = json.dumps(token.value)
            if token.description:
                lines.append(f"  /** {token.description} */")
            lines.append(f"  {js_name}: {value},")
        lines.append("} as const;")
        lines.append("")
        lines.append("export type TokenKey = keyof typeof tokens;")
        return "\n".join(lines)

    def to_tailwind_config(self) -> str:
        """Export as Tailwind config extend object."""
        # Group tokens by category for Tailwind structure
        colors = self.filter_by_category(TokenCategory.COLORS)
        spacing = self.filter_by_category(TokenCategory.SPACING)

        lines = ["module.exports = {", "  theme: {", "    extend: {"]

        if colors.tokens:
            lines.append("      colors: {")
            for token in colors.tokens:
                name = token.name.replace("color.", "").replace("colors.", "")
                lines.append(f'        "{name}": "{token.value}",')
            lines.append("      },")

        if spacing.tokens:
            lines.append("      spacing: {")
            for token in spacing.tokens:
                name = token.name.replace("spacing.", "").replace("space.", "")
                lines.append(f'        "{name}": "{token.value}",')
            lines.append("      },")

        lines.extend(["    },", "  },", "};"])
        return "\n".join(lines)

    def summary(self) -> Dict[str, Any]:
        """Get collection summary."""
        return {
            "total_tokens": len(self.tokens),
            "categories": {cat.value: len(self.filter_by_category(cat))
                           for cat in self.get_categories()},
            "types": {t.value: len(self.filter_by_type(t))
                      for t in self.get_types()},
            "sources": self.sources,
            "duplicates": len(self.get_duplicates()),
        }
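

# Illustrative export flow for TokenCollection (a sketch; the collection
# name and token values are hypothetical):
#
#     collection = TokenCollection(name="brand")
#     collection.add(DesignToken(name="color.primary.500", value="#3B82F6"))
#     collection.add(DesignToken(name="spacing.4", value="1rem"))
#     collection.to_css()      # ":root { --color-primary-500: #3B82F6; ... }"
#     collection.to_scss()     # "$color-primary-500: #3B82F6; ..."
#     collection.to_json()     # nested W3C Design Tokens JSON
#     collection.summary()     # counts by category/type, sources, duplicates
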
class TokenSource(ABC):
    """
    Abstract base class for token sources.

    All token ingestors must implement this interface.
    """

    @property
    @abstractmethod
    def source_type(self) -> str:
        """Return source type identifier (e.g., 'css', 'scss', 'figma')."""
        pass

    @abstractmethod
    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from source.

        Args:
            source: File path, URL, or content depending on source type

        Returns:
            TokenCollection with extracted tokens
        """
        pass

    def _create_source_id(self, file_path: str, line: int = 0) -> str:
        """Create source identifier string."""
        if line:
            return f"{self.source_type}:{file_path}:{line}"
        return f"{self.source_type}:{file_path}"
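

# A minimal concrete TokenSource sketch (an assumption for illustration,
# not one of the shipped ingestors): it reads KEY=VALUE pairs from a text
# file and relies on DesignToken's auto-detection for type and category.
#
#     class KeyValueTokenSource(TokenSource):
#         @property
#         def source_type(self) -> str:
#             return "kv"  # hypothetical identifier
#
#         async def extract(self, source: str) -> TokenCollection:
#             collection = TokenCollection(name=source, sources=[source])
#             with open(source, encoding="utf-8") as fh:
#                 for line_no, line in enumerate(fh, start=1):
#                     if "=" not in line:
#                         continue
#                     name, _, value = line.partition("=")
#                     collection.add(DesignToken(
#                         name=name.strip(),
#                         value=value.strip(),
#                         source=self._create_source_id(source, line_no),
#                         source_file=source,
#                         source_line=line_no,
#                     ))
#             return collection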