Initial commit: Clean DSS implementation
Migrated from design-system-swarm with fresh git history.
Old project history preserved in /home/overbits/apps/design-system-swarm
Core components:
- MCP Server (Python FastAPI with mcp 1.23.1)
- Claude Plugin (agents, commands, skills, strategies, hooks, core)
- DSS Backend (dss-mvp1 - token translation, Figma sync)
- Admin UI (Node.js/React)
- Server (Node.js/Express)
- Storybook integration (dss-mvp1/.storybook)
Self-contained configuration:
- All paths relative or use DSS_BASE_PATH=/home/overbits/dss
- PYTHONPATH configured for dss-mvp1 and dss-claude-plugin
- .env file with all configuration
- Claude plugin uses ${CLAUDE_PLUGIN_ROOT} for portability
Migration completed: $(date)
🤖 Clean migration with full functionality preserved
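
For reference, a .env along the lines described above might look like the following sketch. Only DSS_BASE_PATH and the two PYTHONPATH projects are stated in this commit message; the exact key names and directory layout are assumptions:

    # .env (sketch; keys other than DSS_BASE_PATH are assumptions)
    DSS_BASE_PATH=/home/overbits/dss
    PYTHONPATH=/home/overbits/dss/dss-mvp1:/home/overbits/dss/dss-claude-plugin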
tools/ingest/base.py (new file, 462 lines)
@@ -0,0 +1,462 @@
"""
Base classes for token ingestion.

Defines the DesignToken model following W3C Design Tokens format
and the TokenSource abstract class for all ingestors.
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Set
import json
import re


class TokenType(str, Enum):
    """W3C Design Token types."""
    COLOR = "color"
    DIMENSION = "dimension"
    FONT_FAMILY = "fontFamily"
    FONT_WEIGHT = "fontWeight"
    FONT_SIZE = "fontSize"
    LINE_HEIGHT = "lineHeight"
    LETTER_SPACING = "letterSpacing"
    DURATION = "duration"
    CUBIC_BEZIER = "cubicBezier"
    NUMBER = "number"
    STRING = "string"
    SHADOW = "shadow"
    BORDER = "border"
    GRADIENT = "gradient"
    TRANSITION = "transition"
    COMPOSITE = "composite"
    UNKNOWN = "unknown"


class TokenCategory(str, Enum):
    """Token categories for organization."""
    COLORS = "colors"
    SPACING = "spacing"
    TYPOGRAPHY = "typography"
    SIZING = "sizing"
    BORDERS = "borders"
    SHADOWS = "shadows"
    EFFECTS = "effects"
    MOTION = "motion"
    BREAKPOINTS = "breakpoints"
    Z_INDEX = "z-index"
    OPACITY = "opacity"
    OTHER = "other"


@dataclass
class DesignToken:
    """
    W3C Design Token representation.

    Follows the W3C Design Tokens Community Group format with
    additional metadata for source tracking and enterprise use.
    """
    # Core properties (W3C spec)
    name: str  # e.g., "color.primary.500"
    value: Any  # e.g., "#3B82F6" or {"r": 59, "g": 130, "b": 246}
    type: TokenType = TokenType.UNKNOWN
    description: str = ""

    # Source attribution
    source: str = ""  # e.g., "figma:abc123", "css:tokens.css:12"
    source_file: str = ""  # Original file path
    source_line: int = 0  # Line number in source
    original_name: str = ""  # Name before normalization
    original_value: str = ""  # Value before processing

    # Organization
    category: TokenCategory = TokenCategory.OTHER
    tags: List[str] = field(default_factory=list)
    group: str = ""  # Logical grouping (e.g., "brand", "semantic")

    # State
    deprecated: bool = False
    deprecated_message: str = ""

    # Versioning
    version: str = "1.0.0"
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)

    # Extensions (for custom metadata)
    extensions: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Normalize and validate token after creation."""
        if not self.original_name:
            self.original_name = self.name
        if not self.original_value:
            self.original_value = str(self.value)

        # Auto-detect type if unknown
        if self.type == TokenType.UNKNOWN:
            self.type = self._detect_type()

        # Auto-detect category if other
        if self.category == TokenCategory.OTHER:
            self.category = self._detect_category()

    def _detect_type(self) -> TokenType:
        """Detect token type from value."""
        value_str = str(self.value).lower().strip()

        # Color patterns
        if re.match(r'^#[0-9a-f]{3,8}$', value_str):
            return TokenType.COLOR
        if re.match(r'^rgb[a]?\s*\(', value_str):
            return TokenType.COLOR
        if re.match(r'^hsl[a]?\s*\(', value_str):
            return TokenType.COLOR
        if value_str in ('transparent', 'currentcolor', 'inherit'):
            return TokenType.COLOR

        # Dimension patterns
        if re.match(r'^-?\d+(\.\d+)?(px|rem|em|%|vh|vw|ch|ex|vmin|vmax)$', value_str):
            return TokenType.DIMENSION

        # Duration patterns
        if re.match(r'^\d+(\.\d+)?(ms|s)$', value_str):
            return TokenType.DURATION

        # Number patterns
        if re.match(r'^-?\d+(\.\d+)?$', value_str):
            return TokenType.NUMBER

        # Font family (contains quotes or commas)
        if ',' in value_str or '"' in value_str or "'" in value_str:
            if 'sans' in value_str or 'serif' in value_str or 'mono' in value_str:
                return TokenType.FONT_FAMILY

        # Font weight
        if value_str in ('normal', 'bold', 'lighter', 'bolder') or \
           re.match(r'^[1-9]00$', value_str):
            return TokenType.FONT_WEIGHT

        # Shadow
        if 'shadow' in self.name.lower() or \
           re.match(r'^-?\d+.*\s+-?\d+.*\s+-?\d+', value_str):
            return TokenType.SHADOW

        return TokenType.STRING

    def _detect_category(self) -> TokenCategory:
        """Detect category from token name."""
        name_lower = self.name.lower()

        # Check name patterns
        patterns = {
            TokenCategory.COLORS: ['color', 'bg', 'background', 'text', 'border-color', 'fill', 'stroke'],
            TokenCategory.SPACING: ['space', 'spacing', 'gap', 'margin', 'padding', 'inset'],
            TokenCategory.TYPOGRAPHY: ['font', 'text', 'line-height', 'letter-spacing', 'typography'],
            TokenCategory.SIZING: ['size', 'width', 'height', 'min-', 'max-'],
            TokenCategory.BORDERS: ['border', 'radius', 'outline'],
            TokenCategory.SHADOWS: ['shadow', 'elevation'],
            TokenCategory.EFFECTS: ['blur', 'opacity', 'filter', 'backdrop'],
            TokenCategory.MOTION: ['transition', 'animation', 'duration', 'delay', 'timing', 'ease'],
            TokenCategory.BREAKPOINTS: ['breakpoint', 'screen', 'media'],
            TokenCategory.Z_INDEX: ['z-index', 'z-', 'layer'],
        }

        for category, keywords in patterns.items():
            if any(kw in name_lower for kw in keywords):
                return category

        # Check by type
        if self.type == TokenType.COLOR:
            return TokenCategory.COLORS
        if self.type in (TokenType.FONT_FAMILY, TokenType.FONT_WEIGHT, TokenType.FONT_SIZE, TokenType.LINE_HEIGHT):
            return TokenCategory.TYPOGRAPHY
        if self.type == TokenType.DURATION:
            return TokenCategory.MOTION
        if self.type == TokenType.SHADOW:
            return TokenCategory.SHADOWS

        return TokenCategory.OTHER

    def normalize_name(self, separator: str = ".") -> str:
        """
        Normalize token name to consistent format.

        Converts various formats to dot-notation:
        - kebab-case: color-primary-500 -> color.primary.500
        - snake_case: color_primary_500 -> color.primary.500
        - camelCase: colorPrimary500 -> color.primary.500
        """
        name = self.name

        # Handle camelCase; also split letter->digit boundaries so that
        # colorPrimary500 normalizes to color.primary.500 as documented
        name = re.sub(r'([a-z])([A-Z])', r'\1.\2', name)
        name = re.sub(r'([A-Za-z])(\d)', r'\1.\2', name)

        # Replace separators
        name = name.replace('-', separator)
        name = name.replace('_', separator)
        name = name.replace('/', separator)

        # Clean up multiple separators
        while separator * 2 in name:
            name = name.replace(separator * 2, separator)

        return name.lower().strip(separator)

    def to_css_var_name(self) -> str:
        """Convert to CSS custom property name."""
        normalized = self.normalize_name("-")
        return f"--{normalized}"

    def to_scss_var_name(self) -> str:
        """Convert to SCSS variable name."""
        normalized = self.normalize_name("-")
        return f"${normalized}"

    def to_js_name(self) -> str:
        """Convert to JavaScript object key (camelCase)."""
        parts = self.normalize_name(".").split(".")
        if not parts:
            return ""
        result = parts[0]
        for part in parts[1:]:
            result += part.capitalize()
        return result

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary (W3C format)."""
        result = {
            "$value": self.value,
            "$type": self.type.value,
        }

        if self.description:
            result["$description"] = self.description

        if self.extensions:
            result["$extensions"] = self.extensions

        # Add DSS metadata
        result["$extensions"] = result.get("$extensions", {})
        result["$extensions"]["dss"] = {
            "source": self.source,
            "sourceFile": self.source_file,
            "sourceLine": self.source_line,
            "originalName": self.original_name,
            "category": self.category.value,
            "tags": self.tags,
            "deprecated": self.deprecated,
            "version": self.version,
        }

        return result

    def to_json(self) -> str:
        """Serialize to JSON."""
        return json.dumps(self.to_dict(), indent=2)


@dataclass
class TokenCollection:
    """
    Collection of design tokens with metadata.

    Represents a complete set of tokens from a single source or merged sources.
    """
    tokens: List[DesignToken] = field(default_factory=list)
    name: str = ""
    description: str = ""
    version: str = "1.0.0"
    sources: List[str] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)

    def __len__(self) -> int:
        return len(self.tokens)

    def __iter__(self):
        return iter(self.tokens)

    def __getitem__(self, key):
        if isinstance(key, int):
            return self.tokens[key]
        # Allow access by token name
        for token in self.tokens:
            if token.name == key:
                return token
        raise KeyError(f"Token '{key}' not found")

    def add(self, token: DesignToken) -> None:
        """Add a token to the collection."""
        self.tokens.append(token)

    def get(self, name: str) -> Optional[DesignToken]:
        """Get token by name."""
        for token in self.tokens:
            if token.name == name:
                return token
        return None

    def filter_by_category(self, category: TokenCategory) -> 'TokenCollection':
        """Return new collection filtered by category."""
        filtered = [t for t in self.tokens if t.category == category]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} ({category.value})",
            sources=self.sources,
        )

    def filter_by_type(self, token_type: TokenType) -> 'TokenCollection':
        """Return new collection filtered by type."""
        filtered = [t for t in self.tokens if t.type == token_type]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} ({token_type.value})",
            sources=self.sources,
        )

    def filter_by_source(self, source: str) -> 'TokenCollection':
        """Return new collection filtered by source."""
        filtered = [t for t in self.tokens if source in t.source]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} (from {source})",
            sources=[source],
        )

    def get_categories(self) -> Set[TokenCategory]:
        """Get all unique categories in collection."""
        return {t.category for t in self.tokens}

    def get_types(self) -> Set[TokenType]:
        """Get all unique types in collection."""
        return {t.type for t in self.tokens}

    def get_duplicates(self) -> Dict[str, List[DesignToken]]:
        """Find tokens with duplicate names."""
        seen: Dict[str, List[DesignToken]] = {}
        for token in self.tokens:
            if token.name not in seen:
                seen[token.name] = []
            seen[token.name].append(token)
        return {k: v for k, v in seen.items() if len(v) > 1}

    def to_css(self) -> str:
        """Export as CSS custom properties."""
        lines = [":root {"]
        for token in sorted(self.tokens, key=lambda t: t.name):
            var_name = token.to_css_var_name()
            if token.description:
                lines.append(f"  /* {token.description} */")
            lines.append(f"  {var_name}: {token.value};")
        lines.append("}")
        return "\n".join(lines)

    def to_scss(self) -> str:
        """Export as SCSS variables."""
        lines = []
        for token in sorted(self.tokens, key=lambda t: t.name):
            var_name = token.to_scss_var_name()
            if token.description:
                lines.append(f"// {token.description}")
            lines.append(f"{var_name}: {token.value};")
        return "\n".join(lines)

    def to_json(self) -> str:
        """Export as W3C Design Tokens JSON."""
        result = {}
        for token in self.tokens:
            parts = token.normalize_name().split(".")
            current = result
            for part in parts[:-1]:
                if part not in current:
                    current[part] = {}
                current = current[part]
            current[parts[-1]] = token.to_dict()
        return json.dumps(result, indent=2)

    def to_typescript(self) -> str:
        """Export as TypeScript constants."""
        lines = ["export const tokens = {"]
        for token in sorted(self.tokens, key=lambda t: t.name):
            js_name = token.to_js_name()
            value = f'"{token.value}"' if isinstance(token.value, str) else token.value
            if token.description:
                lines.append(f"  /** {token.description} */")
            lines.append(f"  {js_name}: {value},")
        lines.append("} as const;")
        lines.append("")
        lines.append("export type TokenKey = keyof typeof tokens;")
        return "\n".join(lines)

    def to_tailwind_config(self) -> str:
        """Export as Tailwind config extend object."""
        # Group tokens by category for Tailwind structure
        colors = self.filter_by_category(TokenCategory.COLORS)
        spacing = self.filter_by_category(TokenCategory.SPACING)

        lines = ["module.exports = {", "  theme: {", "    extend: {"]

        if colors.tokens:
            lines.append("      colors: {")
            for token in colors.tokens:
                name = token.name.replace("color.", "").replace("colors.", "")
                lines.append(f'        "{name}": "{token.value}",')
            lines.append("      },")

        if spacing.tokens:
            lines.append("      spacing: {")
            for token in spacing.tokens:
                name = token.name.replace("spacing.", "").replace("space.", "")
                lines.append(f'        "{name}": "{token.value}",')
            lines.append("      },")

        lines.extend(["    },", "  },", "};"])
        return "\n".join(lines)

    def summary(self) -> Dict[str, Any]:
        """Get collection summary."""
        return {
            "total_tokens": len(self.tokens),
            "categories": {cat.value: len(self.filter_by_category(cat))
                           for cat in self.get_categories()},
            "types": {t.value: len(self.filter_by_type(t))
                      for t in self.get_types()},
            "sources": self.sources,
            "duplicates": len(self.get_duplicates()),
        }


class TokenSource(ABC):
    """
    Abstract base class for token sources.

    All token ingestors must implement this interface.
    """

    @property
    @abstractmethod
    def source_type(self) -> str:
        """Return source type identifier (e.g., 'css', 'scss', 'figma')."""
        pass

    @abstractmethod
    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from source.

        Args:
            source: File path, URL, or content depending on source type

        Returns:
            TokenCollection with extracted tokens
        """
        pass

    def _create_source_id(self, file_path: str, line: int = 0) -> str:
        """Create source identifier string."""
        if line:
            return f"{self.source_type}:{file_path}:{line}"
        return f"{self.source_type}:{file_path}"
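
To illustrate how the pieces above fit together: DesignToken auto-detects its type and category in __post_init__, and TokenCollection layers filtering and export on top. A small usage sketch (token names and values invented for the example; the module is assumed importable as tools.ingest.base):

    from tools.ingest.base import DesignToken, TokenCollection, TokenCategory

    collection = TokenCollection(name="demo")
    collection.add(DesignToken(name="color-primary-500", value="#3B82F6"))
    collection.add(DesignToken(name="spacing-md", value="16px"))

    # Type and category were inferred from the value and name
    token = collection.get("color-primary-500")
    print(token.type)               # TokenType.COLOR
    print(token.category)           # TokenCategory.COLORS
    print(token.to_css_var_name())  # --color-primary-500

    # Filter to colors only and emit CSS custom properties
    colors = collection.filter_by_category(TokenCategory.COLORS)
    print(colors.to_css())          # :root block with --color-primary-500
    print(collection.summary()["total_tokens"])  # 2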
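
TokenSource itself only requires two members: the source_type property and the async extract method. A minimal hypothetical ingestor for flat {"name": "value"} JSON files might look like the sketch below (FlatJsonSource is invented for illustration and is not part of this commit):

    import asyncio
    import json

    from tools.ingest.base import DesignToken, TokenCollection, TokenSource

    class FlatJsonSource(TokenSource):
        """Hypothetical ingestor for flat {"name": "value"} JSON files."""

        @property
        def source_type(self) -> str:
            return "json"

        async def extract(self, source: str) -> TokenCollection:
            # Read the file and wrap each key/value pair as a DesignToken;
            # type and category detection happen in DesignToken.__post_init__
            with open(source) as f:
                data = json.load(f)
            source_id = self._create_source_id(source)
            collection = TokenCollection(name=source, sources=[source_id])
            for name, value in data.items():
                collection.add(DesignToken(
                    name=name,
                    value=value,
                    source=source_id,
                    source_file=source,
                ))
            return collection

    # Usage: tokens = asyncio.run(FlatJsonSource().extract("tokens.json"))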