dss/tools/ingest/base.py
Digital Production Factory · 276ed71f31 · Initial commit: Clean DSS implementation
Migrated from design-system-swarm with fresh git history.
Old project history preserved in /home/overbits/apps/design-system-swarm

Core components:
- MCP Server (Python FastAPI with mcp 1.23.1)
- Claude Plugin (agents, commands, skills, strategies, hooks, core)
- DSS Backend (dss-mvp1 - token translation, Figma sync)
- Admin UI (Node.js/React)
- Server (Node.js/Express)
- Storybook integration (dss-mvp1/.storybook)

Self-contained configuration:
- All paths relative or use DSS_BASE_PATH=/home/overbits/dss
- PYTHONPATH configured for dss-mvp1 and dss-claude-plugin
- .env file with all configuration
- Claude plugin uses ${CLAUDE_PLUGIN_ROOT} for portability

Migration completed: $(date)
🤖 Clean migration with full functionality preserved
2025-12-09 18:45:48 -03:00


"""
Base classes for token ingestion.
Defines the DesignToken model following W3C Design Tokens format
and the TokenSource abstract class for all ingestors.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Set
import json
import re


class TokenType(str, Enum):
    """W3C Design Token types."""
    COLOR = "color"
    DIMENSION = "dimension"
    FONT_FAMILY = "fontFamily"
    FONT_WEIGHT = "fontWeight"
    FONT_SIZE = "fontSize"
    LINE_HEIGHT = "lineHeight"
    LETTER_SPACING = "letterSpacing"
    DURATION = "duration"
    CUBIC_BEZIER = "cubicBezier"
    NUMBER = "number"
    STRING = "string"
    SHADOW = "shadow"
    BORDER = "border"
    GRADIENT = "gradient"
    TRANSITION = "transition"
    COMPOSITE = "composite"
    UNKNOWN = "unknown"


class TokenCategory(str, Enum):
    """Token categories for organization."""
    COLORS = "colors"
    SPACING = "spacing"
    TYPOGRAPHY = "typography"
    SIZING = "sizing"
    BORDERS = "borders"
    SHADOWS = "shadows"
    EFFECTS = "effects"
    MOTION = "motion"
    BREAKPOINTS = "breakpoints"
    Z_INDEX = "z-index"
    OPACITY = "opacity"
    OTHER = "other"


@dataclass
class DesignToken:
    """
    W3C Design Token representation.

    Follows the W3C Design Tokens Community Group format with
    additional metadata for source tracking and enterprise use.
    """
    # Core properties (W3C spec)
    name: str  # e.g., "color.primary.500"
    value: Any  # e.g., "#3B82F6" or {"r": 59, "g": 130, "b": 246}
    type: TokenType = TokenType.UNKNOWN
    description: str = ""

    # Source attribution
    source: str = ""  # e.g., "figma:abc123", "css:tokens.css:12"
    source_file: str = ""  # Original file path
    source_line: int = 0  # Line number in source
    original_name: str = ""  # Name before normalization
    original_value: str = ""  # Value before processing

    # Organization
    category: TokenCategory = TokenCategory.OTHER
    tags: List[str] = field(default_factory=list)
    group: str = ""  # Logical grouping (e.g., "brand", "semantic")

    # State
    deprecated: bool = False
    deprecated_message: str = ""

    # Versioning
    version: str = "1.0.0"
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)

    # Extensions (for custom metadata)
    extensions: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Normalize and validate token after creation."""
        if not self.original_name:
            self.original_name = self.name
        if not self.original_value:
            self.original_value = str(self.value)
        # Auto-detect type if unknown
        if self.type == TokenType.UNKNOWN:
            self.type = self._detect_type()
        # Auto-detect category if other
        if self.category == TokenCategory.OTHER:
            self.category = self._detect_category()

    def _detect_type(self) -> TokenType:
        """Detect token type from value."""
        value_str = str(self.value).lower().strip()
        # Color patterns
        if re.match(r'^#[0-9a-f]{3,8}$', value_str):
            return TokenType.COLOR
        if re.match(r'^rgba?\s*\(', value_str):
            return TokenType.COLOR
        if re.match(r'^hsla?\s*\(', value_str):
            return TokenType.COLOR
        if value_str in ('transparent', 'currentcolor', 'inherit'):
            return TokenType.COLOR
        # Dimension patterns
        if re.match(r'^-?\d+(\.\d+)?(px|rem|em|%|vh|vw|ch|ex|vmin|vmax)$', value_str):
            return TokenType.DIMENSION
        # Duration patterns
        if re.match(r'^\d+(\.\d+)?(ms|s)$', value_str):
            return TokenType.DURATION
        # Number patterns
        if re.match(r'^-?\d+(\.\d+)?$', value_str):
            return TokenType.NUMBER
        # Font family (contains quotes or commas plus a generic family hint)
        if ',' in value_str or '"' in value_str or "'" in value_str:
            if 'sans' in value_str or 'serif' in value_str or 'mono' in value_str:
                return TokenType.FONT_FAMILY
        # Font weight
        if value_str in ('normal', 'bold', 'lighter', 'bolder') or \
                re.match(r'^[1-9]00$', value_str):
            return TokenType.FONT_WEIGHT
        # Shadow
        if 'shadow' in self.name.lower() or \
                re.match(r'^-?\d+.*\s+-?\d+.*\s+-?\d+', value_str):
            return TokenType.SHADOW
        return TokenType.STRING
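
    # Detection examples (illustrative; checks run in the order above, so a
    # bare "700" matches the NUMBER pattern before the font-weight check):
    #   "#3b82f6"             -> TokenType.COLOR
    #   "1.5rem"              -> TokenType.DIMENSION
    #   "200ms"               -> TokenType.DURATION
    #   "bold"                -> TokenType.FONT_WEIGHT
    #   "'Inter', sans-serif" -> TokenType.FONT_FAMILY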

    def _detect_category(self) -> TokenCategory:
        """Detect category from token name, falling back to the detected type."""
        name_lower = self.name.lower()
        # Check name patterns
        patterns = {
            TokenCategory.COLORS: ['color', 'bg', 'background', 'text', 'border-color', 'fill', 'stroke'],
            TokenCategory.SPACING: ['space', 'spacing', 'gap', 'margin', 'padding', 'inset'],
            TokenCategory.TYPOGRAPHY: ['font', 'text', 'line-height', 'letter-spacing', 'typography'],
            TokenCategory.SIZING: ['size', 'width', 'height', 'min-', 'max-'],
            TokenCategory.BORDERS: ['border', 'radius', 'outline'],
            TokenCategory.SHADOWS: ['shadow', 'elevation'],
            TokenCategory.EFFECTS: ['blur', 'opacity', 'filter', 'backdrop'],
            TokenCategory.MOTION: ['transition', 'animation', 'duration', 'delay', 'timing', 'ease'],
            TokenCategory.BREAKPOINTS: ['breakpoint', 'screen', 'media'],
            TokenCategory.Z_INDEX: ['z-index', 'z-', 'layer'],
        }
        for category, keywords in patterns.items():
            if any(kw in name_lower for kw in keywords):
                return category
        # Check by type
        if self.type == TokenType.COLOR:
            return TokenCategory.COLORS
        if self.type in (TokenType.FONT_FAMILY, TokenType.FONT_WEIGHT, TokenType.FONT_SIZE, TokenType.LINE_HEIGHT):
            return TokenCategory.TYPOGRAPHY
        if self.type == TokenType.DURATION:
            return TokenCategory.MOTION
        if self.type == TokenType.SHADOW:
            return TokenCategory.SHADOWS
        return TokenCategory.OTHER

    def normalize_name(self, separator: str = ".") -> str:
        """
        Normalize token name to a consistent format.

        Converts various formats to dot-notation (or the given separator):
        - kebab-case: color-primary-500 -> color.primary.500
        - snake_case: color_primary_500 -> color.primary.500
        - camelCase:  colorPrimary     -> color.primary
        """
        name = self.name
        # Split camelCase boundaries first; the inserted '.' is rewritten
        # to the requested separator below
        name = re.sub(r'([a-z])([A-Z])', r'\1.\2', name)
        # Replace separators (including '.', so names already in
        # dot-notation convert cleanly to CSS/SCSS form)
        name = name.replace('-', separator)
        name = name.replace('_', separator)
        name = name.replace('/', separator)
        name = name.replace('.', separator)
        # Collapse runs of separators
        while separator * 2 in name:
            name = name.replace(separator * 2, separator)
        return name.lower().strip(separator)

    def to_css_var_name(self) -> str:
        """Convert to CSS custom property name."""
        normalized = self.normalize_name("-")
        return f"--{normalized}"

    def to_scss_var_name(self) -> str:
        """Convert to SCSS variable name."""
        normalized = self.normalize_name("-")
        return f"${normalized}"

    def to_js_name(self) -> str:
        """Convert to JavaScript object key (camelCase)."""
        parts = self.normalize_name(".").split(".")
        if not parts:
            return ""
        result = parts[0]
        for part in parts[1:]:
            result += part.capitalize()
        return result

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary (W3C format)."""
        result = {
            "$value": self.value,
            "$type": self.type.value,
        }
        if self.description:
            result["$description"] = self.description
        # Add DSS metadata; copy extensions so repeated calls do not
        # mutate the token's own dict
        extensions = dict(self.extensions)
        extensions["dss"] = {
            "source": self.source,
            "sourceFile": self.source_file,
            "sourceLine": self.source_line,
            "originalName": self.original_name,
            "category": self.category.value,
            "tags": self.tags,
            "deprecated": self.deprecated,
            "version": self.version,
        }
        result["$extensions"] = extensions
        return result

    def to_json(self) -> str:
        """Serialize to JSON."""
        return json.dumps(self.to_dict(), indent=2)
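

# Illustrative usage sketch (comment only, not module API): __post_init__
# fills in type and category automatically, so a bare name/value pair is
# usually enough.
#
#   token = DesignToken(name="color-primary-500", value="#3B82F6")
#   token.type                # TokenType.COLOR (detected from the hex value)
#   token.category            # TokenCategory.COLORS (detected from the name)
#   token.to_css_var_name()   # "--color-primary-500"
#   token.to_js_name()        # "colorPrimary500"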


@dataclass
class TokenCollection:
    """
    Collection of design tokens with metadata.

    Represents a complete set of tokens from a single source or merged sources.
    """
    tokens: List[DesignToken] = field(default_factory=list)
    name: str = ""
    description: str = ""
    version: str = "1.0.0"
    sources: List[str] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)

    def __len__(self) -> int:
        return len(self.tokens)

    def __iter__(self):
        return iter(self.tokens)

    def __getitem__(self, key):
        if isinstance(key, int):
            return self.tokens[key]
        # Allow access by token name
        for token in self.tokens:
            if token.name == key:
                return token
        raise KeyError(f"Token '{key}' not found")

    def add(self, token: DesignToken) -> None:
        """Add a token to the collection."""
        self.tokens.append(token)

    def get(self, name: str) -> Optional[DesignToken]:
        """Get token by name."""
        for token in self.tokens:
            if token.name == name:
                return token
        return None

    def filter_by_category(self, category: TokenCategory) -> 'TokenCollection':
        """Return new collection filtered by category."""
        filtered = [t for t in self.tokens if t.category == category]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} ({category.value})",
            sources=self.sources,
        )

    def filter_by_type(self, token_type: TokenType) -> 'TokenCollection':
        """Return new collection filtered by type."""
        filtered = [t for t in self.tokens if t.type == token_type]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} ({token_type.value})",
            sources=self.sources,
        )

    def filter_by_source(self, source: str) -> 'TokenCollection':
        """Return new collection filtered by source (substring match)."""
        filtered = [t for t in self.tokens if source in t.source]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} (from {source})",
            sources=[source],
        )

    def get_categories(self) -> Set[TokenCategory]:
        """Get all unique categories in collection."""
        return {t.category for t in self.tokens}

    def get_types(self) -> Set[TokenType]:
        """Get all unique types in collection."""
        return {t.type for t in self.tokens}

    def get_duplicates(self) -> Dict[str, List[DesignToken]]:
        """Find tokens with duplicate names."""
        seen: Dict[str, List[DesignToken]] = {}
        for token in self.tokens:
            seen.setdefault(token.name, []).append(token)
        return {k: v for k, v in seen.items() if len(v) > 1}

    def to_css(self) -> str:
        """Export as CSS custom properties."""
        lines = [":root {"]
        for token in sorted(self.tokens, key=lambda t: t.name):
            var_name = token.to_css_var_name()
            if token.description:
                lines.append(f"  /* {token.description} */")
            lines.append(f"  {var_name}: {token.value};")
        lines.append("}")
        return "\n".join(lines)

    def to_scss(self) -> str:
        """Export as SCSS variables."""
        lines = []
        for token in sorted(self.tokens, key=lambda t: t.name):
            var_name = token.to_scss_var_name()
            if token.description:
                lines.append(f"// {token.description}")
            lines.append(f"{var_name}: {token.value};")
        return "\n".join(lines)

    def to_json(self) -> str:
        """Export as W3C Design Tokens JSON, nested by name segments."""
        result = {}
        for token in self.tokens:
            parts = token.normalize_name().split(".")
            current = result
            for part in parts[:-1]:
                if part not in current:
                    current[part] = {}
                current = current[part]
            current[parts[-1]] = token.to_dict()
        return json.dumps(result, indent=2)
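
    # Illustrative to_json() output shape for a token named
    # "color.primary.500" (nesting follows the normalized name segments):
    #   {
    #     "color": {
    #       "primary": {
    #         "500": {"$value": "#3B82F6", "$type": "color", ...}
    #       }
    #     }
    #   }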

    def to_typescript(self) -> str:
        """Export as TypeScript constants."""
        lines = ["export const tokens = {"]
        for token in sorted(self.tokens, key=lambda t: t.name):
            js_name = token.to_js_name()
            value = f'"{token.value}"' if isinstance(token.value, str) else token.value
            if token.description:
                lines.append(f"  /** {token.description} */")
            lines.append(f"  {js_name}: {value},")
        lines.append("} as const;")
        lines.append("")
        lines.append("export type TokenKey = keyof typeof tokens;")
        return "\n".join(lines)

    def to_tailwind_config(self) -> str:
        """Export as a Tailwind config extend object."""
        # Group tokens by category for the Tailwind structure
        colors = self.filter_by_category(TokenCategory.COLORS)
        spacing = self.filter_by_category(TokenCategory.SPACING)
        lines = ["module.exports = {", "  theme: {", "    extend: {"]
        if colors.tokens:
            lines.append("      colors: {")
            for token in colors.tokens:
                name = token.name.replace("color.", "").replace("colors.", "")
                lines.append(f'        "{name}": "{token.value}",')
            lines.append("      },")
        if spacing.tokens:
            lines.append("      spacing: {")
            for token in spacing.tokens:
                name = token.name.replace("spacing.", "").replace("space.", "")
                lines.append(f'        "{name}": "{token.value}",')
            lines.append("      },")
        lines.extend(["    },", "  },", "};"])
        return "\n".join(lines)

    def summary(self) -> Dict[str, Any]:
        """Get collection summary."""
        return {
            "total_tokens": len(self.tokens),
            "categories": {cat.value: len(self.filter_by_category(cat))
                           for cat in self.get_categories()},
            "types": {t.value: len(self.filter_by_type(t))
                      for t in self.get_types()},
            "sources": self.sources,
            "duplicates": len(self.get_duplicates()),
        }
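

# Illustrative usage sketch (comment only, not module API): filters return
# new collections, so a subset can be exported without mutating the original.
#
#   palette = TokenCollection(name="brand")
#   palette.add(DesignToken(name="color.primary", value="#3B82F6"))
#   palette.add(DesignToken(name="spacing.sm", value="8px"))
#   palette.filter_by_category(TokenCategory.COLORS).to_css()
#   # :root {
#   #   --color-primary: #3B82F6;
#   # }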


class TokenSource(ABC):
    """
    Abstract base class for token sources.

    All token ingestors must implement this interface.
    """

    @property
    @abstractmethod
    def source_type(self) -> str:
        """Return source type identifier (e.g., 'css', 'scss', 'figma')."""
        pass

    @abstractmethod
    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from source.

        Args:
            source: File path, URL, or content depending on source type

        Returns:
            TokenCollection with extracted tokens
        """
        pass

    def _create_source_id(self, file_path: str, line: int = 0) -> str:
        """Create source identifier string."""
        if line:
            return f"{self.source_type}:{file_path}:{line}"
        return f"{self.source_type}:{file_path}"
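

# Minimal ingestor sketch (hypothetical, illustration only; CssTokenSource is
# not part of this module): a TokenSource that scrapes CSS custom properties
# with a naive regex. A real ingestor would use a proper CSS parser.
#
#   class CssTokenSource(TokenSource):
#       @property
#       def source_type(self) -> str:
#           return "css"
#
#       async def extract(self, source: str) -> TokenCollection:
#           collection = TokenCollection(name=source, sources=[source])
#           with open(source, encoding="utf-8") as f:
#               for line_no, line in enumerate(f, start=1):
#                   match = re.match(r'\s*--([\w-]+)\s*:\s*([^;]+);', line)
#                   if match:
#                       collection.add(DesignToken(
#                           name=match.group(1),
#                           value=match.group(2).strip(),
#                           source=self._create_source_id(source, line_no),
#                           source_file=source,
#                           source_line=line_no,
#                       ))
#           return collection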