Initial commit: Clean DSS implementation

Migrated from design-system-swarm with fresh git history.
Old project history preserved in /home/overbits/apps/design-system-swarm

Core components:
- MCP Server (Python FastAPI with mcp 1.23.1)
- Claude Plugin (agents, commands, skills, strategies, hooks, core)
- DSS Backend (dss-mvp1 - token translation, Figma sync)
- Admin UI (Node.js/React)
- Server (Node.js/Express)
- Storybook integration (dss-mvp1/.storybook)

Self-contained configuration:
- All paths relative or use DSS_BASE_PATH=/home/overbits/dss
- PYTHONPATH configured for dss-mvp1 and dss-claude-plugin
- .env file with all configuration
- Claude plugin uses ${CLAUDE_PLUGIN_ROOT} for portability

Migration completed: $(date)
🤖 Clean migration with full functionality preserved
commit 276ed71f31
Digital Production Factory, 2025-12-09 18:45:48 -03:00
884 changed files with 373737 additions and 0 deletions

tools/ingest/__init__.py (new file, 25 lines)

@@ -0,0 +1,25 @@
"""
DSS Token Ingestion Module
Multi-source design token extraction and normalization.
Supports: Figma, CSS, SCSS, Tailwind, JSON/YAML, styled-components
"""
from .base import DesignToken, TokenSource, TokenCollection
from .css import CSSTokenSource
from .scss import SCSSTokenSource
from .tailwind import TailwindTokenSource
from .json_tokens import JSONTokenSource
from .merge import TokenMerger, MergeStrategy
__all__ = [
'DesignToken',
'TokenSource',
'TokenCollection',
'CSSTokenSource',
'SCSSTokenSource',
'TailwindTokenSource',
'JSONTokenSource',
'TokenMerger',
'MergeStrategy',
]
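
A minimal usage sketch (not part of the commit) of the pipeline these exports are meant to support. It assumes the package is importable as tools.ingest and that the two token files exist; both paths are illustrative.

import asyncio
from tools.ingest import CSSTokenSource, JSONTokenSource, TokenMerger, MergeStrategy

async def main():
    # Hypothetical input files
    css = await CSSTokenSource().extract("tokens.css")
    w3c = await JSONTokenSource().extract("tokens.json")
    # Under the LAST strategy, later collections win name conflicts
    result = TokenMerger(strategy=MergeStrategy.LAST).merge([css, w3c])
    print(result.collection.summary())

asyncio.run(main())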

tools/ingest/base.py (new file, 462 lines)

@@ -0,0 +1,462 @@
"""
Base classes for token ingestion.
Defines the DesignToken model following W3C Design Tokens format
and the TokenSource abstract class for all ingestors.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Set
import json
import re
class TokenType(str, Enum):
"""W3C Design Token types."""
COLOR = "color"
DIMENSION = "dimension"
FONT_FAMILY = "fontFamily"
FONT_WEIGHT = "fontWeight"
FONT_SIZE = "fontSize"
LINE_HEIGHT = "lineHeight"
LETTER_SPACING = "letterSpacing"
DURATION = "duration"
CUBIC_BEZIER = "cubicBezier"
NUMBER = "number"
STRING = "string"
SHADOW = "shadow"
BORDER = "border"
GRADIENT = "gradient"
TRANSITION = "transition"
COMPOSITE = "composite"
UNKNOWN = "unknown"
class TokenCategory(str, Enum):
"""Token categories for organization."""
COLORS = "colors"
SPACING = "spacing"
TYPOGRAPHY = "typography"
SIZING = "sizing"
BORDERS = "borders"
SHADOWS = "shadows"
EFFECTS = "effects"
MOTION = "motion"
BREAKPOINTS = "breakpoints"
Z_INDEX = "z-index"
OPACITY = "opacity"
OTHER = "other"
@dataclass
class DesignToken:
"""
W3C Design Token representation.
Follows the W3C Design Tokens Community Group format with
additional metadata for source tracking and enterprise use.
"""
# Core properties (W3C spec)
name: str # e.g., "color.primary.500"
value: Any # e.g., "#3B82F6" or {"r": 59, "g": 130, "b": 246}
type: TokenType = TokenType.UNKNOWN
description: str = ""
# Source attribution
source: str = "" # e.g., "figma:abc123", "css:tokens.css:12"
source_file: str = "" # Original file path
source_line: int = 0 # Line number in source
original_name: str = "" # Name before normalization
original_value: str = "" # Value before processing
# Organization
category: TokenCategory = TokenCategory.OTHER
tags: List[str] = field(default_factory=list)
group: str = "" # Logical grouping (e.g., "brand", "semantic")
# State
deprecated: bool = False
deprecated_message: str = ""
# Versioning
version: str = "1.0.0"
created_at: datetime = field(default_factory=datetime.now)
updated_at: datetime = field(default_factory=datetime.now)
# Extensions (for custom metadata)
extensions: Dict[str, Any] = field(default_factory=dict)
def __post_init__(self):
"""Normalize and validate token after creation."""
if not self.original_name:
self.original_name = self.name
if not self.original_value:
self.original_value = str(self.value)
# Auto-detect type if unknown
if self.type == TokenType.UNKNOWN:
self.type = self._detect_type()
# Auto-detect category if other
if self.category == TokenCategory.OTHER:
self.category = self._detect_category()
def _detect_type(self) -> TokenType:
"""Detect token type from value."""
value_str = str(self.value).lower().strip()
# Color patterns
if re.match(r'^#[0-9a-f]{3,8}$', value_str):
return TokenType.COLOR
if re.match(r'^rgb[a]?\s*\(', value_str):
return TokenType.COLOR
if re.match(r'^hsl[a]?\s*\(', value_str):
return TokenType.COLOR
if value_str in ('transparent', 'currentcolor', 'inherit'):
return TokenType.COLOR
# Dimension patterns
if re.match(r'^-?\d+(\.\d+)?(px|rem|em|%|vh|vw|ch|ex|vmin|vmax)$', value_str):
return TokenType.DIMENSION
# Duration patterns
if re.match(r'^\d+(\.\d+)?(ms|s)$', value_str):
return TokenType.DURATION
# Number patterns
if re.match(r'^-?\d+(\.\d+)?$', value_str):
return TokenType.NUMBER
# Font family (contains quotes or commas)
if ',' in value_str or '"' in value_str or "'" in value_str:
if 'sans' in value_str or 'serif' in value_str or 'mono' in value_str:
return TokenType.FONT_FAMILY
# Font weight
if value_str in ('normal', 'bold', 'lighter', 'bolder') or \
re.match(r'^[1-9]00$', value_str):
return TokenType.FONT_WEIGHT
# Shadow
if 'shadow' in self.name.lower() or \
re.match(r'^-?\d+.*\s+-?\d+.*\s+-?\d+', value_str):
return TokenType.SHADOW
return TokenType.STRING
def _detect_category(self) -> TokenCategory:
"""Detect category from token name."""
name_lower = self.name.lower()
# Check name patterns
patterns = {
TokenCategory.COLORS: ['color', 'bg', 'background', 'text', 'border-color', 'fill', 'stroke'],
TokenCategory.SPACING: ['space', 'spacing', 'gap', 'margin', 'padding', 'inset'],
TokenCategory.TYPOGRAPHY: ['font', 'text', 'line-height', 'letter-spacing', 'typography'],
TokenCategory.SIZING: ['size', 'width', 'height', 'min-', 'max-'],
TokenCategory.BORDERS: ['border', 'radius', 'outline'],
TokenCategory.SHADOWS: ['shadow', 'elevation'],
TokenCategory.EFFECTS: ['blur', 'opacity', 'filter', 'backdrop'],
TokenCategory.MOTION: ['transition', 'animation', 'duration', 'delay', 'timing', 'ease'],
TokenCategory.BREAKPOINTS: ['breakpoint', 'screen', 'media'],
TokenCategory.Z_INDEX: ['z-index', 'z-', 'layer'],
}
for category, keywords in patterns.items():
if any(kw in name_lower for kw in keywords):
return category
# Check by type
if self.type == TokenType.COLOR:
return TokenCategory.COLORS
if self.type in (TokenType.FONT_FAMILY, TokenType.FONT_WEIGHT, TokenType.FONT_SIZE, TokenType.LINE_HEIGHT):
return TokenCategory.TYPOGRAPHY
if self.type == TokenType.DURATION:
return TokenCategory.MOTION
if self.type == TokenType.SHADOW:
return TokenCategory.SHADOWS
return TokenCategory.OTHER
def normalize_name(self, separator: str = ".") -> str:
"""
Normalize token name to consistent format.
Converts various formats to dot-notation:
- kebab-case: color-primary-500 -> color.primary.500
- snake_case: color_primary_500 -> color.primary.500
- camelCase: colorPrimary500 -> color.primary.500
"""
name = self.name
# Handle camelCase
name = re.sub(r'([a-z])([A-Z])', r'\1.\2', name)
# Replace separators
name = name.replace('-', separator)
name = name.replace('_', separator)
name = name.replace('/', separator)
# Clean up multiple separators
while separator * 2 in name:
name = name.replace(separator * 2, separator)
return name.lower().strip(separator)
def to_css_var_name(self) -> str:
"""Convert to CSS custom property name."""
normalized = self.normalize_name("-")
return f"--{normalized}"
def to_scss_var_name(self) -> str:
"""Convert to SCSS variable name."""
normalized = self.normalize_name("-")
return f"${normalized}"
def to_js_name(self) -> str:
"""Convert to JavaScript object key (camelCase)."""
parts = self.normalize_name(".").split(".")
if not parts:
return ""
result = parts[0]
for part in parts[1:]:
result += part.capitalize()
return result
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary (W3C format)."""
result = {
"$value": self.value,
"$type": self.type.value,
}
if self.description:
result["$description"] = self.description
# Add DSS metadata (copy extensions so the token itself is not mutated)
result["$extensions"] = dict(self.extensions) if self.extensions else {}
result["$extensions"]["dss"] = {
"source": self.source,
"sourceFile": self.source_file,
"sourceLine": self.source_line,
"originalName": self.original_name,
"category": self.category.value,
"tags": self.tags,
"deprecated": self.deprecated,
"version": self.version,
}
return result
def to_json(self) -> str:
"""Serialize to JSON."""
return json.dumps(self.to_dict(), indent=2)
@dataclass
class TokenCollection:
"""
Collection of design tokens with metadata.
Represents a complete set of tokens from a single source or merged sources.
"""
tokens: List[DesignToken] = field(default_factory=list)
name: str = ""
description: str = ""
version: str = "1.0.0"
sources: List[str] = field(default_factory=list)
created_at: datetime = field(default_factory=datetime.now)
def __len__(self) -> int:
return len(self.tokens)
def __iter__(self):
return iter(self.tokens)
def __getitem__(self, key):
if isinstance(key, int):
return self.tokens[key]
# Allow access by token name
for token in self.tokens:
if token.name == key:
return token
raise KeyError(f"Token '{key}' not found")
def add(self, token: DesignToken) -> None:
"""Add a token to the collection."""
self.tokens.append(token)
def get(self, name: str) -> Optional[DesignToken]:
"""Get token by name."""
for token in self.tokens:
if token.name == name:
return token
return None
def filter_by_category(self, category: TokenCategory) -> 'TokenCollection':
"""Return new collection filtered by category."""
filtered = [t for t in self.tokens if t.category == category]
return TokenCollection(
tokens=filtered,
name=f"{self.name} ({category.value})",
sources=self.sources,
)
def filter_by_type(self, token_type: TokenType) -> 'TokenCollection':
"""Return new collection filtered by type."""
filtered = [t for t in self.tokens if t.type == token_type]
return TokenCollection(
tokens=filtered,
name=f"{self.name} ({token_type.value})",
sources=self.sources,
)
def filter_by_source(self, source: str) -> 'TokenCollection':
"""Return new collection filtered by source."""
filtered = [t for t in self.tokens if source in t.source]
return TokenCollection(
tokens=filtered,
name=f"{self.name} (from {source})",
sources=[source],
)
def get_categories(self) -> Set[TokenCategory]:
"""Get all unique categories in collection."""
return {t.category for t in self.tokens}
def get_types(self) -> Set[TokenType]:
"""Get all unique types in collection."""
return {t.type for t in self.tokens}
def get_duplicates(self) -> Dict[str, List[DesignToken]]:
"""Find tokens with duplicate names."""
seen: Dict[str, List[DesignToken]] = {}
for token in self.tokens:
if token.name not in seen:
seen[token.name] = []
seen[token.name].append(token)
return {k: v for k, v in seen.items() if len(v) > 1}
def to_css(self) -> str:
"""Export as CSS custom properties."""
lines = [":root {"]
for token in sorted(self.tokens, key=lambda t: t.name):
var_name = token.to_css_var_name()
if token.description:
lines.append(f" /* {token.description} */")
lines.append(f" {var_name}: {token.value};")
lines.append("}")
return "\n".join(lines)
def to_scss(self) -> str:
"""Export as SCSS variables."""
lines = []
for token in sorted(self.tokens, key=lambda t: t.name):
var_name = token.to_scss_var_name()
if token.description:
lines.append(f"// {token.description}")
lines.append(f"{var_name}: {token.value};")
return "\n".join(lines)
def to_json(self) -> str:
"""Export as W3C Design Tokens JSON."""
result = {}
for token in self.tokens:
parts = token.normalize_name().split(".")
current = result
for part in parts[:-1]:
if part not in current:
current[part] = {}
current = current[part]
current[parts[-1]] = token.to_dict()
return json.dumps(result, indent=2)
def to_typescript(self) -> str:
"""Export as TypeScript constants."""
lines = ["export const tokens = {"]
for token in sorted(self.tokens, key=lambda t: t.name):
js_name = token.to_js_name()
value = f'"{token.value}"' if isinstance(token.value, str) else token.value
if token.description:
lines.append(f" /** {token.description} */")
lines.append(f" {js_name}: {value},")
lines.append("} as const;")
lines.append("")
lines.append("export type TokenKey = keyof typeof tokens;")
return "\n".join(lines)
def to_tailwind_config(self) -> str:
"""Export as Tailwind config extend object."""
# Group tokens by category for Tailwind structure
colors = self.filter_by_category(TokenCategory.COLORS)
spacing = self.filter_by_category(TokenCategory.SPACING)
lines = ["module.exports = {", " theme: {", " extend: {"]
if colors.tokens:
lines.append(" colors: {")
for token in colors.tokens:
name = token.name.replace("color.", "").replace("colors.", "")
lines.append(f' "{name}": "{token.value}",')
lines.append(" },")
if spacing.tokens:
lines.append(" spacing: {")
for token in spacing.tokens:
name = token.name.replace("spacing.", "").replace("space.", "")
lines.append(f' "{name}": "{token.value}",')
lines.append(" },")
lines.extend([" },", " },", "};"])
return "\n".join(lines)
def summary(self) -> Dict[str, Any]:
"""Get collection summary."""
return {
"total_tokens": len(self.tokens),
"categories": {cat.value: len(self.filter_by_category(cat))
for cat in self.get_categories()},
"types": {t.value: len(self.filter_by_type(t))
for t in self.get_types()},
"sources": self.sources,
"duplicates": len(self.get_duplicates()),
}
class TokenSource(ABC):
"""
Abstract base class for token sources.
All token ingestors must implement this interface.
"""
@property
@abstractmethod
def source_type(self) -> str:
"""Return source type identifier (e.g., 'css', 'scss', 'figma')."""
pass
@abstractmethod
async def extract(self, source: str) -> TokenCollection:
"""
Extract tokens from source.
Args:
source: File path, URL, or content depending on source type
Returns:
TokenCollection with extracted tokens
"""
pass
def _create_source_id(self, file_path: str, line: int = 0) -> str:
"""Create source identifier string."""
if line:
return f"{self.source_type}:{file_path}:{line}"
return f"{self.source_type}:{file_path}"

tools/ingest/css.py (new file, 282 lines)

@@ -0,0 +1,282 @@
"""
CSS Token Source
Extracts design tokens from CSS custom properties (CSS variables).
Parses :root declarations and other CSS variable definitions.
"""
import re
from pathlib import Path
from typing import List, Optional, Tuple
from .base import DesignToken, TokenCollection, TokenSource, TokenType, TokenCategory
class CSSTokenSource(TokenSource):
"""
Extract tokens from CSS files.
Parses CSS custom properties defined in :root or other selectors.
Supports:
- :root { --color-primary: #3B82F6; }
- [data-theme="dark"] { --color-primary: #60A5FA; }
- Comments as descriptions
"""
@property
def source_type(self) -> str:
return "css"
async def extract(self, source: str) -> TokenCollection:
"""
Extract tokens from CSS file or content.
Args:
source: File path or CSS content string
Returns:
TokenCollection with extracted tokens
"""
# Determine if source is file path or content
if self._is_file_path(source):
file_path = Path(source)
if not file_path.exists():
raise FileNotFoundError(f"CSS file not found: {source}")
content = file_path.read_text(encoding="utf-8")
source_file = str(file_path.absolute())
else:
content = source
source_file = "<inline>"
tokens = self._parse_css(content, source_file)
return TokenCollection(
tokens=tokens,
name=f"CSS Tokens from {Path(source_file).name if source_file != '<inline>' else 'inline'}",
sources=[self._create_source_id(source_file)],
)
def _is_file_path(self, source: str) -> bool:
"""Check if source looks like a file path."""
# If it contains CSS syntax, it's content
if '{' in source or (':' in source and ';' in source):
return False
# If it ends with .css, it's a file
if source.endswith('.css'):
return True
# If path exists, it's a file
return Path(source).exists()
def _parse_css(self, content: str, source_file: str) -> List[DesignToken]:
"""Parse CSS content and extract custom properties."""
tokens = []
# Track line numbers
lines = content.split('\n')
line_map = self._build_line_map(content)
# Find all CSS variable declarations
# Pattern matches: --var-name: value;
var_pattern = re.compile(
r'(\/\*[^*]*\*\/\s*)?' # Optional preceding comment
r'(--[\w-]+)\s*:\s*' # Variable name
r'([^;]+);', # Value
re.MULTILINE
)
# Find variables in all rule blocks
for match in var_pattern.finditer(content):
comment = match.group(1)
var_name = match.group(2)
var_value = match.group(3).strip()
# Get line number
pos = match.start()
line_num = self._get_line_number(pos, line_map)
# Extract description from comment
description = ""
if comment:
description = self._clean_comment(comment)
# Get context (selector)
context = self._get_selector_context(content, pos)
# Create token
token = DesignToken(
name=self._normalize_var_name(var_name),
value=var_value,
description=description,
source=self._create_source_id(source_file, line_num),
source_file=source_file,
source_line=line_num,
original_name=var_name,
original_value=var_value,
)
# Add context as tag if not :root
if context and context != ":root":
token.tags.append(f"context:{context}")
tokens.append(token)
return tokens
def _build_line_map(self, content: str) -> List[int]:
"""Build map of character positions to line numbers."""
line_map = []
pos = 0
for i, line in enumerate(content.split('\n'), 1):
line_map.append(pos)
pos += len(line) + 1 # +1 for newline
return line_map
def _get_line_number(self, pos: int, line_map: List[int]) -> int:
"""Get line number for character position."""
for i, line_start in enumerate(line_map):
if i + 1 < len(line_map):
if line_start <= pos < line_map[i + 1]:
return i + 1
else:
return i + 1
return 1
def _normalize_var_name(self, var_name: str) -> str:
"""Convert CSS variable name to token name."""
# Remove -- prefix
name = var_name.lstrip('-')
# Convert kebab-case to dot notation
name = name.replace('-', '.')
return name
def _clean_comment(self, comment: str) -> str:
"""Extract text from CSS comment."""
if not comment:
return ""
# Remove /* and */
text = re.sub(r'/\*|\*/', '', comment)
# Clean whitespace
text = ' '.join(text.split())
return text.strip()
def _get_selector_context(self, content: str, pos: int) -> str:
"""Get the CSS selector context for a variable."""
# Find the opening brace before this position
before = content[:pos]
last_open = before.rfind('{')
if last_open == -1:
return ""
# Find the selector before the brace
selector_part = before[:last_open]
# Get last selector (after } or start)
last_close = selector_part.rfind('}')
if last_close != -1:
selector_part = selector_part[last_close + 1:]
# Clean up
selector = selector_part.strip()
# Handle multi-line selectors
selector = ' '.join(selector.split())
return selector
class CSSInlineExtractor:
"""
Extract inline styles from HTML/JSX for token candidate identification.
Finds style="" attributes and extracts values that could become tokens.
"""
# Patterns for extracting inline styles
STYLE_ATTR_PATTERN = re.compile(
r'style\s*=\s*["\']([^"\']+)["\']',
re.IGNORECASE
)
# JSX style object pattern
JSX_STYLE_PATTERN = re.compile(
r'style\s*=\s*\{\{([^}]+)\}\}',
re.MULTILINE
)
async def extract_candidates(self, source: str) -> List[Tuple[str, str, int]]:
"""
Extract inline style values as token candidates.
Returns list of (property, value, line_number) tuples.
"""
candidates = []
# Determine if file or content
if Path(source).exists():
content = Path(source).read_text(encoding="utf-8")
else:
content = source
lines = content.split('\n')
for i, line in enumerate(lines, 1):
# Check HTML style attribute
for match in self.STYLE_ATTR_PATTERN.finditer(line):
style_content = match.group(1)
for prop, value in self._parse_style_string(style_content):
if self._is_token_candidate(value):
candidates.append((prop, value, i))
# Check JSX style object
for match in self.JSX_STYLE_PATTERN.finditer(line):
style_content = match.group(1)
for prop, value in self._parse_jsx_style(style_content):
if self._is_token_candidate(value):
candidates.append((prop, value, i))
return candidates
def _parse_style_string(self, style: str) -> List[Tuple[str, str]]:
"""Parse CSS style string into property-value pairs."""
pairs = []
for declaration in style.split(';'):
if ':' in declaration:
prop, value = declaration.split(':', 1)
pairs.append((prop.strip(), value.strip()))
return pairs
def _parse_jsx_style(self, style: str) -> List[Tuple[str, str]]:
"""Parse JSX style object into property-value pairs."""
pairs = []
# Simple parsing for common cases
for part in style.split(','):
if ':' in part:
prop, value = part.split(':', 1)
prop = prop.strip().strip('"\'')
value = value.strip().strip('"\'')
# Convert camelCase to kebab-case
prop = re.sub(r'([a-z])([A-Z])', r'\1-\2', prop).lower()
pairs.append((prop, value))
return pairs
def _is_token_candidate(self, value: str) -> bool:
"""Check if value should be extracted as a token."""
value = value.strip().lower()
# Colors are always candidates
if re.match(r'^#[0-9a-f]{3,8}$', value):
return True
if re.match(r'^rgb[a]?\s*\(', value):
return True
if re.match(r'^hsl[a]?\s*\(', value):
return True
# Dimensions with common units
if re.match(r'^\d+(\.\d+)?(px|rem|em|%)$', value):
return True
# Skip variable references
if value.startswith('var('):
return False
# Skip inherit/initial/etc
if value in ('inherit', 'initial', 'unset', 'auto', 'none'):
return False
return False
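
A quick sketch (illustrative) of extracting from inline CSS content: _is_file_path sees the braces and treats the argument as content, and a preceding comment becomes the token description.

import asyncio
from tools.ingest.css import CSSTokenSource

CSS = """
:root {
  /* Brand primary */
  --color-primary: #3B82F6;
  --spacing-md: 1rem;
}
"""

async def demo():
    collection = await CSSTokenSource().extract(CSS)
    for t in collection:
        print(t.name, repr(t.value), repr(t.description))
    # color.primary '#3B82F6' 'Brand primary'
    # spacing.md '1rem' ''

asyncio.run(demo())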

tools/ingest/json_tokens.py (new file, 432 lines)

@@ -0,0 +1,432 @@
"""
JSON Token Source
Extracts design tokens from JSON files (YAML is not yet parsed).
Supports W3C Design Tokens format and Style Dictionary format.
"""
import json
import re
from pathlib import Path
from typing import List, Dict, Any, Optional
from .base import DesignToken, TokenCollection, TokenSource, TokenType, TokenCategory
class JSONTokenSource(TokenSource):
"""
Extract tokens from JSON token files.
Supports:
- W3C Design Tokens Community Group format
- Style Dictionary format
- Tokens Studio format
- Figma Tokens plugin format
- Generic nested JSON with $value
"""
@property
def source_type(self) -> str:
return "json"
async def extract(self, source: str) -> TokenCollection:
"""
Extract tokens from JSON file or content.
Args:
source: File path or JSON content string
Returns:
TokenCollection with extracted tokens
"""
if self._is_file_path(source):
file_path = Path(source)
if not file_path.exists():
raise FileNotFoundError(f"Token file not found: {source}")
content = file_path.read_text(encoding="utf-8")
source_file = str(file_path.absolute())
else:
content = source
source_file = "<inline>"
# Parse JSON
try:
data = json.loads(content)
except json.JSONDecodeError as e:
raise ValueError(f"Invalid JSON: {e}")
# Detect format and extract
tokens = self._extract_tokens(data, source_file)
return TokenCollection(
tokens=tokens,
name=f"JSON Tokens from {Path(source_file).name if source_file != '<inline>' else 'inline'}",
sources=[self._create_source_id(source_file)],
)
def _is_file_path(self, source: str) -> bool:
"""Check if source looks like a file path."""
if source.strip().startswith('{'):
return False
if source.endswith('.json') or source.endswith('.tokens.json'):
return True
return Path(source).exists()
def _extract_tokens(self, data: Dict, source_file: str) -> List[DesignToken]:
"""Extract tokens from parsed JSON."""
tokens = []
# Detect format. Check Tokens Studio first: its files also contain bare
# 'value' keys and would otherwise be misclassified as Style Dictionary.
if self._is_tokens_studio_format(data):
tokens = self._extract_tokens_studio(data, source_file)
elif self._is_w3c_format(data):
tokens = self._extract_w3c_tokens(data, source_file)
elif self._is_style_dictionary_format(data):
tokens = self._extract_style_dictionary_tokens(data, source_file)
else:
# Generic nested format
tokens = self._extract_nested_tokens(data, source_file)
return tokens
def _is_w3c_format(self, data: Dict) -> bool:
"""Check if data follows W3C Design Tokens format."""
# W3C format uses $value and $type
def check_node(node: Any) -> bool:
if isinstance(node, dict):
if '$value' in node:
return True
return any(check_node(v) for v in node.values())
return False
return check_node(data)
def _is_style_dictionary_format(self, data: Dict) -> bool:
"""Check if data follows Style Dictionary format."""
# Style Dictionary uses 'value' without $
def check_node(node: Any) -> bool:
if isinstance(node, dict):
if 'value' in node and '$value' not in node:
return True
return any(check_node(v) for v in node.values())
return False
return check_node(data)
def _is_tokens_studio_format(self, data: Dict) -> bool:
"""Check if data follows Tokens Studio format."""
# Tokens Studio has specific structure with sets
return '$themes' in data or '$metadata' in data
def _extract_w3c_tokens(
self,
data: Dict,
source_file: str,
prefix: str = ""
) -> List[DesignToken]:
"""Extract tokens in W3C Design Tokens format."""
tokens = []
for key, value in data.items():
# Skip metadata keys
if key.startswith('$'):
continue
current_path = f"{prefix}.{key}" if prefix else key
if isinstance(value, dict):
if '$value' in value:
# This is a token
token = self._create_w3c_token(
current_path, value, source_file
)
tokens.append(token)
else:
# Nested group
tokens.extend(
self._extract_w3c_tokens(value, source_file, current_path)
)
return tokens
def _create_w3c_token(
self,
name: str,
data: Dict,
source_file: str
) -> DesignToken:
"""Create token from W3C format node."""
value = data.get('$value')
token_type = self._parse_w3c_type(data.get('$type', ''))
description = data.get('$description', '')
# Handle aliases/references
if isinstance(value, str) and value.startswith('{') and value.endswith('}'):
# This is a reference like {colors.primary}
pass # Keep as-is for now
# Get extensions
extensions = {}
if '$extensions' in data:
extensions = data['$extensions']
token = DesignToken(
name=name,
value=value,
type=token_type,
description=description,
source=self._create_source_id(source_file),
source_file=source_file,
extensions=extensions,
)
# Check for deprecated
if extensions.get('deprecated'):
token.deprecated = True
token.deprecated_message = extensions.get('deprecatedMessage', '')
return token
def _parse_w3c_type(self, type_str: str) -> TokenType:
"""Convert W3C type string to TokenType."""
type_map = {
'color': TokenType.COLOR,
'dimension': TokenType.DIMENSION,
'fontFamily': TokenType.FONT_FAMILY,
'fontWeight': TokenType.FONT_WEIGHT,
'duration': TokenType.DURATION,
'cubicBezier': TokenType.CUBIC_BEZIER,
'number': TokenType.NUMBER,
'shadow': TokenType.SHADOW,
'border': TokenType.BORDER,
'gradient': TokenType.GRADIENT,
'transition': TokenType.TRANSITION,
}
return type_map.get(type_str, TokenType.UNKNOWN)
def _extract_style_dictionary_tokens(
self,
data: Dict,
source_file: str,
prefix: str = ""
) -> List[DesignToken]:
"""Extract tokens in Style Dictionary format."""
tokens = []
for key, value in data.items():
current_path = f"{prefix}.{key}" if prefix else key
if isinstance(value, dict):
if 'value' in value:
# This is a token
token = DesignToken(
name=current_path,
value=value['value'],
description=value.get('comment', value.get('description', '')),
source=self._create_source_id(source_file),
source_file=source_file,
)
# Handle attributes
if 'attributes' in value:
attrs = value['attributes']
if 'category' in attrs:
token.tags.append(f"category:{attrs['category']}")
token.tags.append("style-dictionary")
tokens.append(token)
else:
# Nested group
tokens.extend(
self._extract_style_dictionary_tokens(
value, source_file, current_path
)
)
return tokens
def _extract_tokens_studio(
self,
data: Dict,
source_file: str
) -> List[DesignToken]:
"""Extract tokens from Tokens Studio format."""
tokens = []
# Tokens Studio has token sets as top-level keys
# Skip metadata keys
for set_name, set_data in data.items():
if set_name.startswith('$'):
continue
if isinstance(set_data, dict):
set_tokens = self._extract_tokens_studio_set(
set_data, source_file, set_name
)
for token in set_tokens:
token.group = set_name
tokens.extend(set_tokens)
return tokens
def _extract_tokens_studio_set(
self,
data: Dict,
source_file: str,
prefix: str = ""
) -> List[DesignToken]:
"""Extract tokens from a Tokens Studio set."""
tokens = []
for key, value in data.items():
current_path = f"{prefix}.{key}" if prefix else key
if isinstance(value, dict):
if 'value' in value and 'type' in value:
# This is a token
token = DesignToken(
name=current_path,
value=value['value'],
type=self._parse_tokens_studio_type(value.get('type', '')),
description=value.get('description', ''),
source=self._create_source_id(source_file),
source_file=source_file,
)
token.tags.append("tokens-studio")
tokens.append(token)
else:
# Nested group
tokens.extend(
self._extract_tokens_studio_set(
value, source_file, current_path
)
)
return tokens
def _parse_tokens_studio_type(self, type_str: str) -> TokenType:
"""Convert Tokens Studio type to TokenType."""
type_map = {
'color': TokenType.COLOR,
'sizing': TokenType.DIMENSION,
'spacing': TokenType.DIMENSION,
'borderRadius': TokenType.DIMENSION,
'borderWidth': TokenType.DIMENSION,
'fontFamilies': TokenType.FONT_FAMILY,
'fontWeights': TokenType.FONT_WEIGHT,
'fontSizes': TokenType.FONT_SIZE,
'lineHeights': TokenType.LINE_HEIGHT,
'letterSpacing': TokenType.LETTER_SPACING,
'paragraphSpacing': TokenType.DIMENSION,
'boxShadow': TokenType.SHADOW,
'opacity': TokenType.NUMBER,
'dimension': TokenType.DIMENSION,
'text': TokenType.STRING,
'other': TokenType.STRING,
}
return type_map.get(type_str, TokenType.UNKNOWN)
def _extract_nested_tokens(
self,
data: Dict,
source_file: str,
prefix: str = ""
) -> List[DesignToken]:
"""Extract tokens from generic nested JSON."""
tokens = []
for key, value in data.items():
current_path = f"{prefix}.{key}" if prefix else key
if isinstance(value, dict):
# Check if this looks like a token (has primitive values)
has_nested = any(isinstance(v, dict) for v in value.values())
if not has_nested and len(value) <= 3:
# Might be a simple token object
if 'value' in value:
tokens.append(DesignToken(
name=current_path,
value=value['value'],
source=self._create_source_id(source_file),
source_file=source_file,
))
else:
# Recurse
tokens.extend(
self._extract_nested_tokens(value, source_file, current_path)
)
else:
# Recurse into nested object
tokens.extend(
self._extract_nested_tokens(value, source_file, current_path)
)
elif isinstance(value, (str, int, float, bool)):
# Simple value - treat as token
tokens.append(DesignToken(
name=current_path,
value=value,
source=self._create_source_id(source_file),
source_file=source_file,
))
return tokens
class TokenExporter:
"""
Export tokens to various JSON formats.
"""
@staticmethod
def to_w3c(collection: TokenCollection) -> str:
"""Export to W3C Design Tokens format."""
result = {}
for token in collection.tokens:
parts = token.normalize_name().split('.')
current = result
for part in parts[:-1]:
if part not in current:
current[part] = {}
current = current[part]
current[parts[-1]] = {
"$value": token.value,
"$type": token.type.value,
}
if token.description:
current[parts[-1]]["$description"] = token.description
return json.dumps(result, indent=2)
@staticmethod
def to_style_dictionary(collection: TokenCollection) -> str:
"""Export to Style Dictionary format."""
result = {}
for token in collection.tokens:
parts = token.normalize_name().split('.')
current = result
for part in parts[:-1]:
if part not in current:
current[part] = {}
current = current[part]
current[parts[-1]] = {
"value": token.value,
}
if token.description:
current[parts[-1]]["comment"] = token.description
return json.dumps(result, indent=2)
@staticmethod
def to_flat(collection: TokenCollection) -> str:
"""Export to flat JSON object."""
result = {}
for token in collection.tokens:
result[token.name] = token.value
return json.dumps(result, indent=2)
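
A short sketch (illustrative) of the format detection above: a string starting with '{' is parsed as content, the $value key marks it as W3C format, and TokenExporter flattens the result.

import asyncio
from tools.ingest.json_tokens import JSONTokenSource, TokenExporter

W3C = '{"color": {"primary": {"$value": "#3B82F6", "$type": "color"}}}'

async def demo():
    collection = await JSONTokenSource().extract(W3C)
    token = collection.get("color.primary")
    print(token.value, token.type.value)       # #3B82F6 color
    print(TokenExporter.to_flat(collection))   # {"color.primary": "#3B82F6"}

asyncio.run(demo())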

tools/ingest/merge.py (new file, 447 lines)

@@ -0,0 +1,447 @@
"""
Token Merge Module
Merge tokens from multiple sources with conflict resolution strategies.
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import List, Dict, Optional, Callable, Tuple
from .base import DesignToken, TokenCollection, TokenCategory
class MergeStrategy(str, Enum):
"""Token merge conflict resolution strategies."""
# Simple strategies
FIRST = "first" # Keep first occurrence
LAST = "last" # Keep last occurrence (override)
ERROR = "error" # Raise error on conflict
# Value-based strategies
PREFER_FIGMA = "prefer_figma" # Prefer Figma source
PREFER_CODE = "prefer_code" # Prefer code sources (CSS, SCSS)
PREFER_SPECIFIC = "prefer_specific" # Prefer more specific values
# Smart strategies
MERGE_METADATA = "merge_metadata" # Merge metadata, keep latest value
INTERACTIVE = "interactive" # Require user decision
@dataclass
class MergeConflict:
"""Represents a token name conflict during merge."""
token_name: str
existing: DesignToken
incoming: DesignToken
resolution: Optional[str] = None
resolved_token: Optional[DesignToken] = None
@dataclass
class MergeResult:
"""Result of a token merge operation."""
collection: TokenCollection
conflicts: List[MergeConflict] = field(default_factory=list)
stats: Dict[str, int] = field(default_factory=dict)
warnings: List[str] = field(default_factory=list)
def __post_init__(self):
if not self.stats:
self.stats = {
"total_tokens": 0,
"new_tokens": 0,
"updated_tokens": 0,
"conflicts_resolved": 0,
"conflicts_unresolved": 0,
}
class TokenMerger:
"""
Merge multiple token collections with conflict resolution.
Usage:
merger = TokenMerger(strategy=MergeStrategy.LAST)
result = merger.merge([collection1, collection2, collection3])
"""
# Source priority for PREFER_* strategies
SOURCE_PRIORITY = {
"figma": 100,
"css": 80,
"scss": 80,
"tailwind": 70,
"json": 60,
}
def __init__(
self,
strategy: MergeStrategy = MergeStrategy.LAST,
custom_resolver: Optional[Callable[[MergeConflict], DesignToken]] = None
):
"""
Initialize merger.
Args:
strategy: Default conflict resolution strategy
custom_resolver: Optional custom conflict resolver function
"""
self.strategy = strategy
self.custom_resolver = custom_resolver
def merge(
self,
collections: List[TokenCollection],
normalize_names: bool = True
) -> MergeResult:
"""
Merge multiple token collections.
Args:
collections: List of TokenCollections to merge
normalize_names: Whether to normalize token names before merging
Returns:
MergeResult with merged collection and conflict information
"""
result = MergeResult(
collection=TokenCollection(
name="Merged Tokens",
sources=[],
)
)
# Track tokens by normalized name
tokens_by_name: Dict[str, DesignToken] = {}
for collection in collections:
result.collection.sources.extend(collection.sources)
for token in collection.tokens:
# Normalize name if requested
name = token.normalize_name() if normalize_names else token.name
if name in tokens_by_name:
# Conflict detected
existing = tokens_by_name[name]
conflict = MergeConflict(
token_name=name,
existing=existing,
incoming=token,
)
# Resolve conflict
resolved = self._resolve_conflict(conflict)
conflict.resolved_token = resolved
if resolved:
tokens_by_name[name] = resolved
result.stats["conflicts_resolved"] += 1
result.stats["updated_tokens"] += 1
else:
result.stats["conflicts_unresolved"] += 1
result.warnings.append(
f"Unresolved conflict for token: {name}"
)
result.conflicts.append(conflict)
else:
# New token
tokens_by_name[name] = token
result.stats["new_tokens"] += 1
# Build final collection
result.collection.tokens = list(tokens_by_name.values())
result.stats["total_tokens"] = len(result.collection.tokens)
return result
def _resolve_conflict(self, conflict: MergeConflict) -> Optional[DesignToken]:
"""Resolve a single conflict based on strategy."""
# Try custom resolver first
if self.custom_resolver:
return self.custom_resolver(conflict)
# Apply strategy
if self.strategy == MergeStrategy.FIRST:
conflict.resolution = "kept_first"
return conflict.existing
elif self.strategy == MergeStrategy.LAST:
conflict.resolution = "used_last"
return self._update_token(conflict.incoming, conflict.existing)
elif self.strategy == MergeStrategy.ERROR:
conflict.resolution = "error"
raise ValueError(
f"Token conflict: {conflict.token_name} "
f"(existing: {conflict.existing.source}, "
f"incoming: {conflict.incoming.source})"
)
elif self.strategy == MergeStrategy.PREFER_FIGMA:
return self._prefer_source(conflict, "figma")
elif self.strategy == MergeStrategy.PREFER_CODE:
return self._prefer_code_source(conflict)
elif self.strategy == MergeStrategy.PREFER_SPECIFIC:
return self._prefer_specific_value(conflict)
elif self.strategy == MergeStrategy.MERGE_METADATA:
return self._merge_metadata(conflict)
elif self.strategy == MergeStrategy.INTERACTIVE:
# For interactive, we can't resolve automatically
conflict.resolution = "needs_input"
return None
return conflict.incoming
def _update_token(
self,
source: DesignToken,
base: DesignToken
) -> DesignToken:
"""Create updated token preserving some base metadata."""
# Create new token with source's value but enhanced metadata
updated = DesignToken(
name=source.name,
value=source.value,
type=source.type,
description=source.description or base.description,
source=source.source,
source_file=source.source_file,
source_line=source.source_line,
original_name=source.original_name,
original_value=source.original_value,
category=source.category,
tags=list(set(source.tags + base.tags)),
deprecated=source.deprecated or base.deprecated,
deprecated_message=source.deprecated_message or base.deprecated_message,
version=source.version,
updated_at=datetime.now(),
extensions={**base.extensions, **source.extensions},
)
return updated
def _prefer_source(
self,
conflict: MergeConflict,
preferred_source: str
) -> DesignToken:
"""Prefer token from specific source type."""
existing_source = conflict.existing.source.split(':')[0]
incoming_source = conflict.incoming.source.split(':')[0]
if incoming_source == preferred_source:
conflict.resolution = f"preferred_{preferred_source}"
return self._update_token(conflict.incoming, conflict.existing)
elif existing_source == preferred_source:
conflict.resolution = f"kept_{preferred_source}"
return conflict.existing
else:
# Neither is preferred, use last
conflict.resolution = "fallback_last"
return self._update_token(conflict.incoming, conflict.existing)
def _prefer_code_source(self, conflict: MergeConflict) -> DesignToken:
"""Prefer code sources (CSS, SCSS) over design sources."""
code_sources = {"css", "scss", "tailwind"}
existing_source = conflict.existing.source.split(':')[0]
incoming_source = conflict.incoming.source.split(':')[0]
existing_is_code = existing_source in code_sources
incoming_is_code = incoming_source in code_sources
if incoming_is_code and not existing_is_code:
conflict.resolution = "preferred_code"
return self._update_token(conflict.incoming, conflict.existing)
elif existing_is_code and not incoming_is_code:
conflict.resolution = "kept_code"
return conflict.existing
else:
# Both or neither are code, use priority
return self._prefer_by_priority(conflict)
def _prefer_by_priority(self, conflict: MergeConflict) -> DesignToken:
"""Choose based on source priority."""
existing_source = conflict.existing.source.split(':')[0]
incoming_source = conflict.incoming.source.split(':')[0]
existing_priority = self.SOURCE_PRIORITY.get(existing_source, 0)
incoming_priority = self.SOURCE_PRIORITY.get(incoming_source, 0)
if incoming_priority > existing_priority:
conflict.resolution = "higher_priority"
return self._update_token(conflict.incoming, conflict.existing)
else:
conflict.resolution = "kept_priority"
return conflict.existing
def _prefer_specific_value(self, conflict: MergeConflict) -> DesignToken:
"""Prefer more specific/concrete values."""
existing_value = str(conflict.existing.value).lower()
incoming_value = str(conflict.incoming.value).lower()
# Prefer concrete values over variables/references
existing_is_var = existing_value.startswith('var(') or \
existing_value.startswith('$') or \
existing_value.startswith('{')
incoming_is_var = incoming_value.startswith('var(') or \
incoming_value.startswith('$') or \
incoming_value.startswith('{')
if incoming_is_var and not existing_is_var:
conflict.resolution = "kept_concrete"
return conflict.existing
elif existing_is_var and not incoming_is_var:
conflict.resolution = "preferred_concrete"
return self._update_token(conflict.incoming, conflict.existing)
# Prefer hex colors over named colors
existing_is_hex = existing_value.startswith('#')
incoming_is_hex = incoming_value.startswith('#')
if incoming_is_hex and not existing_is_hex:
conflict.resolution = "preferred_hex"
return self._update_token(conflict.incoming, conflict.existing)
elif existing_is_hex and not incoming_is_hex:
conflict.resolution = "kept_hex"
return conflict.existing
# Default to last
conflict.resolution = "fallback_last"
return self._update_token(conflict.incoming, conflict.existing)
def _merge_metadata(self, conflict: MergeConflict) -> DesignToken:
"""Merge metadata from both tokens, keep latest value."""
conflict.resolution = "merged_metadata"
# Use incoming value but merge all metadata
merged_tags = list(set(
conflict.existing.tags + conflict.incoming.tags
))
merged_extensions = {
**conflict.existing.extensions,
**conflict.incoming.extensions
}
# Track both sources
merged_extensions['dss'] = merged_extensions.get('dss', {})
merged_extensions['dss']['previousSources'] = [
conflict.existing.source,
conflict.incoming.source
]
return DesignToken(
name=conflict.incoming.name,
value=conflict.incoming.value,
type=conflict.incoming.type or conflict.existing.type,
description=conflict.incoming.description or conflict.existing.description,
source=conflict.incoming.source,
source_file=conflict.incoming.source_file,
source_line=conflict.incoming.source_line,
original_name=conflict.incoming.original_name,
original_value=conflict.incoming.original_value,
category=conflict.incoming.category or conflict.existing.category,
tags=merged_tags,
deprecated=conflict.incoming.deprecated or conflict.existing.deprecated,
deprecated_message=conflict.incoming.deprecated_message or conflict.existing.deprecated_message,
version=conflict.incoming.version,
updated_at=datetime.now(),
extensions=merged_extensions,
)
class TokenDiff:
"""
Compare two token collections and find differences.
"""
@staticmethod
def diff(
source: TokenCollection,
target: TokenCollection
) -> Dict[str, List]:
"""
Compare two token collections.
Returns:
Dict with 'added', 'removed', 'changed', 'unchanged' lists
"""
source_by_name = {t.normalize_name(): t for t in source.tokens}
target_by_name = {t.normalize_name(): t for t in target.tokens}
source_names = set(source_by_name.keys())
target_names = set(target_by_name.keys())
result = {
'added': [], # In target but not source
'removed': [], # In source but not target
'changed': [], # In both but different value
'unchanged': [], # In both with same value
}
# Find added (in target, not in source)
for name in target_names - source_names:
result['added'].append(target_by_name[name])
# Find removed (in source, not in target)
for name in source_names - target_names:
result['removed'].append(source_by_name[name])
# Find changed/unchanged (in both)
for name in source_names & target_names:
source_token = source_by_name[name]
target_token = target_by_name[name]
if str(source_token.value) != str(target_token.value):
result['changed'].append({
'name': name,
'old_value': source_token.value,
'new_value': target_token.value,
'source_token': source_token,
'target_token': target_token,
})
else:
result['unchanged'].append(source_token)
return result
@staticmethod
def summary(diff_result: Dict[str, List]) -> str:
"""Generate human-readable diff summary."""
lines = ["Token Diff Summary:", "=" * 40]
if diff_result['added']:
lines.append(f"\n+ Added ({len(diff_result['added'])}):")
for token in diff_result['added'][:10]:
lines.append(f" + {token.name}: {token.value}")
if len(diff_result['added']) > 10:
lines.append(f" ... and {len(diff_result['added']) - 10} more")
if diff_result['removed']:
lines.append(f"\n- Removed ({len(diff_result['removed'])}):")
for token in diff_result['removed'][:10]:
lines.append(f" - {token.name}: {token.value}")
if len(diff_result['removed']) > 10:
lines.append(f" ... and {len(diff_result['removed']) - 10} more")
if diff_result['changed']:
lines.append(f"\n~ Changed ({len(diff_result['changed'])}):")
for change in diff_result['changed'][:10]:
lines.append(
f" ~ {change['name']}: {change['old_value']}{change['new_value']}"
)
if len(diff_result['changed']) > 10:
lines.append(f" ... and {len(diff_result['changed']) - 10} more")
lines.append(f"\n Unchanged: {len(diff_result['unchanged'])}")
return "\n".join(lines)

tools/ingest/scss.py (new file, 289 lines)

@@ -0,0 +1,289 @@
"""
SCSS Token Source
Extracts design tokens from SCSS/Sass variables.
Supports $variable declarations and @use module variables.
"""
import re
from pathlib import Path
from typing import List, Dict, Optional
from .base import DesignToken, TokenCollection, TokenSource
class SCSSTokenSource(TokenSource):
"""
Extract tokens from SCSS/Sass files.
Parses:
- $variable: value;
- $variable: value !default;
- // Comment descriptions
- @use module variables
- Maps: $colors: (primary: #3B82F6, secondary: #10B981);
"""
@property
def source_type(self) -> str:
return "scss"
async def extract(self, source: str) -> TokenCollection:
"""
Extract tokens from SCSS file or content.
Args:
source: File path or SCSS content string
Returns:
TokenCollection with extracted tokens
"""
if self._is_file_path(source):
file_path = Path(source)
if not file_path.exists():
raise FileNotFoundError(f"SCSS file not found: {source}")
content = file_path.read_text(encoding="utf-8")
source_file = str(file_path.absolute())
else:
content = source
source_file = "<inline>"
tokens = []
# Extract simple variables
tokens.extend(self._parse_variables(content, source_file))
# Extract map variables
tokens.extend(self._parse_maps(content, source_file))
return TokenCollection(
tokens=tokens,
name=f"SCSS Tokens from {Path(source_file).name if source_file != '<inline>' else 'inline'}",
sources=[self._create_source_id(source_file)],
)
def _is_file_path(self, source: str) -> bool:
"""Check if source looks like a file path."""
if '$' in source and ':' in source:
return False
if source.endswith('.scss') or source.endswith('.sass'):
return True
return Path(source).exists()
def _parse_variables(self, content: str, source_file: str) -> List[DesignToken]:
"""Parse simple $variable declarations."""
tokens = []
lines = content.split('\n')
# Pattern for variable declarations
var_pattern = re.compile(
r'^\s*'
r'(\$[\w-]+)\s*:\s*' # Variable name
r'([^;!]+)' # Value
r'(\s*!default)?' # Optional !default
r'\s*;',
re.MULTILINE
)
# Track comments for descriptions
prev_comment = ""
for i, line in enumerate(lines, 1):
# Check for comment
comment_match = re.match(r'^\s*//\s*(.+)$', line)
if comment_match:
prev_comment = comment_match.group(1).strip()
continue
# Check for variable
var_match = var_pattern.match(line)
if var_match:
var_name = var_match.group(1)
var_value = var_match.group(2).strip()
is_default = bool(var_match.group(3))
# Skip if value is a map (handled separately)
if var_value.startswith('(') and var_value.endswith(')'):
prev_comment = ""
continue
# Skip if value references another variable that we can't resolve
if var_value.startswith('$') and '(' not in var_value:
# It's a simple variable reference, try to extract
pass
token = DesignToken(
name=self._normalize_var_name(var_name),
value=self._process_value(var_value),
description=prev_comment,
source=self._create_source_id(source_file, i),
source_file=source_file,
source_line=i,
original_name=var_name,
original_value=var_value,
)
if is_default:
token.tags.append("default")
tokens.append(token)
prev_comment = ""
else:
# Reset comment if line doesn't match
if line.strip() and not line.strip().startswith('//'):
prev_comment = ""
return tokens
def _parse_maps(self, content: str, source_file: str) -> List[DesignToken]:
"""Parse SCSS map declarations."""
tokens = []
# Pattern for map declarations (handles multi-line)
map_pattern = re.compile(
r'\$(\w[\w-]*)\s*:\s*\(([\s\S]*?)\)\s*;',
re.MULTILINE
)
for match in map_pattern.finditer(content):
map_name = match.group(1)
map_content = match.group(2)
# Get line number
line_num = content[:match.start()].count('\n') + 1
# Parse map entries
entries = self._parse_map_entries(map_content)
for key, value in entries.items():
token = DesignToken(
name=f"{self._normalize_var_name('$' + map_name)}.{key}",
value=self._process_value(value),
source=self._create_source_id(source_file, line_num),
source_file=source_file,
source_line=line_num,
original_name=f"${map_name}.{key}",
original_value=value,
)
token.tags.append("from-map")
tokens.append(token)
return tokens
def _parse_map_entries(self, map_content: str) -> Dict[str, str]:
"""Parse entries from a SCSS map."""
entries = {}
# Handle nested maps and simple key-value pairs
# This is a simplified parser for common cases
# Remove comments
map_content = re.sub(r'//[^\n]*', '', map_content)
# Split by comma (not inside parentheses)
depth = 0
current = ""
parts = []
for char in map_content:
if char == '(':
depth += 1
current += char
elif char == ')':
depth -= 1
current += char
elif char == ',' and depth == 0:
parts.append(current.strip())
current = ""
else:
current += char
if current.strip():
parts.append(current.strip())
# Parse each part
for part in parts:
if ':' in part:
key, value = part.split(':', 1)
key = key.strip().strip('"\'')
value = value.strip()
entries[key] = value
return entries
def _normalize_var_name(self, var_name: str) -> str:
"""Convert SCSS variable name to token name."""
# Remove $ prefix
name = var_name.lstrip('$')
# Convert kebab-case and underscores to dots
name = re.sub(r'[-_]', '.', name)
return name.lower()
def _process_value(self, value: str) -> str:
"""Process SCSS value for token storage."""
value = value.strip()
# Handle function calls (keep as-is for now)
if '(' in value and ')' in value:
return value
# Handle quotes
if (value.startswith('"') and value.endswith('"')) or \
(value.startswith("'") and value.endswith("'")):
return value[1:-1]
return value
class SCSSVariableResolver:
"""
Resolve SCSS variable references.
Builds a dependency graph and resolves $var references to actual values.
"""
def __init__(self):
self.variables: Dict[str, str] = {}
self.resolved: Dict[str, str] = {}
self._resolving: set = set()  # names currently being resolved (cycle guard)
def add_variable(self, name: str, value: str) -> None:
"""Add a variable to the resolver."""
self.variables[name] = value
def resolve(self, name: str) -> Optional[str]:
"""Resolve a variable to its final value."""
if name in self.resolved:
return self.resolved[name]
if name in self._resolving:
# Circular reference: abandon this chain instead of recursing forever
return None
value = self.variables.get(name)
if not value:
return None
# Check if value references other variables
if '$' in value:
self._resolving.add(name)
try:
resolved_value = self._resolve_references(value)
finally:
self._resolving.discard(name)
self.resolved[name] = resolved_value
return resolved_value
self.resolved[name] = value
return value
def _resolve_references(self, value: str, depth: int = 0) -> str:
"""Recursively resolve variable references in a value."""
if depth > 10: # Prevent infinite loops
return value
# Find variable references
var_pattern = re.compile(r'\$[\w-]+')
def replace_var(match):
var_name = match.group(0)
resolved = self.resolve(var_name.lstrip('$'))
return resolved if resolved else var_name
return var_pattern.sub(replace_var, value)
def resolve_all(self) -> Dict[str, str]:
"""Resolve all variables."""
for name in self.variables:
self.resolve(name)
return self.resolved
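
An extraction sketch (illustrative): simple variables and map entries are parsed by the two separate passes above, and the resolver chases $var references to concrete values.

import asyncio
from tools.ingest.scss import SCSSTokenSource, SCSSVariableResolver

SCSS = """
// Brand primary
$color-primary: #3B82F6;
$colors: (primary: #3B82F6, secondary: #10B981);
"""

async def demo():
    collection = await SCSSTokenSource().extract(SCSS)
    for t in collection:
        print(t.name, t.value, t.tags)
    # color.primary #3B82F6 []
    # colors.primary #3B82F6 ['from-map']
    # colors.secondary #10B981 ['from-map']

asyncio.run(demo())

resolver = SCSSVariableResolver()
resolver.add_variable("brand", "#3B82F6")
resolver.add_variable("link-color", "$brand")
print(resolver.resolve("link-color"))  # #3B82F6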

tools/ingest/tailwind.py (new file, 330 lines)

@@ -0,0 +1,330 @@
"""
Tailwind Token Source
Extracts design tokens from Tailwind CSS configuration files.
Supports tailwind.config.js/ts and CSS-based Tailwind v4 configurations.
"""
import re
import json
from pathlib import Path
from typing import List, Dict, Any, Optional
from .base import DesignToken, TokenCollection, TokenSource, TokenCategory
class TailwindTokenSource(TokenSource):
"""
Extract tokens from Tailwind CSS configuration.
Parses:
- tailwind.config.js/ts (theme and extend sections)
- Tailwind v4 CSS-based configuration
- CSS custom properties from Tailwind output
"""
# Tailwind category mappings
TAILWIND_CATEGORIES = {
'colors': TokenCategory.COLORS,
'backgroundColor': TokenCategory.COLORS,
'textColor': TokenCategory.COLORS,
'borderColor': TokenCategory.COLORS,
'spacing': TokenCategory.SPACING,
'padding': TokenCategory.SPACING,
'margin': TokenCategory.SPACING,
'gap': TokenCategory.SPACING,
'fontSize': TokenCategory.TYPOGRAPHY,
'fontFamily': TokenCategory.TYPOGRAPHY,
'fontWeight': TokenCategory.TYPOGRAPHY,
'lineHeight': TokenCategory.TYPOGRAPHY,
'letterSpacing': TokenCategory.TYPOGRAPHY,
'width': TokenCategory.SIZING,
'height': TokenCategory.SIZING,
'maxWidth': TokenCategory.SIZING,
'maxHeight': TokenCategory.SIZING,
'minWidth': TokenCategory.SIZING,
'minHeight': TokenCategory.SIZING,
'borderRadius': TokenCategory.BORDERS,
'borderWidth': TokenCategory.BORDERS,
'boxShadow': TokenCategory.SHADOWS,
'dropShadow': TokenCategory.SHADOWS,
'opacity': TokenCategory.OPACITY,
'zIndex': TokenCategory.Z_INDEX,
'transitionDuration': TokenCategory.MOTION,
'transitionTimingFunction': TokenCategory.MOTION,
'animation': TokenCategory.MOTION,
'screens': TokenCategory.BREAKPOINTS,
}
@property
def source_type(self) -> str:
return "tailwind"
async def extract(self, source: str) -> TokenCollection:
"""
Extract tokens from Tailwind config.
Args:
source: Path to tailwind.config.js/ts or directory containing it
Returns:
TokenCollection with extracted tokens
"""
config_path = self._find_config(source)
if not config_path:
raise FileNotFoundError(f"Tailwind config not found in: {source}")
content = config_path.read_text(encoding="utf-8")
source_file = str(config_path.absolute())
# Parse based on file type
if config_path.suffix in ('.js', '.cjs', '.mjs', '.ts'):
tokens = self._parse_js_config(content, source_file)
elif config_path.suffix == '.css':
tokens = self._parse_css_config(content, source_file)
else:
tokens = []
return TokenCollection(
tokens=tokens,
name=f"Tailwind Tokens from {config_path.name}",
sources=[self._create_source_id(source_file)],
)
def _find_config(self, source: str) -> Optional[Path]:
"""Find Tailwind config file."""
path = Path(source)
# If it's a file, use it directly
if path.is_file():
return path
# If it's a directory, look for config files
if path.is_dir():
config_names = [
'tailwind.config.js',
'tailwind.config.cjs',
'tailwind.config.mjs',
'tailwind.config.ts',
]
for name in config_names:
config_path = path / name
if config_path.exists():
return config_path
return None
def _parse_js_config(self, content: str, source_file: str) -> List[DesignToken]:
"""Parse JavaScript/TypeScript Tailwind config."""
tokens = []
# Extract theme object using regex (simplified parsing)
# This handles common patterns but may not cover all edge cases
# Look for theme: { ... } or theme.extend: { ... }
theme_match = re.search(
r'theme\s*:\s*\{([\s\S]*?)\n\s*\}(?=\s*[,}])',
content
)
extend_match = re.search(
r'extend\s*:\s*\{([\s\S]*?)\n\s{4}\}',
content
)
if extend_match:
theme_content = extend_match.group(1)
tokens.extend(self._parse_theme_object(theme_content, source_file, "extend"))
if theme_match and not extend_match:
theme_content = theme_match.group(1)
tokens.extend(self._parse_theme_object(theme_content, source_file, "theme"))
return tokens
def _parse_theme_object(self, content: str, source_file: str, prefix: str) -> List[DesignToken]:
"""Parse theme object content."""
tokens = []
# Find property blocks like: colors: { primary: '#3B82F6', ... }
prop_pattern = re.compile(
r"(\w+)\s*:\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}",
re.MULTILINE
)
for match in prop_pattern.finditer(content):
category_name = match.group(1)
category_content = match.group(2)
category = self.TAILWIND_CATEGORIES.get(
category_name, TokenCategory.OTHER
)
# Parse values in this category
tokens.extend(
self._parse_category_values(
category_name,
category_content,
source_file,
category
)
)
return tokens
def _parse_category_values(
self,
category_name: str,
content: str,
source_file: str,
category: TokenCategory
) -> List[DesignToken]:
"""Parse values within a category."""
tokens = []
# Match key: value pairs
# Handles: key: 'value', key: "value", key: value, 'key': value
value_pattern = re.compile(
r"['\"]?(\w[\w-]*)['\"]?\s*:\s*['\"]?([^,'\"}\n]+)['\"]?",
)
for match in value_pattern.finditer(content):
key = match.group(1)
value = match.group(2).strip()
# Skip function calls and complex values for now
if '(' in value or '{' in value:
continue
# Skip references to other values
if value.startswith('colors.') or value.startswith('theme('):
continue
token = DesignToken(
name=f"{category_name}.{key}",
value=value,
source=self._create_source_id(source_file),
source_file=source_file,
original_name=f"{category_name}.{key}",
original_value=value,
category=category,
)
token.tags.append("tailwind")
tokens.append(token)
return tokens
def _parse_css_config(self, content: str, source_file: str) -> List[DesignToken]:
"""Parse Tailwind v4 CSS-based configuration."""
tokens = []
# Tailwind v4 uses @theme directive
theme_match = re.search(
r'@theme\s*\{([\s\S]*?)\}',
content
)
if theme_match:
theme_content = theme_match.group(1)
# Parse CSS custom properties
var_pattern = re.compile(
r'(--[\w-]+)\s*:\s*([^;]+);'
)
for match in var_pattern.finditer(theme_content):
var_name = match.group(1)
var_value = match.group(2).strip()
# Determine category from variable name
category = self._category_from_var_name(var_name)
token = DesignToken(
name=self._normalize_var_name(var_name),
value=var_value,
source=self._create_source_id(source_file),
source_file=source_file,
original_name=var_name,
original_value=var_value,
category=category,
)
token.tags.append("tailwind-v4")
tokens.append(token)
return tokens
def _normalize_var_name(self, var_name: str) -> str:
"""Convert CSS variable name to token name."""
name = var_name.lstrip('-')
name = name.replace('-', '.')
return name.lower()
def _category_from_var_name(self, var_name: str) -> TokenCategory:
"""Determine category from variable name."""
name_lower = var_name.lower()
if 'color' in name_lower or 'bg' in name_lower:
return TokenCategory.COLORS
if 'spacing' in name_lower or 'gap' in name_lower:
return TokenCategory.SPACING
if 'font' in name_lower or 'text' in name_lower:
return TokenCategory.TYPOGRAPHY
if 'radius' in name_lower or 'border' in name_lower:
return TokenCategory.BORDERS
if 'shadow' in name_lower:
return TokenCategory.SHADOWS
return TokenCategory.OTHER
class TailwindClassExtractor:
"""
Extract Tailwind class usage from source files.
Identifies Tailwind utility classes for analysis and migration.
"""
# Common Tailwind class prefixes
TAILWIND_PREFIXES = [
'bg-', 'text-', 'border-', 'ring-',
'p-', 'px-', 'py-', 'pt-', 'pr-', 'pb-', 'pl-',
'm-', 'mx-', 'my-', 'mt-', 'mr-', 'mb-', 'ml-',
'w-', 'h-', 'min-w-', 'min-h-', 'max-w-', 'max-h-',
'flex-', 'grid-', 'gap-',
'font-', 'leading-', 'tracking-',
'rounded-', 'shadow-', 'opacity-',
'z-', 'transition-', 'duration-', 'ease-',
]
async def extract_usage(self, source: str) -> Dict[str, List[str]]:
"""
Extract Tailwind class usage from file.
Returns dict mapping class categories to list of used classes.
"""
if Path(source).exists():
content = Path(source).read_text(encoding="utf-8")
else:
content = source
usage: Dict[str, List[str]] = {}
# Find className or class attributes
class_pattern = re.compile(
r'(?:className|class)\s*=\s*["\']([^"\']+)["\']'
)
for match in class_pattern.finditer(content):
classes = match.group(1).split()
for cls in classes:
# Check if it's a Tailwind class
for prefix in self.TAILWIND_PREFIXES:
if cls.startswith(prefix):
category = prefix.rstrip('-')
if category not in usage:
usage[category] = []
if cls not in usage[category]:
usage[category].append(cls)
break
return usage
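
A usage sketch (illustrative path and markup): the source resolves a config file inside a project directory, and the class extractor groups utility classes by prefix.

import asyncio
from tools.ingest.tailwind import TailwindTokenSource, TailwindClassExtractor

async def demo():
    # Hypothetical project directory containing tailwind.config.js
    collection = await TailwindTokenSource().extract("./my-app")
    print(collection.summary())

    usage = await TailwindClassExtractor().extract_usage(
        '<div className="bg-blue-500 p-4 rounded-lg">Hi</div>'
    )
    print(usage)  # {'bg': ['bg-blue-500'], 'p': ['p-4'], 'rounded': ['rounded-lg']}

asyncio.run(demo())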