Initial commit: Clean DSS implementation

Migrated from design-system-swarm with fresh git history.
Old project history preserved in /home/overbits/apps/design-system-swarm

Core components:
- MCP Server (Python FastAPI with mcp 1.23.1)
- Claude Plugin (agents, commands, skills, strategies, hooks, core)
- DSS Backend (dss-mvp1 - token translation, Figma sync)
- Admin UI (Node.js/React)
- Server (Node.js/Express)
- Storybook integration (dss-mvp1/.storybook)

Self-contained configuration:
- All paths relative or use DSS_BASE_PATH=/home/overbits/dss
- PYTHONPATH configured for dss-mvp1 and dss-claude-plugin
- .env file with all configuration
- Claude plugin uses ${CLAUDE_PLUGIN_ROOT} for portability

Migration completed: $(date)

🤖 Clean migration with full functionality preserved
tools/ingest/__init__.py (new file, 25 lines)
@@ -0,0 +1,25 @@
"""
DSS Token Ingestion Module

Multi-source design token extraction and normalization.
Supports: Figma, CSS, SCSS, Tailwind, JSON/YAML, styled-components
"""

from .base import DesignToken, TokenSource, TokenCollection
from .css import CSSTokenSource
from .scss import SCSSTokenSource
from .tailwind import TailwindTokenSource
from .json_tokens import JSONTokenSource
from .merge import TokenMerger, MergeStrategy

__all__ = [
    'DesignToken',
    'TokenSource',
    'TokenCollection',
    'CSSTokenSource',
    'SCSSTokenSource',
    'TailwindTokenSource',
    'JSONTokenSource',
    'TokenMerger',
    'MergeStrategy',
]
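
Example (not part of the commit): a minimal usage sketch of the package API. It assumes the repo root is on PYTHONPATH so the package imports as tools.ingest; the file paths are hypothetical.

import asyncio
from tools.ingest import CSSTokenSource, JSONTokenSource, TokenMerger, MergeStrategy

async def main():
    # Extract from two hypothetical sources, then merge with a code-wins strategy.
    css = await CSSTokenSource().extract("design/tokens.css")            # hypothetical path
    json_tokens = await JSONTokenSource().extract("design/tokens.json")  # hypothetical path
    result = TokenMerger(strategy=MergeStrategy.PREFER_CODE).merge([json_tokens, css])
    print(result.collection.summary())

asyncio.run(main())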
tools/ingest/base.py (new file, 462 lines)
@@ -0,0 +1,462 @@
"""
Base classes for token ingestion.

Defines the DesignToken model following W3C Design Tokens format
and the TokenSource abstract class for all ingestors.
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Set
import json
import re


class TokenType(str, Enum):
    """W3C Design Token types."""
    COLOR = "color"
    DIMENSION = "dimension"
    FONT_FAMILY = "fontFamily"
    FONT_WEIGHT = "fontWeight"
    FONT_SIZE = "fontSize"
    LINE_HEIGHT = "lineHeight"
    LETTER_SPACING = "letterSpacing"
    DURATION = "duration"
    CUBIC_BEZIER = "cubicBezier"
    NUMBER = "number"
    STRING = "string"
    SHADOW = "shadow"
    BORDER = "border"
    GRADIENT = "gradient"
    TRANSITION = "transition"
    COMPOSITE = "composite"
    UNKNOWN = "unknown"


class TokenCategory(str, Enum):
    """Token categories for organization."""
    COLORS = "colors"
    SPACING = "spacing"
    TYPOGRAPHY = "typography"
    SIZING = "sizing"
    BORDERS = "borders"
    SHADOWS = "shadows"
    EFFECTS = "effects"
    MOTION = "motion"
    BREAKPOINTS = "breakpoints"
    Z_INDEX = "z-index"
    OPACITY = "opacity"
    OTHER = "other"


@dataclass
class DesignToken:
    """
    W3C Design Token representation.

    Follows the W3C Design Tokens Community Group format with
    additional metadata for source tracking and enterprise use.
    """
    # Core properties (W3C spec)
    name: str   # e.g., "color.primary.500"
    value: Any  # e.g., "#3B82F6" or {"r": 59, "g": 130, "b": 246}
    type: TokenType = TokenType.UNKNOWN
    description: str = ""

    # Source attribution
    source: str = ""         # e.g., "figma:abc123", "css:tokens.css:12"
    source_file: str = ""    # Original file path
    source_line: int = 0     # Line number in source
    original_name: str = ""  # Name before normalization
    original_value: str = "" # Value before processing

    # Organization
    category: TokenCategory = TokenCategory.OTHER
    tags: List[str] = field(default_factory=list)
    group: str = ""  # Logical grouping (e.g., "brand", "semantic")

    # State
    deprecated: bool = False
    deprecated_message: str = ""

    # Versioning
    version: str = "1.0.0"
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)

    # Extensions (for custom metadata)
    extensions: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Normalize and validate token after creation."""
        if not self.original_name:
            self.original_name = self.name
        if not self.original_value:
            self.original_value = str(self.value)

        # Auto-detect type if unknown
        if self.type == TokenType.UNKNOWN:
            self.type = self._detect_type()

        # Auto-detect category if other
        if self.category == TokenCategory.OTHER:
            self.category = self._detect_category()

    def _detect_type(self) -> TokenType:
        """Detect token type from value."""
        value_str = str(self.value).lower().strip()

        # Color patterns
        if re.match(r'^#[0-9a-f]{3,8}$', value_str):
            return TokenType.COLOR
        if re.match(r'^rgba?\s*\(', value_str):
            return TokenType.COLOR
        if re.match(r'^hsla?\s*\(', value_str):
            return TokenType.COLOR
        if value_str in ('transparent', 'currentcolor', 'inherit'):
            return TokenType.COLOR

        # Dimension patterns
        if re.match(r'^-?\d+(\.\d+)?(px|rem|em|%|vh|vw|ch|ex|vmin|vmax)$', value_str):
            return TokenType.DIMENSION

        # Duration patterns
        if re.match(r'^\d+(\.\d+)?(ms|s)$', value_str):
            return TokenType.DURATION

        # Number patterns
        if re.match(r'^-?\d+(\.\d+)?$', value_str):
            return TokenType.NUMBER

        # Font family (contains quotes or commas)
        if ',' in value_str or '"' in value_str or "'" in value_str:
            if 'sans' in value_str or 'serif' in value_str or 'mono' in value_str:
                return TokenType.FONT_FAMILY

        # Font weight
        if value_str in ('normal', 'bold', 'lighter', 'bolder') or \
                re.match(r'^[1-9]00$', value_str):
            return TokenType.FONT_WEIGHT

        # Shadow
        if 'shadow' in self.name.lower() or \
                re.match(r'^-?\d+.*\s+-?\d+.*\s+-?\d+', value_str):
            return TokenType.SHADOW

        return TokenType.STRING

    def _detect_category(self) -> TokenCategory:
        """Detect category from token name."""
        name_lower = self.name.lower()

        # Check name patterns
        patterns = {
            TokenCategory.COLORS: ['color', 'bg', 'background', 'text', 'border-color', 'fill', 'stroke'],
            TokenCategory.SPACING: ['space', 'spacing', 'gap', 'margin', 'padding', 'inset'],
            TokenCategory.TYPOGRAPHY: ['font', 'text', 'line-height', 'letter-spacing', 'typography'],
            TokenCategory.SIZING: ['size', 'width', 'height', 'min-', 'max-'],
            TokenCategory.BORDERS: ['border', 'radius', 'outline'],
            TokenCategory.SHADOWS: ['shadow', 'elevation'],
            TokenCategory.EFFECTS: ['blur', 'opacity', 'filter', 'backdrop'],
            TokenCategory.MOTION: ['transition', 'animation', 'duration', 'delay', 'timing', 'ease'],
            TokenCategory.BREAKPOINTS: ['breakpoint', 'screen', 'media'],
            TokenCategory.Z_INDEX: ['z-index', 'z-', 'layer'],
        }

        for category, keywords in patterns.items():
            if any(kw in name_lower for kw in keywords):
                return category

        # Check by type
        if self.type == TokenType.COLOR:
            return TokenCategory.COLORS
        if self.type in (TokenType.FONT_FAMILY, TokenType.FONT_WEIGHT, TokenType.FONT_SIZE, TokenType.LINE_HEIGHT):
            return TokenCategory.TYPOGRAPHY
        if self.type == TokenType.DURATION:
            return TokenCategory.MOTION
        if self.type == TokenType.SHADOW:
            return TokenCategory.SHADOWS

        return TokenCategory.OTHER

    def normalize_name(self, separator: str = ".") -> str:
        """
        Normalize token name to consistent format.

        Converts various formats to dot-notation:
        - kebab-case: color-primary-500 -> color.primary.500
        - snake_case: color_primary_500 -> color.primary.500
        - camelCase: colorPrimary500 -> color.primary.500
        """
        name = self.name

        # Handle camelCase
        name = re.sub(r'([a-z])([A-Z])', r'\1.\2', name)

        # Replace separators
        name = name.replace('-', separator)
        name = name.replace('_', separator)
        name = name.replace('/', separator)
        if separator != '.':
            # Bug fix: also map dots (from dot-notation names and from the
            # camelCase substitution above) to the requested separator, so
            # to_css_var_name()/to_scss_var_name() yield valid identifiers.
            name = name.replace('.', separator)

        # Clean up multiple separators
        while separator * 2 in name:
            name = name.replace(separator * 2, separator)

        return name.lower().strip(separator)

    def to_css_var_name(self) -> str:
        """Convert to CSS custom property name."""
        normalized = self.normalize_name("-")
        return f"--{normalized}"

    def to_scss_var_name(self) -> str:
        """Convert to SCSS variable name."""
        normalized = self.normalize_name("-")
        return f"${normalized}"

    def to_js_name(self) -> str:
        """Convert to JavaScript object key (camelCase)."""
        parts = self.normalize_name(".").split(".")
        if not parts:
            return ""
        result = parts[0]
        for part in parts[1:]:
            result += part.capitalize()
        return result

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary (W3C format)."""
        result = {
            "$value": self.value,
            "$type": self.type.value,
        }

        if self.description:
            result["$description"] = self.description

        if self.extensions:
            result["$extensions"] = self.extensions

        # Add DSS metadata
        result["$extensions"] = result.get("$extensions", {})
        result["$extensions"]["dss"] = {
            "source": self.source,
            "sourceFile": self.source_file,
            "sourceLine": self.source_line,
            "originalName": self.original_name,
            "category": self.category.value,
            "tags": self.tags,
            "deprecated": self.deprecated,
            "version": self.version,
        }

        return result

    def to_json(self) -> str:
        """Serialize to JSON."""
        return json.dumps(self.to_dict(), indent=2)


@dataclass
class TokenCollection:
    """
    Collection of design tokens with metadata.

    Represents a complete set of tokens from a single source or merged sources.
    """
    tokens: List[DesignToken] = field(default_factory=list)
    name: str = ""
    description: str = ""
    version: str = "1.0.0"
    sources: List[str] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)

    def __len__(self) -> int:
        return len(self.tokens)

    def __iter__(self):
        return iter(self.tokens)

    def __getitem__(self, key):
        if isinstance(key, int):
            return self.tokens[key]
        # Allow access by token name
        for token in self.tokens:
            if token.name == key:
                return token
        raise KeyError(f"Token '{key}' not found")

    def add(self, token: DesignToken) -> None:
        """Add a token to the collection."""
        self.tokens.append(token)

    def get(self, name: str) -> Optional[DesignToken]:
        """Get token by name."""
        for token in self.tokens:
            if token.name == name:
                return token
        return None

    def filter_by_category(self, category: TokenCategory) -> 'TokenCollection':
        """Return new collection filtered by category."""
        filtered = [t for t in self.tokens if t.category == category]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} ({category.value})",
            sources=self.sources,
        )

    def filter_by_type(self, token_type: TokenType) -> 'TokenCollection':
        """Return new collection filtered by type."""
        filtered = [t for t in self.tokens if t.type == token_type]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} ({token_type.value})",
            sources=self.sources,
        )

    def filter_by_source(self, source: str) -> 'TokenCollection':
        """Return new collection filtered by source."""
        filtered = [t for t in self.tokens if source in t.source]
        return TokenCollection(
            tokens=filtered,
            name=f"{self.name} (from {source})",
            sources=[source],
        )

    def get_categories(self) -> Set[TokenCategory]:
        """Get all unique categories in collection."""
        return {t.category for t in self.tokens}

    def get_types(self) -> Set[TokenType]:
        """Get all unique types in collection."""
        return {t.type for t in self.tokens}

    def get_duplicates(self) -> Dict[str, List[DesignToken]]:
        """Find tokens with duplicate names."""
        seen: Dict[str, List[DesignToken]] = {}
        for token in self.tokens:
            if token.name not in seen:
                seen[token.name] = []
            seen[token.name].append(token)
        return {k: v for k, v in seen.items() if len(v) > 1}

    def to_css(self) -> str:
        """Export as CSS custom properties."""
        lines = [":root {"]
        for token in sorted(self.tokens, key=lambda t: t.name):
            var_name = token.to_css_var_name()
            if token.description:
                lines.append(f"  /* {token.description} */")
            lines.append(f"  {var_name}: {token.value};")
        lines.append("}")
        return "\n".join(lines)

    def to_scss(self) -> str:
        """Export as SCSS variables."""
        lines = []
        for token in sorted(self.tokens, key=lambda t: t.name):
            var_name = token.to_scss_var_name()
            if token.description:
                lines.append(f"// {token.description}")
            lines.append(f"{var_name}: {token.value};")
        return "\n".join(lines)

    def to_json(self) -> str:
        """Export as W3C Design Tokens JSON."""
        result = {}
        for token in self.tokens:
            parts = token.normalize_name().split(".")
            current = result
            for part in parts[:-1]:
                if part not in current:
                    current[part] = {}
                current = current[part]
            current[parts[-1]] = token.to_dict()
        return json.dumps(result, indent=2)

    def to_typescript(self) -> str:
        """Export as TypeScript constants."""
        lines = ["export const tokens = {"]
        for token in sorted(self.tokens, key=lambda t: t.name):
            js_name = token.to_js_name()
            value = f'"{token.value}"' if isinstance(token.value, str) else token.value
            if token.description:
                lines.append(f"  /** {token.description} */")
            lines.append(f"  {js_name}: {value},")
        lines.append("} as const;")
        lines.append("")
        lines.append("export type TokenKey = keyof typeof tokens;")
        return "\n".join(lines)

    def to_tailwind_config(self) -> str:
        """Export as Tailwind config extend object."""
        # Group tokens by category for Tailwind structure
        colors = self.filter_by_category(TokenCategory.COLORS)
        spacing = self.filter_by_category(TokenCategory.SPACING)

        lines = ["module.exports = {", "  theme: {", "    extend: {"]

        if colors.tokens:
            lines.append("      colors: {")
            for token in colors.tokens:
                name = token.name.replace("color.", "").replace("colors.", "")
                lines.append(f'        "{name}": "{token.value}",')
            lines.append("      },")

        if spacing.tokens:
            lines.append("      spacing: {")
            for token in spacing.tokens:
                name = token.name.replace("spacing.", "").replace("space.", "")
                lines.append(f'        "{name}": "{token.value}",')
            lines.append("      },")

        lines.extend(["    },", "  },", "};"])
        return "\n".join(lines)

    def summary(self) -> Dict[str, Any]:
        """Get collection summary."""
        return {
            "total_tokens": len(self.tokens),
            "categories": {cat.value: len(self.filter_by_category(cat))
                           for cat in self.get_categories()},
            "types": {t.value: len(self.filter_by_type(t))
                      for t in self.get_types()},
            "sources": self.sources,
            "duplicates": len(self.get_duplicates()),
        }


class TokenSource(ABC):
    """
    Abstract base class for token sources.

    All token ingestors must implement this interface.
    """

    @property
    @abstractmethod
    def source_type(self) -> str:
        """Return source type identifier (e.g., 'css', 'scss', 'figma')."""
        pass

    @abstractmethod
    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from source.

        Args:
            source: File path, URL, or content depending on source type

        Returns:
            TokenCollection with extracted tokens
        """
        pass

    def _create_source_id(self, file_path: str, line: int = 0) -> str:
        """Create source identifier string."""
        if line:
            return f"{self.source_type}:{file_path}:{line}"
        return f"{self.source_type}:{file_path}"
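
Example (not part of the commit): a short sketch of the auto-detection and export behavior defined above, using only the classes in this file.

from tools.ingest.base import DesignToken, TokenCollection, TokenType, TokenCategory

# Type and category are inferred in __post_init__ when left unset.
token = DesignToken(name="color-primary-500", value="#3B82F6")
assert token.type == TokenType.COLOR            # detected from the hex value
assert token.category == TokenCategory.COLORS   # detected from the name

collection = TokenCollection(name="Demo")
collection.add(token)
collection.add(DesignToken(name="spacing.md", value="16px"))

print(collection.to_css())
# :root {
#   --color-primary-500: #3B82F6;
#   --spacing-md: 16px;
# }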
tools/ingest/css.py (new file, 282 lines)
@@ -0,0 +1,282 @@
"""
CSS Token Source

Extracts design tokens from CSS custom properties (CSS variables).
Parses :root declarations and other CSS variable definitions.
"""

import re
from pathlib import Path
from typing import List, Optional, Tuple

from .base import DesignToken, TokenCollection, TokenSource, TokenType, TokenCategory


class CSSTokenSource(TokenSource):
    """
    Extract tokens from CSS files.

    Parses CSS custom properties defined in :root or other selectors.
    Supports:
    - :root { --color-primary: #3B82F6; }
    - [data-theme="dark"] { --color-primary: #60A5FA; }
    - Comments as descriptions
    """

    @property
    def source_type(self) -> str:
        return "css"

    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from CSS file or content.

        Args:
            source: File path or CSS content string

        Returns:
            TokenCollection with extracted tokens
        """
        # Determine if source is file path or content
        if self._is_file_path(source):
            file_path = Path(source)
            if not file_path.exists():
                raise FileNotFoundError(f"CSS file not found: {source}")
            content = file_path.read_text(encoding="utf-8")
            source_file = str(file_path.absolute())
        else:
            content = source
            source_file = "<inline>"

        tokens = self._parse_css(content, source_file)

        return TokenCollection(
            tokens=tokens,
            name=f"CSS Tokens from {Path(source_file).name if source_file != '<inline>' else 'inline'}",
            sources=[self._create_source_id(source_file)],
        )

    def _is_file_path(self, source: str) -> bool:
        """Check if source looks like a file path."""
        # If it contains CSS syntax, it's content
        if '{' in source or (':' in source and ';' in source):
            return False
        # If it ends with .css, it's a file
        if source.endswith('.css'):
            return True
        # If path exists, it's a file
        return Path(source).exists()

    def _parse_css(self, content: str, source_file: str) -> List[DesignToken]:
        """Parse CSS content and extract custom properties."""
        tokens = []

        # Track line numbers
        line_map = self._build_line_map(content)

        # Find all CSS variable declarations
        # Pattern matches: --var-name: value;
        var_pattern = re.compile(
            r'(\/\*[^*]*\*\/\s*)?'  # Optional preceding comment
            r'(--[\w-]+)\s*:\s*'    # Variable name
            r'([^;]+);',            # Value
            re.MULTILINE
        )

        # Find variables in all rule blocks
        for match in var_pattern.finditer(content):
            comment = match.group(1)
            var_name = match.group(2)
            var_value = match.group(3).strip()

            # Get line number
            pos = match.start()
            line_num = self._get_line_number(pos, line_map)

            # Extract description from comment
            description = ""
            if comment:
                description = self._clean_comment(comment)

            # Get context (selector)
            context = self._get_selector_context(content, pos)

            # Create token
            token = DesignToken(
                name=self._normalize_var_name(var_name),
                value=var_value,
                description=description,
                source=self._create_source_id(source_file, line_num),
                source_file=source_file,
                source_line=line_num,
                original_name=var_name,
                original_value=var_value,
            )

            # Add context as tag if not :root
            if context and context != ":root":
                token.tags.append(f"context:{context}")

            tokens.append(token)

        return tokens

    def _build_line_map(self, content: str) -> List[int]:
        """Build map of character positions to line numbers."""
        line_map = []
        pos = 0
        for line in content.split('\n'):
            line_map.append(pos)
            pos += len(line) + 1  # +1 for newline
        return line_map

    def _get_line_number(self, pos: int, line_map: List[int]) -> int:
        """Get line number for character position."""
        for i, line_start in enumerate(line_map):
            if i + 1 < len(line_map):
                if line_start <= pos < line_map[i + 1]:
                    return i + 1
            else:
                return i + 1
        return 1

    def _normalize_var_name(self, var_name: str) -> str:
        """Convert CSS variable name to token name."""
        # Remove -- prefix
        name = var_name.lstrip('-')
        # Convert kebab-case to dot notation
        name = name.replace('-', '.')
        return name

    def _clean_comment(self, comment: str) -> str:
        """Extract text from CSS comment."""
        if not comment:
            return ""
        # Remove /* and */
        text = re.sub(r'/\*|\*/', '', comment)
        # Clean whitespace
        text = ' '.join(text.split())
        return text.strip()

    def _get_selector_context(self, content: str, pos: int) -> str:
        """Get the CSS selector context for a variable."""
        # Find the opening brace before this position
        before = content[:pos]
        last_open = before.rfind('{')
        if last_open == -1:
            return ""

        # Find the selector before the brace
        selector_part = before[:last_open]
        # Get last selector (after } or start)
        last_close = selector_part.rfind('}')
        if last_close != -1:
            selector_part = selector_part[last_close + 1:]

        # Clean up
        selector = selector_part.strip()
        # Handle multi-line selectors
        selector = ' '.join(selector.split())
        return selector


class CSSInlineExtractor:
    """
    Extract inline styles from HTML/JSX for token candidate identification.

    Finds style="" attributes and extracts values that could become tokens.
    """

    # Patterns for extracting inline styles
    STYLE_ATTR_PATTERN = re.compile(
        r'style\s*=\s*["\']([^"\']+)["\']',
        re.IGNORECASE
    )

    # JSX style object pattern
    JSX_STYLE_PATTERN = re.compile(
        r'style\s*=\s*\{\{([^}]+)\}\}',
        re.MULTILINE
    )

    async def extract_candidates(self, source: str) -> List[Tuple[str, str, int]]:
        """
        Extract inline style values as token candidates.

        Returns list of (property, value, line_number) tuples.
        """
        candidates = []

        # Determine if file or content
        if Path(source).exists():
            content = Path(source).read_text(encoding="utf-8")
        else:
            content = source

        lines = content.split('\n')

        for i, line in enumerate(lines, 1):
            # Check HTML style attribute
            for match in self.STYLE_ATTR_PATTERN.finditer(line):
                style_content = match.group(1)
                for prop, value in self._parse_style_string(style_content):
                    if self._is_token_candidate(value):
                        candidates.append((prop, value, i))

            # Check JSX style object
            for match in self.JSX_STYLE_PATTERN.finditer(line):
                style_content = match.group(1)
                for prop, value in self._parse_jsx_style(style_content):
                    if self._is_token_candidate(value):
                        candidates.append((prop, value, i))

        return candidates

    def _parse_style_string(self, style: str) -> List[Tuple[str, str]]:
        """Parse CSS style string into property-value pairs."""
        pairs = []
        for declaration in style.split(';'):
            if ':' in declaration:
                prop, value = declaration.split(':', 1)
                pairs.append((prop.strip(), value.strip()))
        return pairs

    def _parse_jsx_style(self, style: str) -> List[Tuple[str, str]]:
        """Parse JSX style object into property-value pairs."""
        pairs = []
        # Simple parsing for common cases
        for part in style.split(','):
            if ':' in part:
                prop, value = part.split(':', 1)
                prop = prop.strip().strip('"\'')
                value = value.strip().strip('"\'')
                # Convert camelCase to kebab-case
                prop = re.sub(r'([a-z])([A-Z])', r'\1-\2', prop).lower()
                pairs.append((prop, value))
        return pairs

    def _is_token_candidate(self, value: str) -> bool:
        """Check if value should be extracted as a token."""
        value = value.strip().lower()

        # Colors are always candidates
        if re.match(r'^#[0-9a-f]{3,8}$', value):
            return True
        if re.match(r'^rgba?\s*\(', value):
            return True
        if re.match(r'^hsla?\s*\(', value):
            return True

        # Dimensions with common units
        if re.match(r'^\d+(\.\d+)?(px|rem|em|%)$', value):
            return True

        # Skip variable references
        if value.startswith('var('):
            return False

        # Skip inherit/initial/etc
        if value in ('inherit', 'initial', 'unset', 'auto', 'none'):
            return False

        return False
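
Example (not part of the commit): extracting tokens from inline CSS content. extract() accepts either a file path or raw CSS, so this sketch passes CSS directly.

import asyncio
from tools.ingest.css import CSSTokenSource

CSS = """
:root {
  /* Brand primary */
  --color-primary: #3B82F6;
  --spacing-md: 1rem;
}
"""

async def demo():
    collection = await CSSTokenSource().extract(CSS)
    for token in collection:
        print(token.name, token.value, token.type.value, repr(token.description))
    # color.primary #3B82F6 color 'Brand primary'
    # spacing.md 1rem dimension ''

asyncio.run(demo())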
tools/ingest/json_tokens.py (new file, 432 lines)
@@ -0,0 +1,432 @@
"""
JSON Token Source

Extracts design tokens from JSON/YAML files.
Supports W3C Design Tokens format and Style Dictionary format.
"""

import json
import re
from pathlib import Path
from typing import List, Dict, Any, Optional

from .base import DesignToken, TokenCollection, TokenSource, TokenType, TokenCategory


class JSONTokenSource(TokenSource):
    """
    Extract tokens from JSON/YAML token files.

    Supports:
    - W3C Design Tokens Community Group format
    - Style Dictionary format
    - Tokens Studio format
    - Figma Tokens plugin format
    - Generic nested JSON with $value
    """

    @property
    def source_type(self) -> str:
        return "json"

    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from JSON file or content.

        Args:
            source: File path or JSON content string

        Returns:
            TokenCollection with extracted tokens
        """
        if self._is_file_path(source):
            file_path = Path(source)
            if not file_path.exists():
                raise FileNotFoundError(f"Token file not found: {source}")
            content = file_path.read_text(encoding="utf-8")
            source_file = str(file_path.absolute())
        else:
            content = source
            source_file = "<inline>"

        # Parse JSON
        try:
            data = json.loads(content)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON: {e}")

        # Detect format and extract
        tokens = self._extract_tokens(data, source_file)

        return TokenCollection(
            tokens=tokens,
            name=f"JSON Tokens from {Path(source_file).name if source_file != '<inline>' else 'inline'}",
            sources=[self._create_source_id(source_file)],
        )

    def _is_file_path(self, source: str) -> bool:
        """Check if source looks like a file path."""
        if source.strip().startswith('{'):
            return False
        if source.endswith('.json') or source.endswith('.tokens.json'):
            return True
        return Path(source).exists()

    def _extract_tokens(self, data: Dict, source_file: str) -> List[DesignToken]:
        """Extract tokens from parsed JSON."""
        tokens = []

        # Detect format
        if self._is_w3c_format(data):
            tokens = self._extract_w3c_tokens(data, source_file)
        elif self._is_style_dictionary_format(data):
            tokens = self._extract_style_dictionary_tokens(data, source_file)
        elif self._is_tokens_studio_format(data):
            tokens = self._extract_tokens_studio(data, source_file)
        else:
            # Generic nested format
            tokens = self._extract_nested_tokens(data, source_file)

        return tokens

    def _is_w3c_format(self, data: Dict) -> bool:
        """Check if data follows W3C Design Tokens format."""
        # W3C format uses $value and $type
        def check_node(node: Any) -> bool:
            if isinstance(node, dict):
                if '$value' in node:
                    return True
                return any(check_node(v) for v in node.values())
            return False
        return check_node(data)

    def _is_style_dictionary_format(self, data: Dict) -> bool:
        """Check if data follows Style Dictionary format."""
        # Style Dictionary uses 'value' without $
        def check_node(node: Any) -> bool:
            if isinstance(node, dict):
                if 'value' in node and '$value' not in node:
                    return True
                return any(check_node(v) for v in node.values())
            return False
        return check_node(data)

    def _is_tokens_studio_format(self, data: Dict) -> bool:
        """Check if data follows Tokens Studio format."""
        # Tokens Studio has specific structure with sets
        return '$themes' in data or '$metadata' in data

    def _extract_w3c_tokens(
        self,
        data: Dict,
        source_file: str,
        prefix: str = ""
    ) -> List[DesignToken]:
        """Extract tokens in W3C Design Tokens format."""
        tokens = []

        for key, value in data.items():
            # Skip metadata keys
            if key.startswith('$'):
                continue

            current_path = f"{prefix}.{key}" if prefix else key

            if isinstance(value, dict):
                if '$value' in value:
                    # This is a token
                    token = self._create_w3c_token(
                        current_path, value, source_file
                    )
                    tokens.append(token)
                else:
                    # Nested group
                    tokens.extend(
                        self._extract_w3c_tokens(value, source_file, current_path)
                    )

        return tokens

    def _create_w3c_token(
        self,
        name: str,
        data: Dict,
        source_file: str
    ) -> DesignToken:
        """Create token from W3C format node."""
        value = data.get('$value')
        token_type = self._parse_w3c_type(data.get('$type', ''))
        description = data.get('$description', '')

        # Handle aliases/references
        if isinstance(value, str) and value.startswith('{') and value.endswith('}'):
            # This is a reference like {colors.primary}
            pass  # Keep as-is for now

        # Get extensions
        extensions = {}
        if '$extensions' in data:
            extensions = data['$extensions']

        token = DesignToken(
            name=name,
            value=value,
            type=token_type,
            description=description,
            source=self._create_source_id(source_file),
            source_file=source_file,
            extensions=extensions,
        )

        # Check for deprecated
        if extensions.get('deprecated'):
            token.deprecated = True
            token.deprecated_message = extensions.get('deprecatedMessage', '')

        return token

    def _parse_w3c_type(self, type_str: str) -> TokenType:
        """Convert W3C type string to TokenType."""
        type_map = {
            'color': TokenType.COLOR,
            'dimension': TokenType.DIMENSION,
            'fontFamily': TokenType.FONT_FAMILY,
            'fontWeight': TokenType.FONT_WEIGHT,
            'duration': TokenType.DURATION,
            'cubicBezier': TokenType.CUBIC_BEZIER,
            'number': TokenType.NUMBER,
            'shadow': TokenType.SHADOW,
            'border': TokenType.BORDER,
            'gradient': TokenType.GRADIENT,
            'transition': TokenType.TRANSITION,
        }
        return type_map.get(type_str, TokenType.UNKNOWN)

    def _extract_style_dictionary_tokens(
        self,
        data: Dict,
        source_file: str,
        prefix: str = ""
    ) -> List[DesignToken]:
        """Extract tokens in Style Dictionary format."""
        tokens = []

        for key, value in data.items():
            current_path = f"{prefix}.{key}" if prefix else key

            if isinstance(value, dict):
                if 'value' in value:
                    # This is a token
                    token = DesignToken(
                        name=current_path,
                        value=value['value'],
                        description=value.get('comment', value.get('description', '')),
                        source=self._create_source_id(source_file),
                        source_file=source_file,
                    )

                    # Handle attributes
                    if 'attributes' in value:
                        attrs = value['attributes']
                        if 'category' in attrs:
                            token.tags.append(f"category:{attrs['category']}")

                    token.tags.append("style-dictionary")
                    tokens.append(token)
                else:
                    # Nested group
                    tokens.extend(
                        self._extract_style_dictionary_tokens(
                            value, source_file, current_path
                        )
                    )

        return tokens

    def _extract_tokens_studio(
        self,
        data: Dict,
        source_file: str
    ) -> List[DesignToken]:
        """Extract tokens from Tokens Studio format."""
        tokens = []

        # Tokens Studio has token sets as top-level keys
        # Skip metadata keys
        for set_name, set_data in data.items():
            if set_name.startswith('$'):
                continue

            if isinstance(set_data, dict):
                set_tokens = self._extract_tokens_studio_set(
                    set_data, source_file, set_name
                )
                for token in set_tokens:
                    token.group = set_name
                tokens.extend(set_tokens)

        return tokens

    def _extract_tokens_studio_set(
        self,
        data: Dict,
        source_file: str,
        prefix: str = ""
    ) -> List[DesignToken]:
        """Extract tokens from a Tokens Studio set."""
        tokens = []

        for key, value in data.items():
            current_path = f"{prefix}.{key}" if prefix else key

            if isinstance(value, dict):
                if 'value' in value and 'type' in value:
                    # This is a token
                    token = DesignToken(
                        name=current_path,
                        value=value['value'],
                        type=self._parse_tokens_studio_type(value.get('type', '')),
                        description=value.get('description', ''),
                        source=self._create_source_id(source_file),
                        source_file=source_file,
                    )
                    token.tags.append("tokens-studio")
                    tokens.append(token)
                else:
                    # Nested group
                    tokens.extend(
                        self._extract_tokens_studio_set(
                            value, source_file, current_path
                        )
                    )

        return tokens

    def _parse_tokens_studio_type(self, type_str: str) -> TokenType:
        """Convert Tokens Studio type to TokenType."""
        type_map = {
            'color': TokenType.COLOR,
            'sizing': TokenType.DIMENSION,
            'spacing': TokenType.DIMENSION,
            'borderRadius': TokenType.DIMENSION,
            'borderWidth': TokenType.DIMENSION,
            'fontFamilies': TokenType.FONT_FAMILY,
            'fontWeights': TokenType.FONT_WEIGHT,
            'fontSizes': TokenType.FONT_SIZE,
            'lineHeights': TokenType.LINE_HEIGHT,
            'letterSpacing': TokenType.LETTER_SPACING,
            'paragraphSpacing': TokenType.DIMENSION,
            'boxShadow': TokenType.SHADOW,
            'opacity': TokenType.NUMBER,
            'dimension': TokenType.DIMENSION,
            'text': TokenType.STRING,
            'other': TokenType.STRING,
        }
        return type_map.get(type_str, TokenType.UNKNOWN)

    def _extract_nested_tokens(
        self,
        data: Dict,
        source_file: str,
        prefix: str = ""
    ) -> List[DesignToken]:
        """Extract tokens from generic nested JSON."""
        tokens = []

        for key, value in data.items():
            current_path = f"{prefix}.{key}" if prefix else key

            if isinstance(value, dict):
                # Check if this looks like a token (has primitive values)
                has_nested = any(isinstance(v, dict) for v in value.values())

                if not has_nested and len(value) <= 3:
                    # Might be a simple token object
                    if 'value' in value:
                        tokens.append(DesignToken(
                            name=current_path,
                            value=value['value'],
                            source=self._create_source_id(source_file),
                            source_file=source_file,
                        ))
                    else:
                        # Recurse
                        tokens.extend(
                            self._extract_nested_tokens(value, source_file, current_path)
                        )
                else:
                    # Recurse into nested object
                    tokens.extend(
                        self._extract_nested_tokens(value, source_file, current_path)
                    )

            elif isinstance(value, (str, int, float, bool)):
                # Simple value - treat as token
                tokens.append(DesignToken(
                    name=current_path,
                    value=value,
                    source=self._create_source_id(source_file),
                    source_file=source_file,
                ))

        return tokens


class TokenExporter:
    """
    Export tokens to various JSON formats.
    """

    @staticmethod
    def to_w3c(collection: TokenCollection) -> str:
        """Export to W3C Design Tokens format."""
        result = {}

        for token in collection.tokens:
            parts = token.normalize_name().split('.')
            current = result

            for part in parts[:-1]:
                if part not in current:
                    current[part] = {}
                current = current[part]

            current[parts[-1]] = {
                "$value": token.value,
                "$type": token.type.value,
            }

            if token.description:
                current[parts[-1]]["$description"] = token.description

        return json.dumps(result, indent=2)

    @staticmethod
    def to_style_dictionary(collection: TokenCollection) -> str:
        """Export to Style Dictionary format."""
        result = {}

        for token in collection.tokens:
            parts = token.normalize_name().split('.')
            current = result

            for part in parts[:-1]:
                if part not in current:
                    current[part] = {}
                current = current[part]

            current[parts[-1]] = {
                "value": token.value,
            }

            if token.description:
                current[parts[-1]]["comment"] = token.description

        return json.dumps(result, indent=2)

    @staticmethod
    def to_flat(collection: TokenCollection) -> str:
        """Export to flat JSON object."""
        result = {}
        for token in collection.tokens:
            result[token.name] = token.value
        return json.dumps(result, indent=2)
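
Example (not part of the commit): parsing inline W3C-format JSON and re-exporting it flat. Since the string starts with "{", _is_file_path treats it as content.

import asyncio
from tools.ingest.json_tokens import JSONTokenSource, TokenExporter

W3C_JSON = """
{
  "color": {
    "primary": { "$value": "#3B82F6", "$type": "color" }
  },
  "spacing": {
    "md": { "$value": "16px", "$type": "dimension" }
  }
}
"""

async def demo():
    collection = await JSONTokenSource().extract(W3C_JSON)
    print([t.name for t in collection])      # ['color.primary', 'spacing.md']
    print(TokenExporter.to_flat(collection)) # {"color.primary": "#3B82F6", ...}

asyncio.run(demo())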
tools/ingest/merge.py (new file, 447 lines)
@@ -0,0 +1,447 @@
"""
Token Merge Module

Merge tokens from multiple sources with conflict resolution strategies.
"""

from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import List, Dict, Optional, Callable, Tuple

from .base import DesignToken, TokenCollection, TokenCategory, TokenType


class MergeStrategy(str, Enum):
    """Token merge conflict resolution strategies."""

    # Simple strategies
    FIRST = "first"  # Keep first occurrence
    LAST = "last"    # Keep last occurrence (override)
    ERROR = "error"  # Raise error on conflict

    # Value-based strategies
    PREFER_FIGMA = "prefer_figma"        # Prefer Figma source
    PREFER_CODE = "prefer_code"          # Prefer code sources (CSS, SCSS)
    PREFER_SPECIFIC = "prefer_specific"  # Prefer more specific values

    # Smart strategies
    MERGE_METADATA = "merge_metadata"  # Merge metadata, keep latest value
    INTERACTIVE = "interactive"        # Require user decision


@dataclass
class MergeConflict:
    """Represents a token name conflict during merge."""
    token_name: str
    existing: DesignToken
    incoming: DesignToken
    resolution: Optional[str] = None
    resolved_token: Optional[DesignToken] = None


@dataclass
class MergeResult:
    """Result of a token merge operation."""
    collection: TokenCollection
    conflicts: List[MergeConflict] = field(default_factory=list)
    stats: Dict[str, int] = field(default_factory=dict)
    warnings: List[str] = field(default_factory=list)

    def __post_init__(self):
        if not self.stats:
            self.stats = {
                "total_tokens": 0,
                "new_tokens": 0,
                "updated_tokens": 0,
                "conflicts_resolved": 0,
                "conflicts_unresolved": 0,
            }


class TokenMerger:
    """
    Merge multiple token collections with conflict resolution.

    Usage:
        merger = TokenMerger(strategy=MergeStrategy.LAST)
        result = merger.merge([collection1, collection2, collection3])
    """

    # Source priority for PREFER_* strategies
    SOURCE_PRIORITY = {
        "figma": 100,
        "css": 80,
        "scss": 80,
        "tailwind": 70,
        "json": 60,
    }

    def __init__(
        self,
        strategy: MergeStrategy = MergeStrategy.LAST,
        custom_resolver: Optional[Callable[[MergeConflict], DesignToken]] = None
    ):
        """
        Initialize merger.

        Args:
            strategy: Default conflict resolution strategy
            custom_resolver: Optional custom conflict resolver function
        """
        self.strategy = strategy
        self.custom_resolver = custom_resolver

    def merge(
        self,
        collections: List[TokenCollection],
        normalize_names: bool = True
    ) -> MergeResult:
        """
        Merge multiple token collections.

        Args:
            collections: List of TokenCollections to merge
            normalize_names: Whether to normalize token names before merging

        Returns:
            MergeResult with merged collection and conflict information
        """
        result = MergeResult(
            collection=TokenCollection(
                name="Merged Tokens",
                sources=[],
            )
        )

        # Track tokens by normalized name
        tokens_by_name: Dict[str, DesignToken] = {}

        for collection in collections:
            result.collection.sources.extend(collection.sources)

            for token in collection.tokens:
                # Normalize name if requested
                name = token.normalize_name() if normalize_names else token.name

                if name in tokens_by_name:
                    # Conflict detected
                    existing = tokens_by_name[name]
                    conflict = MergeConflict(
                        token_name=name,
                        existing=existing,
                        incoming=token,
                    )

                    # Resolve conflict
                    resolved = self._resolve_conflict(conflict)
                    conflict.resolved_token = resolved

                    if resolved:
                        tokens_by_name[name] = resolved
                        result.stats["conflicts_resolved"] += 1
                        result.stats["updated_tokens"] += 1
                    else:
                        result.stats["conflicts_unresolved"] += 1
                        result.warnings.append(
                            f"Unresolved conflict for token: {name}"
                        )

                    result.conflicts.append(conflict)
                else:
                    # New token
                    tokens_by_name[name] = token
                    result.stats["new_tokens"] += 1

        # Build final collection
        result.collection.tokens = list(tokens_by_name.values())
        result.stats["total_tokens"] = len(result.collection.tokens)

        return result

    def _resolve_conflict(self, conflict: MergeConflict) -> Optional[DesignToken]:
        """Resolve a single conflict based on strategy."""

        # Try custom resolver first
        if self.custom_resolver:
            return self.custom_resolver(conflict)

        # Apply strategy
        if self.strategy == MergeStrategy.FIRST:
            conflict.resolution = "kept_first"
            return conflict.existing

        elif self.strategy == MergeStrategy.LAST:
            conflict.resolution = "used_last"
            return self._update_token(conflict.incoming, conflict.existing)

        elif self.strategy == MergeStrategy.ERROR:
            conflict.resolution = "error"
            raise ValueError(
                f"Token conflict: {conflict.token_name} "
                f"(existing: {conflict.existing.source}, "
                f"incoming: {conflict.incoming.source})"
            )

        elif self.strategy == MergeStrategy.PREFER_FIGMA:
            return self._prefer_source(conflict, "figma")

        elif self.strategy == MergeStrategy.PREFER_CODE:
            return self._prefer_code_source(conflict)

        elif self.strategy == MergeStrategy.PREFER_SPECIFIC:
            return self._prefer_specific_value(conflict)

        elif self.strategy == MergeStrategy.MERGE_METADATA:
            return self._merge_metadata(conflict)

        elif self.strategy == MergeStrategy.INTERACTIVE:
            # For interactive, we can't resolve automatically
            conflict.resolution = "needs_input"
            return None

        return conflict.incoming

    def _update_token(
        self,
        source: DesignToken,
        base: DesignToken
    ) -> DesignToken:
        """Create updated token preserving some base metadata."""
        # Create new token with source's value but enhanced metadata
        updated = DesignToken(
            name=source.name,
            value=source.value,
            type=source.type,
            description=source.description or base.description,
            source=source.source,
            source_file=source.source_file,
            source_line=source.source_line,
            original_name=source.original_name,
            original_value=source.original_value,
            category=source.category,
            tags=list(set(source.tags + base.tags)),
            deprecated=source.deprecated or base.deprecated,
            deprecated_message=source.deprecated_message or base.deprecated_message,
            version=source.version,
            updated_at=datetime.now(),
            extensions={**base.extensions, **source.extensions},
        )
        return updated

    def _prefer_source(
        self,
        conflict: MergeConflict,
        preferred_source: str
    ) -> DesignToken:
        """Prefer token from specific source type."""
        existing_source = conflict.existing.source.split(':')[0]
        incoming_source = conflict.incoming.source.split(':')[0]

        if incoming_source == preferred_source:
            conflict.resolution = f"preferred_{preferred_source}"
            return self._update_token(conflict.incoming, conflict.existing)
        elif existing_source == preferred_source:
            conflict.resolution = f"kept_{preferred_source}"
            return conflict.existing
        else:
            # Neither is preferred, use last
            conflict.resolution = "fallback_last"
            return self._update_token(conflict.incoming, conflict.existing)

    def _prefer_code_source(self, conflict: MergeConflict) -> DesignToken:
        """Prefer code sources (CSS, SCSS) over design sources."""
        code_sources = {"css", "scss", "tailwind"}

        existing_source = conflict.existing.source.split(':')[0]
        incoming_source = conflict.incoming.source.split(':')[0]

        existing_is_code = existing_source in code_sources
        incoming_is_code = incoming_source in code_sources

        if incoming_is_code and not existing_is_code:
            conflict.resolution = "preferred_code"
            return self._update_token(conflict.incoming, conflict.existing)
        elif existing_is_code and not incoming_is_code:
            conflict.resolution = "kept_code"
            return conflict.existing
        else:
            # Both or neither are code, use priority
            return self._prefer_by_priority(conflict)

    def _prefer_by_priority(self, conflict: MergeConflict) -> DesignToken:
        """Choose based on source priority."""
        existing_source = conflict.existing.source.split(':')[0]
        incoming_source = conflict.incoming.source.split(':')[0]

        existing_priority = self.SOURCE_PRIORITY.get(existing_source, 0)
        incoming_priority = self.SOURCE_PRIORITY.get(incoming_source, 0)

        if incoming_priority > existing_priority:
            conflict.resolution = "higher_priority"
            return self._update_token(conflict.incoming, conflict.existing)
        else:
            conflict.resolution = "kept_priority"
            return conflict.existing

    def _prefer_specific_value(self, conflict: MergeConflict) -> DesignToken:
        """Prefer more specific/concrete values."""
        existing_value = str(conflict.existing.value).lower()
        incoming_value = str(conflict.incoming.value).lower()

        # Prefer concrete values over variables/references
        existing_is_var = existing_value.startswith('var(') or \
            existing_value.startswith('$') or \
            existing_value.startswith('{')
        incoming_is_var = incoming_value.startswith('var(') or \
            incoming_value.startswith('$') or \
            incoming_value.startswith('{')

        if incoming_is_var and not existing_is_var:
            conflict.resolution = "kept_concrete"
            return conflict.existing
        elif existing_is_var and not incoming_is_var:
            conflict.resolution = "preferred_concrete"
            return self._update_token(conflict.incoming, conflict.existing)

        # Prefer hex colors over named colors
        existing_is_hex = existing_value.startswith('#')
        incoming_is_hex = incoming_value.startswith('#')

        if incoming_is_hex and not existing_is_hex:
            conflict.resolution = "preferred_hex"
            return self._update_token(conflict.incoming, conflict.existing)
        elif existing_is_hex and not incoming_is_hex:
            conflict.resolution = "kept_hex"
            return conflict.existing

        # Default to last
        conflict.resolution = "fallback_last"
        return self._update_token(conflict.incoming, conflict.existing)

    def _merge_metadata(self, conflict: MergeConflict) -> DesignToken:
        """Merge metadata from both tokens, keep latest value."""
        conflict.resolution = "merged_metadata"

        # Use incoming value but merge all metadata
        merged_tags = list(set(
            conflict.existing.tags + conflict.incoming.tags
        ))

        merged_extensions = {
            **conflict.existing.extensions,
            **conflict.incoming.extensions
        }

        # Track both sources
        merged_extensions['dss'] = merged_extensions.get('dss', {})
        merged_extensions['dss']['previousSources'] = [
            conflict.existing.source,
            conflict.incoming.source
        ]

        return DesignToken(
            name=conflict.incoming.name,
            value=conflict.incoming.value,
            # Bug fix: str-Enum members are always truthy, so an `or`
            # fallback never fires; compare against the sentinel members.
            type=(conflict.incoming.type
                  if conflict.incoming.type != TokenType.UNKNOWN
                  else conflict.existing.type),
            description=conflict.incoming.description or conflict.existing.description,
            source=conflict.incoming.source,
            source_file=conflict.incoming.source_file,
            source_line=conflict.incoming.source_line,
            original_name=conflict.incoming.original_name,
            original_value=conflict.incoming.original_value,
            category=(conflict.incoming.category
                      if conflict.incoming.category != TokenCategory.OTHER
                      else conflict.existing.category),
            tags=merged_tags,
            deprecated=conflict.incoming.deprecated or conflict.existing.deprecated,
            deprecated_message=conflict.incoming.deprecated_message or conflict.existing.deprecated_message,
            version=conflict.incoming.version,
            updated_at=datetime.now(),
            extensions=merged_extensions,
        )


class TokenDiff:
    """
    Compare two token collections and find differences.
    """

    @staticmethod
    def diff(
        source: TokenCollection,
        target: TokenCollection
    ) -> Dict[str, List]:
        """
        Compare two token collections.

        Returns:
            Dict with 'added', 'removed', 'changed', 'unchanged' lists
        """
        source_by_name = {t.normalize_name(): t for t in source.tokens}
        target_by_name = {t.normalize_name(): t for t in target.tokens}

        source_names = set(source_by_name.keys())
        target_names = set(target_by_name.keys())

        result = {
            'added': [],      # In target but not source
            'removed': [],    # In source but not target
            'changed': [],    # In both but different value
            'unchanged': [],  # In both with same value
        }

        # Find added (in target, not in source)
        for name in target_names - source_names:
            result['added'].append(target_by_name[name])

        # Find removed (in source, not in target)
        for name in source_names - target_names:
            result['removed'].append(source_by_name[name])

        # Find changed/unchanged (in both)
        for name in source_names & target_names:
            source_token = source_by_name[name]
            target_token = target_by_name[name]

            if str(source_token.value) != str(target_token.value):
                result['changed'].append({
                    'name': name,
                    'old_value': source_token.value,
                    'new_value': target_token.value,
                    'source_token': source_token,
                    'target_token': target_token,
                })
            else:
                result['unchanged'].append(source_token)

        return result

    @staticmethod
    def summary(diff_result: Dict[str, List]) -> str:
        """Generate human-readable diff summary."""
        lines = ["Token Diff Summary:", "=" * 40]

        if diff_result['added']:
            lines.append(f"\n+ Added ({len(diff_result['added'])}):")
            for token in diff_result['added'][:10]:
                lines.append(f"  + {token.name}: {token.value}")
            if len(diff_result['added']) > 10:
                lines.append(f"  ... and {len(diff_result['added']) - 10} more")

        if diff_result['removed']:
            lines.append(f"\n- Removed ({len(diff_result['removed'])}):")
            for token in diff_result['removed'][:10]:
                lines.append(f"  - {token.name}: {token.value}")
            if len(diff_result['removed']) > 10:
                lines.append(f"  ... and {len(diff_result['removed']) - 10} more")

        if diff_result['changed']:
            lines.append(f"\n~ Changed ({len(diff_result['changed'])}):")
            for change in diff_result['changed'][:10]:
                lines.append(
                    f"  ~ {change['name']}: {change['old_value']} → {change['new_value']}"
                )
            if len(diff_result['changed']) > 10:
                lines.append(f"  ... and {len(diff_result['changed']) - 10} more")

        lines.append(f"\n  Unchanged: {len(diff_result['unchanged'])}")

        return "\n".join(lines)
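
Example (not part of the commit): a name conflict between a Figma token and a CSS token, resolved with PREFER_CODE. Token values and source IDs here are hypothetical.

from tools.ingest.base import DesignToken, TokenCollection
from tools.ingest.merge import TokenMerger, MergeStrategy

figma = TokenCollection(
    tokens=[DesignToken(name="color.primary", value="#3B82F6", source="figma:abc123")],
    sources=["figma:abc123"],
)
css = TokenCollection(
    tokens=[DesignToken(name="color.primary", value="#2563EB", source="css:tokens.css:3")],
    sources=["css:tokens.css"],
)

result = TokenMerger(strategy=MergeStrategy.PREFER_CODE).merge([figma, css])
print(result.collection.get("color.primary").value)  # #2563EB (the CSS value wins)
print(result.conflicts[0].resolution)                # preferred_code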
tools/ingest/scss.py (new file, 289 lines)
@@ -0,0 +1,289 @@
"""
SCSS Token Source

Extracts design tokens from SCSS/Sass variables.
Supports $variable declarations and @use module variables.
"""

import re
from pathlib import Path
from typing import List, Dict, Optional

from .base import DesignToken, TokenCollection, TokenSource


class SCSSTokenSource(TokenSource):
    """
    Extract tokens from SCSS/Sass files.

    Parses:
    - $variable: value;
    - $variable: value !default;
    - // Comment descriptions
    - @use module variables
    - Maps: $colors: (primary: #3B82F6, secondary: #10B981);
    """

    @property
    def source_type(self) -> str:
        return "scss"

    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from an SCSS file or content string.

        Args:
            source: File path or SCSS content string

        Returns:
            TokenCollection with extracted tokens
        """
        if self._is_file_path(source):
            file_path = Path(source)
            if not file_path.exists():
                raise FileNotFoundError(f"SCSS file not found: {source}")
            content = file_path.read_text(encoding="utf-8")
            source_file = str(file_path.absolute())
        else:
            content = source
            source_file = "<inline>"

        tokens = []

        # Extract simple variables
        tokens.extend(self._parse_variables(content, source_file))

        # Extract map variables
        tokens.extend(self._parse_maps(content, source_file))

        return TokenCollection(
            tokens=tokens,
            name=f"SCSS Tokens from {Path(source_file).name if source_file != '<inline>' else 'inline'}",
            sources=[self._create_source_id(source_file)],
        )

    def _is_file_path(self, source: str) -> bool:
        """Check whether source looks like a file path rather than SCSS content."""
        if '$' in source and ':' in source:
            return False
        if source.endswith('.scss') or source.endswith('.sass'):
            return True
        return Path(source).exists()

    def _parse_variables(self, content: str, source_file: str) -> List[DesignToken]:
        """Parse simple $variable declarations."""
        tokens = []
        lines = content.split('\n')

        # Pattern for variable declarations
        var_pattern = re.compile(
            r'^\s*'
            r'(\$[\w-]+)\s*:\s*'  # Variable name
            r'([^;!]+)'           # Value
            r'(\s*!default)?'     # Optional !default
            r'\s*;',
            re.MULTILINE
        )

        # Track comments for descriptions
        prev_comment = ""

        for i, line in enumerate(lines, 1):
            # Check for comment
            comment_match = re.match(r'^\s*//\s*(.+)$', line)
            if comment_match:
                prev_comment = comment_match.group(1).strip()
                continue

            # Check for variable
            var_match = var_pattern.match(line)
            if var_match:
                var_name = var_match.group(1)
                var_value = var_match.group(2).strip()
                is_default = bool(var_match.group(3))

                # Skip if value is a map (handled separately)
                if var_value.startswith('(') and var_value.endswith(')'):
                    prev_comment = ""
                    continue

                # Simple $var references are kept verbatim here; they can be
                # resolved to concrete values later with SCSSVariableResolver.

                token = DesignToken(
                    name=self._normalize_var_name(var_name),
                    value=self._process_value(var_value),
                    description=prev_comment,
                    source=self._create_source_id(source_file, i),
                    source_file=source_file,
                    source_line=i,
                    original_name=var_name,
                    original_value=var_value,
                )

                if is_default:
                    token.tags.append("default")

                tokens.append(token)
                prev_comment = ""
            else:
                # Reset comment if line doesn't match
                if line.strip() and not line.strip().startswith('//'):
                    prev_comment = ""

        return tokens

    def _parse_maps(self, content: str, source_file: str) -> List[DesignToken]:
        """Parse SCSS map declarations."""
        tokens = []

        # Pattern for map declarations (handles multi-line)
        map_pattern = re.compile(
            r'\$(\w[\w-]*)\s*:\s*\(([\s\S]*?)\)\s*;',
            re.MULTILINE
        )

        for match in map_pattern.finditer(content):
            map_name = match.group(1)
            map_content = match.group(2)

            # Get line number
            line_num = content[:match.start()].count('\n') + 1

            # Parse map entries
            entries = self._parse_map_entries(map_content)

            for key, value in entries.items():
                token = DesignToken(
                    name=f"{self._normalize_var_name('$' + map_name)}.{key}",
                    value=self._process_value(value),
                    source=self._create_source_id(source_file, line_num),
                    source_file=source_file,
                    source_line=line_num,
                    original_name=f"${map_name}.{key}",
                    original_value=value,
                )
                token.tags.append("from-map")
                tokens.append(token)

        return tokens

    def _parse_map_entries(self, map_content: str) -> Dict[str, str]:
        """Parse entries from an SCSS map (simplified parser for common cases,
        handling nested maps and simple key-value pairs)."""
        entries = {}

        # Remove comments
        map_content = re.sub(r'//[^\n]*', '', map_content)

        # Split by comma (not inside parentheses)
        depth = 0
        current = ""
        parts = []

        for char in map_content:
            if char == '(':
                depth += 1
                current += char
            elif char == ')':
                depth -= 1
                current += char
            elif char == ',' and depth == 0:
                parts.append(current.strip())
                current = ""
            else:
                current += char

        if current.strip():
            parts.append(current.strip())

        # Parse each part
        for part in parts:
            if ':' in part:
                key, value = part.split(':', 1)
                key = key.strip().strip('"\'')
                value = value.strip()
                entries[key] = value

        return entries

    def _normalize_var_name(self, var_name: str) -> str:
        """Convert an SCSS variable name to a token name."""
        # Remove $ prefix
        name = var_name.lstrip('$')
        # Convert kebab-case and underscores to dots
        name = re.sub(r'[-_]', '.', name)
        return name.lower()

    def _process_value(self, value: str) -> str:
        """Process an SCSS value for token storage."""
        value = value.strip()

        # Handle function calls (keep as-is for now)
        if '(' in value and ')' in value:
            return value

        # Handle quotes
        if (value.startswith('"') and value.endswith('"')) or \
           (value.startswith("'") and value.endswith("'")):
            return value[1:-1]

        return value
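
# --- Illustrative usage sketch (not part of scss.py itself) ---
# Extracting tokens from inline SCSS with SCSSTokenSource; asyncio.run is
# used because extract() is async. The sample SCSS is invented.
#
#   import asyncio
#
#   scss = """
#   // Brand primary
#   $brand-primary: #3B82F6;
#   $spacing: (sm: 4px, md: 8px);
#   """
#   collection = asyncio.run(SCSSTokenSource().extract(scss))
#   for t in collection.tokens:
#       print(t.name, "=", t.value)
#   # brand.primary = #3B82F6, spacing.sm = 4px, spacing.md = 8px
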
class SCSSVariableResolver:
    """
    Resolve SCSS variable references.

    Builds a dependency graph and resolves $var references to actual values.
    """

    def __init__(self):
        self.variables: Dict[str, str] = {}
        self.resolved: Dict[str, str] = {}
        self._resolving: set = set()  # Names currently being resolved (cycle guard)

    def add_variable(self, name: str, value: str) -> None:
        """Add a variable to the resolver."""
        self.variables[name] = value

    def resolve(self, name: str) -> Optional[str]:
        """Resolve a variable to its final value."""
        if name in self.resolved:
            return self.resolved[name]

        if name in self._resolving:
            # Circular reference; leave it unresolved
            return None

        value = self.variables.get(name)
        if not value:
            return None

        # Check if value references other variables
        if '$' in value:
            self._resolving.add(name)
            try:
                resolved_value = self._resolve_references(value)
            finally:
                self._resolving.discard(name)
            self.resolved[name] = resolved_value
            return resolved_value

        self.resolved[name] = value
        return value

    def _resolve_references(self, value: str, depth: int = 0) -> str:
        """Recursively resolve variable references in a value."""
        if depth > 10:  # Secondary guard against runaway expansion
            return value

        # Find variable references
        var_pattern = re.compile(r'\$[\w-]+')

        def replace_var(match):
            var_name = match.group(0)
            resolved = self.resolve(var_name.lstrip('$'))
            return resolved if resolved else var_name

        return var_pattern.sub(replace_var, value)

    def resolve_all(self) -> Dict[str, str]:
        """Resolve all variables."""
        for name in self.variables:
            self.resolve(name)
        return self.resolved
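The resolver chains references until a concrete value appears; a short sketch with invented variables:

resolver = SCSSVariableResolver()
resolver.add_variable("brand", "#3B82F6")
resolver.add_variable("link-color", "$brand")
resolver.add_variable("border", "1px solid $link-color")

print(resolver.resolve("border"))   # 1px solid #3B82F6
print(resolver.resolve_all())       # every variable, fully resolved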
330
tools/ingest/tailwind.py
Normal file
@@ -0,0 +1,330 @@
"""
Tailwind Token Source

Extracts design tokens from Tailwind CSS configuration files.
Supports tailwind.config.js/ts and CSS-based Tailwind v4 configurations.
"""

import re
from pathlib import Path
from typing import List, Dict, Optional

from .base import DesignToken, TokenCollection, TokenSource, TokenCategory


class TailwindTokenSource(TokenSource):
    """
    Extract tokens from Tailwind CSS configuration.

    Parses:
    - tailwind.config.js/ts (theme and extend sections)
    - Tailwind v4 CSS-based configuration
    - CSS custom properties from Tailwind output
    """

    # Tailwind category mappings
    TAILWIND_CATEGORIES = {
        'colors': TokenCategory.COLORS,
        'backgroundColor': TokenCategory.COLORS,
        'textColor': TokenCategory.COLORS,
        'borderColor': TokenCategory.COLORS,
        'spacing': TokenCategory.SPACING,
        'padding': TokenCategory.SPACING,
        'margin': TokenCategory.SPACING,
        'gap': TokenCategory.SPACING,
        'fontSize': TokenCategory.TYPOGRAPHY,
        'fontFamily': TokenCategory.TYPOGRAPHY,
        'fontWeight': TokenCategory.TYPOGRAPHY,
        'lineHeight': TokenCategory.TYPOGRAPHY,
        'letterSpacing': TokenCategory.TYPOGRAPHY,
        'width': TokenCategory.SIZING,
        'height': TokenCategory.SIZING,
        'maxWidth': TokenCategory.SIZING,
        'maxHeight': TokenCategory.SIZING,
        'minWidth': TokenCategory.SIZING,
        'minHeight': TokenCategory.SIZING,
        'borderRadius': TokenCategory.BORDERS,
        'borderWidth': TokenCategory.BORDERS,
        'boxShadow': TokenCategory.SHADOWS,
        'dropShadow': TokenCategory.SHADOWS,
        'opacity': TokenCategory.OPACITY,
        'zIndex': TokenCategory.Z_INDEX,
        'transitionDuration': TokenCategory.MOTION,
        'transitionTimingFunction': TokenCategory.MOTION,
        'animation': TokenCategory.MOTION,
        'screens': TokenCategory.BREAKPOINTS,
    }

    @property
    def source_type(self) -> str:
        return "tailwind"

    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from a Tailwind config.

        Args:
            source: Path to tailwind.config.js/ts or directory containing it

        Returns:
            TokenCollection with extracted tokens
        """
        config_path = self._find_config(source)
        if not config_path:
            raise FileNotFoundError(f"Tailwind config not found in: {source}")

        content = config_path.read_text(encoding="utf-8")
        source_file = str(config_path.absolute())

        # Parse based on file type
        if config_path.suffix in ('.js', '.cjs', '.mjs', '.ts'):
            tokens = self._parse_js_config(content, source_file)
        elif config_path.suffix == '.css':
            tokens = self._parse_css_config(content, source_file)
        else:
            tokens = []

        return TokenCollection(
            tokens=tokens,
            name=f"Tailwind Tokens from {config_path.name}",
            sources=[self._create_source_id(source_file)],
        )

    def _find_config(self, source: str) -> Optional[Path]:
        """Find the Tailwind config file."""
        path = Path(source)

        # If it's a file, use it directly
        if path.is_file():
            return path

        # If it's a directory, look for config files
        if path.is_dir():
            config_names = [
                'tailwind.config.js',
                'tailwind.config.cjs',
                'tailwind.config.mjs',
                'tailwind.config.ts',
            ]
            for name in config_names:
                config_path = path / name
                if config_path.exists():
                    return config_path

        return None

    def _parse_js_config(self, content: str, source_file: str) -> List[DesignToken]:
        """Parse a JavaScript/TypeScript Tailwind config."""
        tokens = []

        # Extract the theme object using regex (simplified parsing).
        # This handles common patterns but may not cover all edge cases.

        # Look for theme: { ... } or theme.extend: { ... }
        theme_match = re.search(
            r'theme\s*:\s*\{([\s\S]*?)\n\s*\}(?=\s*[,}])',
            content
        )

        extend_match = re.search(
            r'extend\s*:\s*\{([\s\S]*?)\n\s{4}\}',
            content
        )

        if extend_match:
            theme_content = extend_match.group(1)
            tokens.extend(self._parse_theme_object(theme_content, source_file, "extend"))

        if theme_match and not extend_match:
            theme_content = theme_match.group(1)
            tokens.extend(self._parse_theme_object(theme_content, source_file, "theme"))

        return tokens

    def _parse_theme_object(self, content: str, source_file: str, prefix: str) -> List[DesignToken]:
        """Parse theme object content."""
        tokens = []

        # Find property blocks like: colors: { primary: '#3B82F6', ... }
        prop_pattern = re.compile(
            r"(\w+)\s*:\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}",
            re.MULTILINE
        )

        for match in prop_pattern.finditer(content):
            category_name = match.group(1)
            category_content = match.group(2)

            category = self.TAILWIND_CATEGORIES.get(
                category_name, TokenCategory.OTHER
            )

            # Parse values in this category
            tokens.extend(
                self._parse_category_values(
                    category_name,
                    category_content,
                    source_file,
                    category
                )
            )

        return tokens

    def _parse_category_values(
        self,
        category_name: str,
        content: str,
        source_file: str,
        category: TokenCategory
    ) -> List[DesignToken]:
        """Parse values within a category."""
        tokens = []

        # Match key: value pairs
        # Handles: key: 'value', key: "value", key: value, 'key': value
        value_pattern = re.compile(
            r"['\"]?(\w[\w-]*)['\"]?\s*:\s*['\"]?([^,'\"}\n]+)['\"]?",
        )

        for match in value_pattern.finditer(content):
            key = match.group(1)
            value = match.group(2).strip()

            # Skip function calls and complex values for now
            if '(' in value or '{' in value:
                continue

            # Skip references to other values
            if value.startswith('colors.') or value.startswith('theme('):
                continue

            token = DesignToken(
                name=f"{category_name}.{key}",
                value=value,
                source=self._create_source_id(source_file),
                source_file=source_file,
                original_name=f"{category_name}.{key}",
                original_value=value,
                category=category,
            )
            token.tags.append("tailwind")
            tokens.append(token)

        return tokens

    def _parse_css_config(self, content: str, source_file: str) -> List[DesignToken]:
        """Parse Tailwind v4 CSS-based configuration."""
        tokens = []

        # Tailwind v4 uses the @theme directive
        theme_match = re.search(
            r'@theme\s*\{([\s\S]*?)\}',
            content
        )

        if theme_match:
            theme_content = theme_match.group(1)

            # Parse CSS custom properties
            var_pattern = re.compile(
                r'(--[\w-]+)\s*:\s*([^;]+);'
            )

            for match in var_pattern.finditer(theme_content):
                var_name = match.group(1)
                var_value = match.group(2).strip()

                # Determine category from variable name
                category = self._category_from_var_name(var_name)

                token = DesignToken(
                    name=self._normalize_var_name(var_name),
                    value=var_value,
                    source=self._create_source_id(source_file),
                    source_file=source_file,
                    original_name=var_name,
                    original_value=var_value,
                    category=category,
                )
                token.tags.append("tailwind-v4")
                tokens.append(token)

        return tokens

    def _normalize_var_name(self, var_name: str) -> str:
        """Convert a CSS variable name to a token name."""
        name = var_name.lstrip('-')
        name = name.replace('-', '.')
        return name.lower()

    def _category_from_var_name(self, var_name: str) -> TokenCategory:
        """Determine the token category from a variable name."""
        name_lower = var_name.lower()

        if 'color' in name_lower or 'bg' in name_lower:
            return TokenCategory.COLORS
        if 'spacing' in name_lower or 'gap' in name_lower:
            return TokenCategory.SPACING
        if 'font' in name_lower or 'text' in name_lower:
            return TokenCategory.TYPOGRAPHY
        if 'radius' in name_lower or 'border' in name_lower:
            return TokenCategory.BORDERS
        if 'shadow' in name_lower:
            return TokenCategory.SHADOWS

        return TokenCategory.OTHER
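
# --- Illustrative usage sketch (not part of tailwind.py itself) ---
# Assuming "./my-app" is a project directory that contains a
# tailwind.config.js with a theme/extend block:
#
#   import asyncio
#
#   collection = asyncio.run(TailwindTokenSource().extract("./my-app"))
#   colors = [t for t in collection.tokens if t.name.startswith("colors.")]
#   print(f"{len(collection.tokens)} tokens, {len(colors)} of them colors")
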
class TailwindClassExtractor:
    """
    Extract Tailwind class usage from source files.

    Identifies Tailwind utility classes for analysis and migration.
    """

    # Common Tailwind class prefixes ('text-' covers both color and typography utilities)
    TAILWIND_PREFIXES = [
        'bg-', 'text-', 'border-', 'ring-',
        'p-', 'px-', 'py-', 'pt-', 'pr-', 'pb-', 'pl-',
        'm-', 'mx-', 'my-', 'mt-', 'mr-', 'mb-', 'ml-',
        'w-', 'h-', 'min-w-', 'min-h-', 'max-w-', 'max-h-',
        'flex-', 'grid-', 'gap-',
        'font-', 'leading-', 'tracking-',
        'rounded-', 'shadow-', 'opacity-',
        'z-', 'transition-', 'duration-', 'ease-',
    ]

    async def extract_usage(self, source: str) -> Dict[str, List[str]]:
        """
        Extract Tailwind class usage from a file or content string.

        Returns a dict mapping class categories to lists of used classes.
        """
        if Path(source).exists():
            content = Path(source).read_text(encoding="utf-8")
        else:
            content = source

        usage: Dict[str, List[str]] = {}

        # Find className or class attributes
        class_pattern = re.compile(
            r'(?:className|class)\s*=\s*["\']([^"\']+)["\']'
        )

        for match in class_pattern.finditer(content):
            classes = match.group(1).split()

            for cls in classes:
                # Check if it's a Tailwind class
                for prefix in self.TAILWIND_PREFIXES:
                    if cls.startswith(prefix):
                        category = prefix.rstrip('-')
                        if category not in usage:
                            usage[category] = []
                        if cls not in usage[category]:
                            usage[category].append(cls)
                        break

        return usage
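And the class extractor run against a one-line JSX snippet (the markup string is invented for illustration; extract_usage is async, hence asyncio.run):

import asyncio

markup = '<div className="bg-blue-500 text-white p-4 rounded-lg">Hi</div>'
usage = asyncio.run(TailwindClassExtractor().extract_usage(markup))
print(usage)
# {'bg': ['bg-blue-500'], 'text': ['text-white'], 'p': ['p-4'], 'rounded': ['rounded-lg']}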