""" JSON Token Source Extracts design tokens from JSON/YAML files. Supports W3C Design Tokens format and Style Dictionary format. """ import json import re from pathlib import Path from typing import List, Dict, Any, Optional from .base import DesignToken, TokenCollection, TokenSource, TokenType, TokenCategory class JSONTokenSource(TokenSource): """ Extract tokens from JSON/YAML token files. Supports: - W3C Design Tokens Community Group format - Style Dictionary format - Tokens Studio format - Figma Tokens plugin format - Generic nested JSON with $value """ @property def source_type(self) -> str: return "json" async def extract(self, source: str) -> TokenCollection: """ Extract tokens from JSON file or content. Args: source: File path or JSON content string Returns: TokenCollection with extracted tokens """ if self._is_file_path(source): file_path = Path(source) if not file_path.exists(): raise FileNotFoundError(f"Token file not found: {source}") content = file_path.read_text(encoding="utf-8") source_file = str(file_path.absolute()) else: content = source source_file = "" # Parse JSON try: data = json.loads(content) except json.JSONDecodeError as e: raise ValueError(f"Invalid JSON: {e}") # Detect format and extract tokens = self._extract_tokens(data, source_file) return TokenCollection( tokens=tokens, name=f"JSON Tokens from {Path(source_file).name if source_file != '' else 'inline'}", sources=[self._create_source_id(source_file)], ) def _is_file_path(self, source: str) -> bool: """Check if source looks like a file path.""" if source.strip().startswith('{'): return False if source.endswith('.json') or source.endswith('.tokens.json'): return True return Path(source).exists() def _extract_tokens(self, data: Dict, source_file: str) -> List[DesignToken]: """Extract tokens from parsed JSON.""" tokens = [] # Detect format if self._is_w3c_format(data): tokens = self._extract_w3c_tokens(data, source_file) elif self._is_style_dictionary_format(data): tokens = self._extract_style_dictionary_tokens(data, source_file) elif self._is_tokens_studio_format(data): tokens = self._extract_tokens_studio(data, source_file) else: # Generic nested format tokens = self._extract_nested_tokens(data, source_file) return tokens def _is_w3c_format(self, data: Dict) -> bool: """Check if data follows W3C Design Tokens format.""" # W3C format uses $value and $type def check_node(node: Any) -> bool: if isinstance(node, dict): if '$value' in node: return True return any(check_node(v) for v in node.values()) return False return check_node(data) def _is_style_dictionary_format(self, data: Dict) -> bool: """Check if data follows Style Dictionary format.""" # Style Dictionary uses 'value' without $ def check_node(node: Any) -> bool: if isinstance(node, dict): if 'value' in node and '$value' not in node: return True return any(check_node(v) for v in node.values()) return False return check_node(data) def _is_tokens_studio_format(self, data: Dict) -> bool: """Check if data follows Tokens Studio format.""" # Tokens Studio has specific structure with sets return '$themes' in data or '$metadata' in data def _extract_w3c_tokens( self, data: Dict, source_file: str, prefix: str = "" ) -> List[DesignToken]: """Extract tokens in W3C Design Tokens format.""" tokens = [] for key, value in data.items(): # Skip metadata keys if key.startswith('$'): continue current_path = f"{prefix}.{key}" if prefix else key if isinstance(value, dict): if '$value' in value: # This is a token token = self._create_w3c_token( current_path, value, source_file ) tokens.append(token) else: # Nested group tokens.extend( self._extract_w3c_tokens(value, source_file, current_path) ) return tokens def _create_w3c_token( self, name: str, data: Dict, source_file: str ) -> DesignToken: """Create token from W3C format node.""" value = data.get('$value') token_type = self._parse_w3c_type(data.get('$type', '')) description = data.get('$description', '') # Handle aliases/references if isinstance(value, str) and value.startswith('{') and value.endswith('}'): # This is a reference like {colors.primary} pass # Keep as-is for now # Get extensions extensions = {} if '$extensions' in data: extensions = data['$extensions'] token = DesignToken( name=name, value=value, type=token_type, description=description, source=self._create_source_id(source_file), source_file=source_file, extensions=extensions, ) # Check for deprecated if extensions.get('deprecated'): token.deprecated = True token.deprecated_message = extensions.get('deprecatedMessage', '') return token def _parse_w3c_type(self, type_str: str) -> TokenType: """Convert W3C type string to TokenType.""" type_map = { 'color': TokenType.COLOR, 'dimension': TokenType.DIMENSION, 'fontFamily': TokenType.FONT_FAMILY, 'fontWeight': TokenType.FONT_WEIGHT, 'duration': TokenType.DURATION, 'cubicBezier': TokenType.CUBIC_BEZIER, 'number': TokenType.NUMBER, 'shadow': TokenType.SHADOW, 'border': TokenType.BORDER, 'gradient': TokenType.GRADIENT, 'transition': TokenType.TRANSITION, } return type_map.get(type_str, TokenType.UNKNOWN) def _extract_style_dictionary_tokens( self, data: Dict, source_file: str, prefix: str = "" ) -> List[DesignToken]: """Extract tokens in Style Dictionary format.""" tokens = [] for key, value in data.items(): current_path = f"{prefix}.{key}" if prefix else key if isinstance(value, dict): if 'value' in value: # This is a token token = DesignToken( name=current_path, value=value['value'], description=value.get('comment', value.get('description', '')), source=self._create_source_id(source_file), source_file=source_file, ) # Handle attributes if 'attributes' in value: attrs = value['attributes'] if 'category' in attrs: token.tags.append(f"category:{attrs['category']}") token.tags.append("style-dictionary") tokens.append(token) else: # Nested group tokens.extend( self._extract_style_dictionary_tokens( value, source_file, current_path ) ) return tokens def _extract_tokens_studio( self, data: Dict, source_file: str ) -> List[DesignToken]: """Extract tokens from Tokens Studio format.""" tokens = [] # Tokens Studio has token sets as top-level keys # Skip metadata keys for set_name, set_data in data.items(): if set_name.startswith('$'): continue if isinstance(set_data, dict): set_tokens = self._extract_tokens_studio_set( set_data, source_file, set_name ) for token in set_tokens: token.group = set_name tokens.extend(set_tokens) return tokens def _extract_tokens_studio_set( self, data: Dict, source_file: str, prefix: str = "" ) -> List[DesignToken]: """Extract tokens from a Tokens Studio set.""" tokens = [] for key, value in data.items(): current_path = f"{prefix}.{key}" if prefix else key if isinstance(value, dict): if 'value' in value and 'type' in value: # This is a token token = DesignToken( name=current_path, value=value['value'], type=self._parse_tokens_studio_type(value.get('type', '')), description=value.get('description', ''), source=self._create_source_id(source_file), source_file=source_file, ) token.tags.append("tokens-studio") tokens.append(token) else: # Nested group tokens.extend( self._extract_tokens_studio_set( value, source_file, current_path ) ) return tokens def _parse_tokens_studio_type(self, type_str: str) -> TokenType: """Convert Tokens Studio type to TokenType.""" type_map = { 'color': TokenType.COLOR, 'sizing': TokenType.DIMENSION, 'spacing': TokenType.DIMENSION, 'borderRadius': TokenType.DIMENSION, 'borderWidth': TokenType.DIMENSION, 'fontFamilies': TokenType.FONT_FAMILY, 'fontWeights': TokenType.FONT_WEIGHT, 'fontSizes': TokenType.FONT_SIZE, 'lineHeights': TokenType.LINE_HEIGHT, 'letterSpacing': TokenType.LETTER_SPACING, 'paragraphSpacing': TokenType.DIMENSION, 'boxShadow': TokenType.SHADOW, 'opacity': TokenType.NUMBER, 'dimension': TokenType.DIMENSION, 'text': TokenType.STRING, 'other': TokenType.STRING, } return type_map.get(type_str, TokenType.UNKNOWN) def _extract_nested_tokens( self, data: Dict, source_file: str, prefix: str = "" ) -> List[DesignToken]: """Extract tokens from generic nested JSON.""" tokens = [] for key, value in data.items(): current_path = f"{prefix}.{key}" if prefix else key if isinstance(value, dict): # Check if this looks like a token (has primitive values) has_nested = any(isinstance(v, dict) for v in value.values()) if not has_nested and len(value) <= 3: # Might be a simple token object if 'value' in value: tokens.append(DesignToken( name=current_path, value=value['value'], source=self._create_source_id(source_file), source_file=source_file, )) else: # Recurse tokens.extend( self._extract_nested_tokens(value, source_file, current_path) ) else: # Recurse into nested object tokens.extend( self._extract_nested_tokens(value, source_file, current_path) ) elif isinstance(value, (str, int, float, bool)): # Simple value - treat as token tokens.append(DesignToken( name=current_path, value=value, source=self._create_source_id(source_file), source_file=source_file, )) return tokens class TokenExporter: """ Export tokens to various JSON formats. """ @staticmethod def to_w3c(collection: TokenCollection) -> str: """Export to W3C Design Tokens format.""" result = {} for token in collection.tokens: parts = token.normalize_name().split('.') current = result for part in parts[:-1]: if part not in current: current[part] = {} current = current[part] current[parts[-1]] = { "$value": token.value, "$type": token.type.value, } if token.description: current[parts[-1]]["$description"] = token.description return json.dumps(result, indent=2) @staticmethod def to_style_dictionary(collection: TokenCollection) -> str: """Export to Style Dictionary format.""" result = {} for token in collection.tokens: parts = token.normalize_name().split('.') current = result for part in parts[:-1]: if part not in current: current[part] = {} current = current[part] current[parts[-1]] = { "value": token.value, } if token.description: current[parts[-1]]["comment"] = token.description return json.dumps(result, indent=2) @staticmethod def to_flat(collection: TokenCollection) -> str: """Export to flat JSON object.""" result = {} for token in collection.tokens: result[token.name] = token.value return json.dumps(result, indent=2)