dss/tools/ingest/json_tokens.py

Commit 276ed71f31 (Digital Production Factory): Initial commit: Clean DSS implementation

Migrated from design-system-swarm with fresh git history.
Old project history preserved in /home/overbits/apps/design-system-swarm.

Core components:
- MCP Server (Python FastAPI with mcp 1.23.1)
- Claude Plugin (agents, commands, skills, strategies, hooks, core)
- DSS Backend (dss-mvp1 - token translation, Figma sync)
- Admin UI (Node.js/React)
- Server (Node.js/Express)
- Storybook integration (dss-mvp1/.storybook)

Self-contained configuration:
- All paths are relative or use DSS_BASE_PATH=/home/overbits/dss
- PYTHONPATH configured for dss-mvp1 and dss-claude-plugin
- .env file with all configuration (a hypothetical sketch follows this list)
- Claude plugin uses ${CLAUDE_PLUGIN_ROOT} for portability
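
A hypothetical .env sketch using only the variables named above (the exact subpaths, and whether the loader expands ${...} references, are assumptions, not the repo's actual file):

DSS_BASE_PATH=/home/overbits/dss
PYTHONPATH=${DSS_BASE_PATH}/dss-mvp1:${DSS_BASE_PATH}/dss-claude-plugin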

Migration completed: $(date)
🤖 Clean migration with full functionality preserved
2025-12-09 18:45:48 -03:00


"""
JSON Token Source
Extracts design tokens from JSON/YAML files.
Supports W3C Design Tokens format and Style Dictionary format.
"""
import json
import re
from pathlib import Path
from typing import List, Dict, Any, Optional
from .base import DesignToken, TokenCollection, TokenSource, TokenType, TokenCategory
class JSONTokenSource(TokenSource):
"""
Extract tokens from JSON/YAML token files.
Supports:
- W3C Design Tokens Community Group format
- Style Dictionary format
- Tokens Studio format
- Figma Tokens plugin format
- Generic nested JSON with $value
"""
    @property
    def source_type(self) -> str:
        return "json"

    async def extract(self, source: str) -> TokenCollection:
        """
        Extract tokens from JSON file or content.

        Args:
            source: File path or JSON content string

        Returns:
            TokenCollection with extracted tokens
        """
        if self._is_file_path(source):
            file_path = Path(source)
            if not file_path.exists():
                raise FileNotFoundError(f"Token file not found: {source}")
            content = file_path.read_text(encoding="utf-8")
            source_file = str(file_path.absolute())
        else:
            content = source
            source_file = "<inline>"

        # Parse JSON
        try:
            data = json.loads(content)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON: {e}") from e

        # Detect format and extract
        tokens = self._extract_tokens(data, source_file)

        return TokenCollection(
            tokens=tokens,
            name=f"JSON Tokens from {Path(source_file).name if source_file != '<inline>' else 'inline'}",
            sources=[self._create_source_id(source_file)],
        )
    def _is_file_path(self, source: str) -> bool:
        """Check if source looks like a file path."""
        if source.strip().startswith('{'):
            return False
        if source.endswith('.json'):  # also covers *.tokens.json
            return True
        return Path(source).exists()
    def _extract_tokens(self, data: Dict, source_file: str) -> List[DesignToken]:
        """Extract tokens from parsed JSON."""
        # Detect format. Tokens Studio is checked first: its sets use bare
        # 'value' keys and would otherwise be misdetected as Style Dictionary.
        if self._is_tokens_studio_format(data):
            tokens = self._extract_tokens_studio(data, source_file)
        elif self._is_w3c_format(data):
            tokens = self._extract_w3c_tokens(data, source_file)
        elif self._is_style_dictionary_format(data):
            tokens = self._extract_style_dictionary_tokens(data, source_file)
        else:
            # Generic nested format
            tokens = self._extract_nested_tokens(data, source_file)
        return tokens
    def _is_w3c_format(self, data: Dict) -> bool:
        """Check if data follows W3C Design Tokens format."""
        # W3C format uses $value and $type
        def check_node(node: Any) -> bool:
            if isinstance(node, dict):
                if '$value' in node:
                    return True
                return any(check_node(v) for v in node.values())
            return False
        return check_node(data)

    def _is_style_dictionary_format(self, data: Dict) -> bool:
        """Check if data follows Style Dictionary format."""
        # Style Dictionary uses 'value' without $
        def check_node(node: Any) -> bool:
            if isinstance(node, dict):
                if 'value' in node and '$value' not in node:
                    return True
                return any(check_node(v) for v in node.values())
            return False
        return check_node(data)

    def _is_tokens_studio_format(self, data: Dict) -> bool:
        """Check if data follows Tokens Studio format."""
        # Tokens Studio files carry top-level $themes / $metadata keys
        return '$themes' in data or '$metadata' in data
    def _extract_w3c_tokens(
        self,
        data: Dict,
        source_file: str,
        prefix: str = ""
    ) -> List[DesignToken]:
        """Extract tokens in W3C Design Tokens format."""
        tokens = []
        for key, value in data.items():
            # Skip metadata keys
            if key.startswith('$'):
                continue
            current_path = f"{prefix}.{key}" if prefix else key
            if isinstance(value, dict):
                if '$value' in value:
                    # This is a token
                    token = self._create_w3c_token(
                        current_path, value, source_file
                    )
                    tokens.append(token)
                else:
                    # Nested group
                    tokens.extend(
                        self._extract_w3c_tokens(value, source_file, current_path)
                    )
        return tokens

    def _create_w3c_token(
        self,
        name: str,
        data: Dict,
        source_file: str
    ) -> DesignToken:
        """Create token from W3C format node."""
        value = data.get('$value')
        token_type = self._parse_w3c_type(data.get('$type', ''))
        description = data.get('$description', '')

        # Handle aliases/references
        if isinstance(value, str) and value.startswith('{') and value.endswith('}'):
            # This is a reference like {colors.primary}
            pass  # Keep as-is for now

        # Get extensions
        extensions = {}
        if '$extensions' in data:
            extensions = data['$extensions']

        token = DesignToken(
            name=name,
            value=value,
            type=token_type,
            description=description,
            source=self._create_source_id(source_file),
            source_file=source_file,
            extensions=extensions,
        )

        # Check for deprecated
        if extensions.get('deprecated'):
            token.deprecated = True
            token.deprecated_message = extensions.get('deprecatedMessage', '')

        return token
    def _parse_w3c_type(self, type_str: str) -> TokenType:
        """Convert W3C type string to TokenType."""
        type_map = {
            'color': TokenType.COLOR,
            'dimension': TokenType.DIMENSION,
            'fontFamily': TokenType.FONT_FAMILY,
            'fontWeight': TokenType.FONT_WEIGHT,
            'duration': TokenType.DURATION,
            'cubicBezier': TokenType.CUBIC_BEZIER,
            'number': TokenType.NUMBER,
            'shadow': TokenType.SHADOW,
            'border': TokenType.BORDER,
            'gradient': TokenType.GRADIENT,
            'transition': TokenType.TRANSITION,
        }
        return type_map.get(type_str, TokenType.UNKNOWN)
    def _extract_style_dictionary_tokens(
        self,
        data: Dict,
        source_file: str,
        prefix: str = ""
    ) -> List[DesignToken]:
        """Extract tokens in Style Dictionary format."""
        tokens = []
        for key, value in data.items():
            current_path = f"{prefix}.{key}" if prefix else key
            if isinstance(value, dict):
                if 'value' in value:
                    # This is a token
                    token = DesignToken(
                        name=current_path,
                        value=value['value'],
                        description=value.get('comment', value.get('description', '')),
                        source=self._create_source_id(source_file),
                        source_file=source_file,
                    )
                    # Handle attributes
                    if 'attributes' in value:
                        attrs = value['attributes']
                        if 'category' in attrs:
                            token.tags.append(f"category:{attrs['category']}")
                    token.tags.append("style-dictionary")
                    tokens.append(token)
                else:
                    # Nested group
                    tokens.extend(
                        self._extract_style_dictionary_tokens(
                            value, source_file, current_path
                        )
                    )
        return tokens
    def _extract_tokens_studio(
        self,
        data: Dict,
        source_file: str
    ) -> List[DesignToken]:
        """Extract tokens from Tokens Studio format."""
        tokens = []
        # Tokens Studio has token sets as top-level keys;
        # skip metadata keys
        for set_name, set_data in data.items():
            if set_name.startswith('$'):
                continue
            if isinstance(set_data, dict):
                set_tokens = self._extract_tokens_studio_set(
                    set_data, source_file, set_name
                )
                for token in set_tokens:
                    token.group = set_name
                tokens.extend(set_tokens)
        return tokens

    def _extract_tokens_studio_set(
        self,
        data: Dict,
        source_file: str,
        prefix: str = ""
    ) -> List[DesignToken]:
        """Extract tokens from a Tokens Studio set."""
        tokens = []
        for key, value in data.items():
            current_path = f"{prefix}.{key}" if prefix else key
            if isinstance(value, dict):
                if 'value' in value and 'type' in value:
                    # This is a token
                    token = DesignToken(
                        name=current_path,
                        value=value['value'],
                        type=self._parse_tokens_studio_type(value.get('type', '')),
                        description=value.get('description', ''),
                        source=self._create_source_id(source_file),
                        source_file=source_file,
                    )
                    token.tags.append("tokens-studio")
                    tokens.append(token)
                else:
                    # Nested group
                    tokens.extend(
                        self._extract_tokens_studio_set(
                            value, source_file, current_path
                        )
                    )
        return tokens

    def _parse_tokens_studio_type(self, type_str: str) -> TokenType:
        """Convert Tokens Studio type to TokenType."""
        type_map = {
            'color': TokenType.COLOR,
            'sizing': TokenType.DIMENSION,
            'spacing': TokenType.DIMENSION,
            'borderRadius': TokenType.DIMENSION,
            'borderWidth': TokenType.DIMENSION,
            'fontFamilies': TokenType.FONT_FAMILY,
            'fontWeights': TokenType.FONT_WEIGHT,
            'fontSizes': TokenType.FONT_SIZE,
            'lineHeights': TokenType.LINE_HEIGHT,
            'letterSpacing': TokenType.LETTER_SPACING,
            'paragraphSpacing': TokenType.DIMENSION,
            'boxShadow': TokenType.SHADOW,
            'opacity': TokenType.NUMBER,
            'dimension': TokenType.DIMENSION,
            'text': TokenType.STRING,
            'other': TokenType.STRING,
        }
        return type_map.get(type_str, TokenType.UNKNOWN)
    def _extract_nested_tokens(
        self,
        data: Dict,
        source_file: str,
        prefix: str = ""
    ) -> List[DesignToken]:
        """Extract tokens from generic nested JSON."""
        tokens = []
        for key, value in data.items():
            current_path = f"{prefix}.{key}" if prefix else key
            if isinstance(value, dict):
                # Check if this looks like a token (has primitive values)
                has_nested = any(isinstance(v, dict) for v in value.values())
                if not has_nested and len(value) <= 3:
                    # Might be a simple token object
                    if 'value' in value:
                        tokens.append(DesignToken(
                            name=current_path,
                            value=value['value'],
                            source=self._create_source_id(source_file),
                            source_file=source_file,
                        ))
                    else:
                        # Recurse
                        tokens.extend(
                            self._extract_nested_tokens(value, source_file, current_path)
                        )
                else:
                    # Recurse into nested object
                    tokens.extend(
                        self._extract_nested_tokens(value, source_file, current_path)
                    )
            elif isinstance(value, (str, int, float, bool)):
                # Simple value - treat as token
                tokens.append(DesignToken(
                    name=current_path,
                    value=value,
                    source=self._create_source_id(source_file),
                    source_file=source_file,
                ))
        return tokens
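
# Illustrative fallback behavior (assumed input, not from the repo):
# {"spacing": {"sm": "4px", "md": "8px"}} yields tokens named
# "spacing.sm" and "spacing.md" via _extract_nested_tokens().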

class TokenExporter:
    """
    Export tokens to various JSON formats.
    """

    @staticmethod
    def to_w3c(collection: TokenCollection) -> str:
        """Export to W3C Design Tokens format."""
        result = {}
        for token in collection.tokens:
            parts = token.normalize_name().split('.')
            current = result
            for part in parts[:-1]:
                if part not in current:
                    current[part] = {}
                current = current[part]
            current[parts[-1]] = {
                "$value": token.value,
                "$type": token.type.value,
            }
            if token.description:
                current[parts[-1]]["$description"] = token.description
        return json.dumps(result, indent=2)

    @staticmethod
    def to_style_dictionary(collection: TokenCollection) -> str:
        """Export to Style Dictionary format."""
        result = {}
        for token in collection.tokens:
            parts = token.normalize_name().split('.')
            current = result
            for part in parts[:-1]:
                if part not in current:
                    current[part] = {}
                current = current[part]
            current[parts[-1]] = {
                "value": token.value,
            }
            if token.description:
                current[parts[-1]]["comment"] = token.description
        return json.dumps(result, indent=2)

    @staticmethod
    def to_flat(collection: TokenCollection) -> str:
        """Export to flat JSON object."""
        result = {token.name: token.value for token in collection.tokens}
        return json.dumps(result, indent=2)
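
A minimal usage sketch (not part of the file above; it assumes JSONTokenSource can be constructed with no arguments and that DesignToken/TokenCollection behave as referenced in the code):

import asyncio

from dss.tools.ingest.json_tokens import JSONTokenSource, TokenExporter

W3C_SAMPLE = '{"color": {"primary": {"$value": "#ff0000", "$type": "color"}}}'

async def main() -> None:
    # Inline JSON is accepted because _is_file_path() rejects strings starting with '{'
    collection = await JSONTokenSource().extract(W3C_SAMPLE)
    for token in collection.tokens:
        print(token.name, token.value)  # e.g. color.primary #ff0000
    # Round-trip the collection back out as Style Dictionary JSON
    print(TokenExporter.to_style_dictionary(collection))

asyncio.run(main())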