# dss/demo/tools/analyze/styles.py

"""
Style Pattern Analyzer

Detects and analyzes style patterns in code to identify:
- Hardcoded values that should be tokens
- Duplicate values across files
- Inconsistent naming patterns
- Unused styles
"""

import re
from pathlib import Path
from typing import List, Dict, Any, Optional, Set, Tuple
from collections import defaultdict
from dataclasses import dataclass, field

from .base import (
    Location,
    TokenCandidate,
    StylePattern,
    StylingApproach,
)


# Color patterns
HEX_COLOR = re.compile(r'#(?:[0-9a-fA-F]{3}){1,2}\b')
RGB_COLOR = re.compile(r'rgba?\s*\(\s*\d+\s*,\s*\d+\s*,\s*\d+(?:\s*,\s*[\d.]+)?\s*\)')
HSL_COLOR = re.compile(r'hsla?\s*\(\s*\d+\s*,\s*[\d.]+%\s*,\s*[\d.]+%(?:\s*,\s*[\d.]+)?\s*\)')
OKLCH_COLOR = re.compile(r'oklch\s*\([^)]+\)')

# Dimension patterns
# Group 1 is the numeric part. The number is anchored with a look-behind
# rather than ``\b`` so that a leading-dot decimal such as ``.5rem`` is
# captured as ``.5`` instead of being misread as ``5``.
PX_VALUE = re.compile(r'(?<![\w.])(\d+(?:\.\d+)?|\.\d+)\s*px\b')
REM_VALUE = re.compile(r'(?<![\w.])(\d+(?:\.\d+)?|\.\d+)\s*rem\b')
EM_VALUE = re.compile(r'(?<![\w.])(\d+(?:\.\d+)?|\.\d+)\s*em\b')
# No trailing ``\b`` after ``%``: ``%`` is not a word character, so ``%\b``
# only matches when a letter or digit follows and would reject ``50%;``.
PERCENT_VALUE = re.compile(r'(?<![\w.])(\d+(?:\.\d+)?|\.\d+)\s*%')

# Font patterns (group 1 is the declared value, up to the next ``;``)
FONT_SIZE = re.compile(r'font-size\s*:\s*([^;]+)')
FONT_FAMILY = re.compile(r'font-family\s*:\s*([^;]+)')
FONT_WEIGHT = re.compile(r'font-weight\s*:\s*(\d+|normal|bold|lighter|bolder)')
LINE_HEIGHT = re.compile(r'line-height\s*:\s*([^;]+)')

# Spacing patterns
MARGIN_PADDING = re.compile(r'(?:margin|padding)(?:-(?:top|right|bottom|left))?\s*:\s*([^;]+)')
GAP = re.compile(r'gap\s*:\s*([^;]+)')

# Border patterns
BORDER_RADIUS = re.compile(r'border-radius\s*:\s*([^;]+)')
BORDER_WIDTH = re.compile(r'border(?:-(?:top|right|bottom|left))?-width\s*:\s*([^;]+)')

# Shadow patterns
BOX_SHADOW = re.compile(r'box-shadow\s*:\s*([^;]+)')

# Z-index
Z_INDEX = re.compile(r'z-index\s*:\s*(\d+)')


@dataclass
class ValueOccurrence:
    """One recorded appearance of a style value in a scanned file."""
    value: str  # Value text as it was recorded
    file: str  # File the value was found in (the analyzer passes root-relative paths)
    line: int  # Line number of the occurrence (the analyzer counts from 1)
    property: str  # CSS property name, or a category label such as 'color' / 'spacing'
    context: str   # Surrounding code


class StyleAnalyzer:
    """
    Analyzes style files and inline styles to find:
    - Hardcoded values that should be tokens
    - Duplicate values
    - Inconsistent patterns
    """

    def __init__(self, root_path: str):
        self.root = Path(root_path).resolve()
        self.values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.color_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.spacing_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.font_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)

    async def analyze(
        self,
        include_inline: bool = True,
        include_css: bool = True
    ) -> Dict[str, Any]:
        """
        Analyze all styles in the project.

        Returns:
            Dict with analysis results including duplicates and candidates
        """
        # Reset collectors
        self.values.clear()
        self.color_values.clear()
        self.spacing_values.clear()
        self.font_values.clear()

        # Scan CSS/SCSS files
        if include_css:
            await self._scan_style_files()

        # Scan inline styles in JS/TS files
        if include_inline:
            await self._scan_inline_styles()

        # Analyze results
        duplicates = self._find_duplicates()
        candidates = self._generate_token_candidates()

        return {
            'total_values_found': sum(len(v) for v in self.values.values()),
            'unique_colors': len(self.color_values),
            'unique_spacing': len(self.spacing_values),
            'duplicates': duplicates,
            'token_candidates': candidates,
        }

    async def _scan_style_files(self) -> None:
        """Scan CSS and SCSS files for values."""
        skip_dirs = {'node_modules', '.git', 'dist', 'build'}

        for pattern in ['**/*.css', '**/*.scss', '**/*.sass', '**/*.less']:
            for file_path in self.root.rglob(pattern):
                if any(skip in file_path.parts for skip in skip_dirs):
                    continue

                try:
                    content = file_path.read_text(encoding='utf-8', errors='ignore')
                    rel_path = str(file_path.relative_to(self.root))
                    self._extract_values_from_css(content, rel_path)
                except Exception:
                    continue

    async def _scan_inline_styles(self) -> None:
        """Scan JS/TS files for inline style values."""
        skip_dirs = {'node_modules', '.git', 'dist', 'build'}

        for pattern in ['**/*.jsx', '**/*.tsx', '**/*.js', '**/*.ts']:
            for file_path in self.root.rglob(pattern):
                if any(skip in file_path.parts for skip in skip_dirs):
                    continue

                try:
                    content = file_path.read_text(encoding='utf-8', errors='ignore')
                    rel_path = str(file_path.relative_to(self.root))
                    self._extract_values_from_jsx(content, rel_path)
                except Exception:
                    continue

    def _extract_values_from_css(self, content: str, file_path: str) -> None:
        """Extract style values from CSS content."""
        lines = content.split('\n')

        for line_num, line in enumerate(lines, 1):
            # Skip comments and empty lines
            if not line.strip() or line.strip().startswith('//') or line.strip().startswith('/*'):
                continue

            # Extract colors
            for pattern in [HEX_COLOR, RGB_COLOR, HSL_COLOR, OKLCH_COLOR]:
                for match in pattern.finditer(line):
                    value = match.group(0).lower()
                    self._record_color(value, file_path, line_num, line.strip())

            # Extract dimensions
            for match in PX_VALUE.finditer(line):
                value = f"{match.group(1)}px"
                self._record_spacing(value, file_path, line_num, line.strip())

            for match in REM_VALUE.finditer(line):
                value = f"{match.group(1)}rem"
                self._record_spacing(value, file_path, line_num, line.strip())

            # Extract font properties
            for match in FONT_SIZE.finditer(line):
                value = match.group(1).strip()
                self._record_font(value, file_path, line_num, 'font-size', line.strip())

            for match in FONT_WEIGHT.finditer(line):
                value = match.group(1).strip()
                self._record_font(value, file_path, line_num, 'font-weight', line.strip())

            # Extract z-index
            for match in Z_INDEX.finditer(line):
                value = match.group(1)
                self._record_value(f"z-{value}", file_path, line_num, 'z-index', line.strip())

    def _extract_values_from_jsx(self, content: str, file_path: str) -> None:
        """Extract style values from JSX inline styles."""
        # Find style={{ ... }} blocks
        style_pattern = re.compile(r'style\s*=\s*\{\s*\{([^}]+)\}\s*\}', re.DOTALL)

        for match in style_pattern.finditer(content):
            style_content = match.group(1)
            line_num = content[:match.start()].count('\n') + 1

            # Parse the style object
            # Look for property: value patterns
            prop_pattern = re.compile(r'(\w+)\s*:\s*["\']?([^,\n"\']+)["\']?')

            for prop_match in prop_pattern.finditer(style_content):
                prop_name = prop_match.group(1)
                prop_value = prop_match.group(2).strip()

                # Check for colors
                if any(c in prop_name.lower() for c in ['color', 'background']):
                    if HEX_COLOR.search(prop_value) or RGB_COLOR.search(prop_value):
                        self._record_color(prop_value.lower(), file_path, line_num, style_content[:100])

                # Check for dimensions
                if PX_VALUE.search(prop_value):
                    self._record_spacing(prop_value, file_path, line_num, style_content[:100])

                if 'fontSize' in prop_name or 'fontWeight' in prop_name:
                    self._record_font(prop_value, file_path, line_num, prop_name, style_content[:100])

    def _record_color(self, value: str, file: str, line: int, context: str) -> None:
        """Record a color value occurrence."""
        normalized = self._normalize_color(value)
        self.color_values[normalized].append(ValueOccurrence(
            value=value,
            file=file,
            line=line,
            property='color',
            context=context,
        ))
        self.values[normalized].append(ValueOccurrence(
            value=value,
            file=file,
            line=line,
            property='color',
            context=context,
        ))

    def _record_spacing(self, value: str, file: str, line: int, context: str) -> None:
        """Record a spacing/dimension value occurrence."""
        self.spacing_values[value].append(ValueOccurrence(
            value=value,
            file=file,
            line=line,
            property='spacing',
            context=context,
        ))
        self.values[value].append(ValueOccurrence(
            value=value,
            file=file,
            line=line,
            property='spacing',
            context=context,
        ))

    def _record_font(self, value: str, file: str, line: int, prop: str, context: str) -> None:
        """Record a font-related value occurrence."""
        self.font_values[value].append(ValueOccurrence(
            value=value,
            file=file,
            line=line,
            property=prop,
            context=context,
        ))
        self.values[value].append(ValueOccurrence(
            value=value,
            file=file,
            line=line,
            property=prop,
            context=context,
        ))

    def _record_value(self, value: str, file: str, line: int, prop: str, context: str) -> None:
        """Record a generic value occurrence."""
        self.values[value].append(ValueOccurrence(
            value=value,
            file=file,
            line=line,
            property=prop,
            context=context,
        ))

    def _normalize_color(self, color: str) -> str:
        """Normalize color value for comparison."""
        color = color.lower().strip()
        # Expand 3-digit hex to 6-digit
        if re.match(r'^#[0-9a-f]{3}$', color):
            color = f"#{color[1]*2}{color[2]*2}{color[3]*2}"
        return color

    def _find_duplicates(self) -> List[Dict[str, Any]]:
        """Find values that appear multiple times."""
        duplicates = []

        for value, occurrences in self.values.items():
            if len(occurrences) >= 2:
                # Get unique files
                files = list(set(o.file for o in occurrences))

                duplicates.append({
                    'value': value,
                    'count': len(occurrences),
                    'files': files[:5],  # Limit to 5 files
                    'category': occurrences[0].property,
                    'locations': [
                        {'file': o.file, 'line': o.line}
                        for o in occurrences[:5]
                    ],
                })

        # Sort by count (most duplicated first)
        duplicates.sort(key=lambda x: x['count'], reverse=True)

        return duplicates[:50]  # Return top 50

    def _generate_token_candidates(self) -> List[TokenCandidate]:
        """Generate token suggestions for repeated values."""
        candidates = []

        # Color candidates
        for value, occurrences in self.color_values.items():
            if len(occurrences) >= 2:
                suggested_name = self._suggest_color_name(value)
                candidates.append(TokenCandidate(
                    value=value,
                    suggested_name=suggested_name,
                    category='colors',
                    occurrences=len(occurrences),
                    locations=[
                        Location(o.file, o.line) for o in occurrences[:5]
                    ],
                    confidence=min(0.9, 0.3 + (len(occurrences) * 0.1)),
                ))

        # Spacing candidates
        for value, occurrences in self.spacing_values.items():
            if len(occurrences) >= 3:  # Higher threshold for spacing
                suggested_name = self._suggest_spacing_name(value)
                candidates.append(TokenCandidate(
                    value=value,
                    suggested_name=suggested_name,
                    category='spacing',
                    occurrences=len(occurrences),
                    locations=[
                        Location(o.file, o.line) for o in occurrences[:5]
                    ],
                    confidence=min(0.8, 0.2 + (len(occurrences) * 0.05)),
                ))

        # Sort by confidence
        candidates.sort(key=lambda x: x.confidence, reverse=True)

        return candidates[:30]  # Return top 30

    def _suggest_color_name(self, color: str) -> str:
        """Suggest a token name for a color value."""
        # Common color mappings
        common_colors = {
            '#ffffff': 'color.white',
            '#000000': 'color.black',
            '#f3f4f6': 'color.neutral.100',
            '#e5e7eb': 'color.neutral.200',
            '#d1d5db': 'color.neutral.300',
            '#9ca3af': 'color.neutral.400',
            '#6b7280': 'color.neutral.500',
            '#4b5563': 'color.neutral.600',
            '#374151': 'color.neutral.700',
            '#1f2937': 'color.neutral.800',
            '#111827': 'color.neutral.900',
        }

        if color in common_colors:
            return common_colors[color]

        # Detect color family by hue (simplified)
        if color.startswith('#'):
            return f"color.custom.{color[1:7]}"

        return f"color.custom.value"

    def _suggest_spacing_name(self, value: str) -> str:
        """Suggest a token name for a spacing value."""
        # Common spacing values
        spacing_map = {
            '0px': 'spacing.0',
            '4px': 'spacing.xs',
            '8px': 'spacing.sm',
            '12px': 'spacing.md',
            '16px': 'spacing.lg',
            '20px': 'spacing.lg',
            '24px': 'spacing.xl',
            '32px': 'spacing.2xl',
            '48px': 'spacing.3xl',
            '64px': 'spacing.4xl',
            '0.25rem': 'spacing.xs',
            '0.5rem': 'spacing.sm',
            '0.75rem': 'spacing.md',
            '1rem': 'spacing.lg',
            '1.5rem': 'spacing.xl',
            '2rem': 'spacing.2xl',
        }

        if value in spacing_map:
            return spacing_map[value]

        return f"spacing.custom.{value.replace('px', '').replace('rem', 'r')}"

    async def find_unused_styles(self) -> List[Dict[str, Any]]:
        """
        Find CSS classes/selectors that are not used in the codebase.

        Returns list of potentially unused styles.
        """
        # Collect all CSS class definitions
        css_classes = set()
        class_locations = {}

        skip_dirs = {'node_modules', '.git', 'dist', 'build'}

        for pattern in ['**/*.css', '**/*.scss']:
            for file_path in self.root.rglob(pattern):
                if any(skip in file_path.parts for skip in skip_dirs):
                    continue

                try:
                    content = file_path.read_text(encoding='utf-8', errors='ignore')
                    rel_path = str(file_path.relative_to(self.root))

                    # Find class definitions
                    for match in re.finditer(r'\.([a-zA-Z_][\w-]*)\s*[{,]', content):
                        class_name = match.group(1)
                        css_classes.add(class_name)
                        class_locations[class_name] = rel_path

                except Exception:
                    continue

        # Collect all class usage in JS/JSX/TS/TSX
        used_classes = set()

        for pattern in ['**/*.jsx', '**/*.tsx', '**/*.js', '**/*.ts']:
            for file_path in self.root.rglob(pattern):
                if any(skip in file_path.parts for skip in skip_dirs):
                    continue

                try:
                    content = file_path.read_text(encoding='utf-8', errors='ignore')

                    # Find className usage
                    for match in re.finditer(r'className\s*=\s*["\']([^"\']+)["\']', content):
                        classes = match.group(1).split()
                        used_classes.update(classes)

                    # Find styles.xxx usage (CSS modules)
                    for match in re.finditer(r'styles\.(\w+)', content):
                        used_classes.add(match.group(1))

                except Exception:
                    continue

        # Find unused
        unused = css_classes - used_classes

        return [
            {
                'class': cls,
                'file': class_locations.get(cls, 'unknown'),
            }
            for cls in sorted(unused)
        ][:50]  # Limit results

    async def analyze_naming_consistency(self) -> Dict[str, Any]:
        """
        Analyze naming consistency across style files.

        Returns analysis of naming patterns and inconsistencies.
        """
        patterns = {
            'kebab-case': [],   # my-class-name
            'camelCase': [],    # myClassName
            'snake_case': [],   # my_class_name
            'BEM': [],          # block__element--modifier
        }

        skip_dirs = {'node_modules', '.git', 'dist', 'build'}

        for pattern in ['**/*.css', '**/*.scss']:
            for file_path in self.root.rglob(pattern):
                if any(skip in file_path.parts for skip in skip_dirs):
                    continue

                try:
                    content = file_path.read_text(encoding='utf-8', errors='ignore')
                    rel_path = str(file_path.relative_to(self.root))

                    # Find class names
                    for match in re.finditer(r'\.([a-zA-Z_][\w-]*)', content):
                        name = match.group(1)
                        line = content[:match.start()].count('\n') + 1

                        # Classify naming pattern
                        if '__' in name or '--' in name:
                            patterns['BEM'].append({'name': name, 'file': rel_path, 'line': line})
                        elif '_' in name:
                            patterns['snake_case'].append({'name': name, 'file': rel_path, 'line': line})
                        elif '-' in name:
                            patterns['kebab-case'].append({'name': name, 'file': rel_path, 'line': line})
                        elif name != name.lower():
                            patterns['camelCase'].append({'name': name, 'file': rel_path, 'line': line})

                except Exception:
                    continue

        # Calculate primary pattern
        pattern_counts = {k: len(v) for k, v in patterns.items()}
        primary = max(pattern_counts, key=pattern_counts.get) if any(pattern_counts.values()) else None

        # Find inconsistencies (patterns different from primary)
        inconsistencies = []
        if primary:
            for pattern_type, items in patterns.items():
                if pattern_type != primary and items:
                    inconsistencies.extend(items[:10])

        return {
            'pattern_counts': pattern_counts,
            'primary_pattern': primary,
            'inconsistencies': inconsistencies[:20],
        }