# dss/dss/analyze/styles.py

"""
Style Pattern Analyzer.

Detects and analyzes style patterns in code to identify:
- Hardcoded values that should be tokens
- Duplicate values across files
- Inconsistent naming patterns
- Unused styles
"""

import re
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Tuple

from .base import Location, TokenCandidate

# Color patterns
# Hex notation in all four CSS forms: #rgb, #rgba, #rrggbb and #rrggbbaa.
# Longest alternatives come first so an 8-digit colour is not cut short, and the
# trailing \b rejects invalid lengths such as 5 or 7 digits.
HEX_COLOR = re.compile(r"#(?:[0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})\b")
# Comma-separated rgb()/rgba() with integer channels and an optional alpha.
RGB_COLOR = re.compile(r"rgba?\s*\(\s*\d+\s*,\s*\d+\s*,\s*\d+(?:\s*,\s*[\d.]+)?\s*\)")
# Comma-separated hsl()/hsla() with percentage saturation/lightness and an optional alpha.
HSL_COLOR = re.compile(r"hsla?\s*\(\s*\d+\s*,\s*[\d.]+%\s*,\s*[\d.]+%(?:\s*,\s*[\d.]+)?\s*\)")
# Any oklch(...) call; the arguments are not validated.
OKLCH_COLOR = re.compile(r"oklch\s*\([^)]+\)")

# Dimension patterns (group 1 is the unsigned number without its unit)
PX_VALUE = re.compile(r"\b(\d+(?:\.\d+)?)\s*px\b")
REM_VALUE = re.compile(r"\b(\d+(?:\.\d+)?)\s*rem\b")
EM_VALUE = re.compile(r"\b(\d+(?:\.\d+)?)\s*em\b")
# No trailing \b here: "%" is not a word character, so "%\b" would only match
# when a letter or digit directly follows the sign and would miss "50%;".
PERCENT_VALUE = re.compile(r"\b(\d+(?:\.\d+)?)\s*%")

# Font patterns (group 1 is the declared value, up to the next ";")
FONT_SIZE = re.compile(r"font-size\s*:\s*([^;]+)")
FONT_FAMILY = re.compile(r"font-family\s*:\s*([^;]+)")
FONT_WEIGHT = re.compile(r"font-weight\s*:\s*(\d+|normal|bold|lighter|bolder)")
LINE_HEIGHT = re.compile(r"line-height\s*:\s*([^;]+)")

# Spacing patterns
MARGIN_PADDING = re.compile(r"(?:margin|padding)(?:-(?:top|right|bottom|left))?\s*:\s*([^;]+)")
GAP = re.compile(r"gap\s*:\s*([^;]+)")

# Border patterns
BORDER_RADIUS = re.compile(r"border-radius\s*:\s*([^;]+)")
BORDER_WIDTH = re.compile(r"border(?:-(?:top|right|bottom|left))?-width\s*:\s*([^;]+)")

# Shadow patterns
BOX_SHADOW = re.compile(r"box-shadow\s*:\s*([^;]+)")

# Z-index (non-negative integers only)
Z_INDEX = re.compile(r"z-index\s*:\s*(\d+)")


@dataclass
class ValueOccurrence:
    """
    One sighting of a style value in a scanned file.

    Attributes:
        value: The value text as it was recorded (e.g. a colour or a dimension).
        file: Path of the file the value was found in.
        line: Line number of the occurrence.
        property: CSS property name, or the category the value was recorded under.
        context: Snippet of the surrounding code.
    """

    value: str
    file: str
    line: int
    property: str
    context: str


class StyleAnalyzer:
    """
    Analyzes style files and inline styles to find:

    - Hardcoded values that should be tokens
    - Duplicate values
    - Inconsistent patterns

    Every file path reported by this class is relative to ``root_path``.
    Directories named in ``_SKIP_DIRS`` are ignored wherever they appear below
    the root; directories *above* the root never cause a file to be skipped.
    """

    # Directory names (below the project root) whose contents are never scanned.
    _SKIP_DIRS = frozenset({"node_modules", ".git", "dist", "build"})

    # File suffixes per scan, in scan order.
    _STYLE_SUFFIXES = ("css", "scss", "sass", "less")
    _CLASS_SOURCE_SUFFIXES = ("css", "scss")
    _SCRIPT_SUFFIXES = ("jsx", "tsx", "js", "ts")

    # /* ... */ comments, possibly spanning several lines.
    _BLOCK_COMMENT_RE = re.compile(r"/\*.*?\*/", re.DOTALL)
    # JSX ``style={{ ... }}`` blocks; group 1 is the object body.
    _INLINE_STYLE_RE = re.compile(r"style\s*=\s*\{\s*\{([^}]+)\}\s*\}", re.DOTALL)
    # ``name: value`` pairs inside a style object. A quoted value is captured
    # whole (groups 2/3) so commas inside it, as in "rgb(0, 0, 0)", do not
    # truncate it; an unquoted value (group 4) runs to the next comma/newline.
    _STYLE_PROP_RE = re.compile(r"""(\w+)\s*:\s*(?:"([^"]*)"|'([^']*)'|([^,\n]+))""")
    # Short hex colours (#rgb / #rgba), already lower-cased.
    _SHORT_HEX_RE = re.compile(r"#[0-9a-f]{3,4}")
    # A class selector directly followed by "{" or ",".
    _CLASS_DEF_RE = re.compile(r"\.([a-zA-Z_][\w-]*)\s*[{,]")
    # Any ".name" token in a stylesheet.
    _CLASS_TOKEN_RE = re.compile(r"\.([a-zA-Z_][\w-]*)")
    # className="a b c" with a plain string literal.
    _CLASS_NAME_ATTR_RE = re.compile(r'className\s*=\s*["\']([^"\']+)["\']')
    # CSS-module references: styles.name and styles["name"].
    _CSS_MODULE_DOT_RE = re.compile(r"styles\.(\w+)")
    _CSS_MODULE_ITEM_RE = re.compile(r"""styles\[\s*["']([^"']+)["']\s*\]""")

    # Well-known colours and the token names suggested for them.
    _COMMON_COLOR_NAMES = {
        "#ffffff": "color.white",
        "#000000": "color.black",
        "#f3f4f6": "color.neutral.100",
        "#e5e7eb": "color.neutral.200",
        "#d1d5db": "color.neutral.300",
        "#9ca3af": "color.neutral.400",
        "#6b7280": "color.neutral.500",
        "#4b5563": "color.neutral.600",
        "#374151": "color.neutral.700",
        "#1f2937": "color.neutral.800",
        "#111827": "color.neutral.900",
    }

    # Well-known spacing steps. Each token name stands for exactly one size
    # (px and rem spellings of the same size share a name); sizes that are not
    # listed, such as 20px, fall through to a "spacing.custom.*" name instead of
    # colliding with a neighbouring step.
    _SPACING_NAMES = {
        "0px": "spacing.0",
        "4px": "spacing.xs",
        "8px": "spacing.sm",
        "12px": "spacing.md",
        "16px": "spacing.lg",
        "24px": "spacing.xl",
        "32px": "spacing.2xl",
        "48px": "spacing.3xl",
        "64px": "spacing.4xl",
        "0.25rem": "spacing.xs",
        "0.5rem": "spacing.sm",
        "0.75rem": "spacing.md",
        "1rem": "spacing.lg",
        "1.5rem": "spacing.xl",
        "2rem": "spacing.2xl",
    }

    def __init__(self, root_path: str):
        """
        Create an analyzer for the project rooted at ``root_path``.

        Args:
            root_path: Directory to scan; it is resolved to an absolute path.
        """
        self.root = Path(root_path).resolve()
        # Each collector maps a value key to every occurrence recorded for it.
        self.values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.color_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.spacing_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.font_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)

    def _iter_project_files(self, suffixes: Tuple[str, ...]) -> Iterator[Tuple[str, str]]:
        """
        Yield ``(relative_path, content)`` for project files with the given suffixes.

        Files are visited suffix by suffix. A file is skipped when one of its
        parent directories *below the root* is listed in ``_SKIP_DIRS``, or when
        it cannot be read.
        """
        for suffix in suffixes:
            for file_path in self.root.rglob(f"*.{suffix}"):
                rel = file_path.relative_to(self.root)
                # Only directories below the root count: checking the absolute
                # path would skip everything when the project itself lives
                # under e.g. ".../build/".
                if not self._SKIP_DIRS.isdisjoint(rel.parts[:-1]):
                    continue
                try:
                    content = file_path.read_text(encoding="utf-8", errors="ignore")
                except OSError:
                    # Unreadable entry (permissions, a directory named like a
                    # file, broken symlink): scanning is best-effort.
                    continue
                yield str(rel), content

    async def analyze(
        self, include_inline: bool = True, include_css: bool = True
    ) -> Dict[str, Any]:
        """
        Analyze all styles in the project.

        Args:
            include_inline: Scan JS/TS files for inline ``style={{...}}`` values.
            include_css: Scan CSS/SCSS/Sass/Less files.

        Returns:
            Dict with analysis results: ``total_values_found``,
            ``unique_colors``, ``unique_spacing``, ``unique_fonts``,
            ``duplicates`` and ``token_candidates``.
        """
        # Reset collectors so repeated calls do not accumulate.
        for collector in (
            self.values,
            self.color_values,
            self.spacing_values,
            self.font_values,
        ):
            collector.clear()

        if include_css:
            await self._scan_style_files()

        if include_inline:
            await self._scan_inline_styles()

        return {
            "total_values_found": sum(len(v) for v in self.values.values()),
            "unique_colors": len(self.color_values),
            "unique_spacing": len(self.spacing_values),
            "unique_fonts": len(self.font_values),
            "duplicates": self._find_duplicates(),
            "token_candidates": self._generate_token_candidates(),
        }

    async def _scan_style_files(self) -> None:
        """Scan CSS, SCSS, Sass and Less files for values."""
        for rel_path, content in self._iter_project_files(self._STYLE_SUFFIXES):
            self._extract_values_from_css(content, rel_path)

    async def _scan_inline_styles(self) -> None:
        """Scan JS/TS files for inline style values."""
        for rel_path, content in self._iter_project_files(self._SCRIPT_SUFFIXES):
            self._extract_values_from_jsx(content, rel_path)

    def _extract_values_from_css(self, content: str, file_path: str) -> None:
        """
        Extract style values from CSS content, line by line.

        Colours, px/rem dimensions, font-size, font-weight and z-index values
        are recorded with their 1-based line number. Text inside ``/* ... */``
        comments and lines starting with ``//`` are ignored.

        NOTE: a ``font-size: 16px`` declaration is recorded both as a dimension
        and as a font value, so it adds two entries to ``self.values``.
        """
        # Blank out block comments but keep their newlines, so line numbers
        # still match the file; a one-line comment becomes a single space so
        # the tokens around it do not fuse together.
        content = self._BLOCK_COMMENT_RE.sub(
            lambda m: "\n" * m.group(0).count("\n") or " ", content
        )

        for line_num, raw_line in enumerate(content.split("\n"), 1):
            line = raw_line.strip()
            # Skip blanks, line comments and the start of an unterminated block comment.
            if not line or line.startswith(("//", "/*")):
                continue

            for pattern in (HEX_COLOR, RGB_COLOR, HSL_COLOR, OKLCH_COLOR):
                for match in pattern.finditer(line):
                    self._record_color(match.group(0).lower(), file_path, line_num, line)

            for match in PX_VALUE.finditer(line):
                self._record_spacing(f"{match.group(1)}px", file_path, line_num, line)

            for match in REM_VALUE.finditer(line):
                self._record_spacing(f"{match.group(1)}rem", file_path, line_num, line)

            for match in FONT_SIZE.finditer(line):
                self._record_font(match.group(1).strip(), file_path, line_num, "font-size", line)

            for match in FONT_WEIGHT.finditer(line):
                self._record_font(
                    match.group(1).strip(), file_path, line_num, "font-weight", line
                )

            for match in Z_INDEX.finditer(line):
                self._record_value(
                    f"z-{match.group(1)}", file_path, line_num, "z-index", line
                )

    def _extract_values_from_jsx(self, content: str, file_path: str) -> None:
        """
        Extract style values from JSX inline ``style={{ ... }}`` objects.

        Values are recorded the same way as for CSS: each colour and each
        px/rem dimension found inside a property value becomes its own
        occurrence. All occurrences of one style object share the line number
        on which the ``style=`` attribute starts.
        """
        for block in self._INLINE_STYLE_RE.finditer(content):
            style_content = block.group(1)
            line_num = content.count("\n", 0, block.start()) + 1
            context = style_content[:100]

            for prop in self._STYLE_PROP_RE.finditer(style_content):
                prop_name = prop.group(1)
                # Exactly one of the three value groups took part in the match.
                raw_value = next(g for g in prop.group(2, 3, 4) if g is not None)
                prop_value = raw_value.strip()
                if not prop_value:
                    continue

                # Colours are only looked for in colour-like properties.
                lowered_name = prop_name.lower()
                if "color" in lowered_name or "background" in lowered_name:
                    for pattern in (HEX_COLOR, RGB_COLOR, HSL_COLOR, OKLCH_COLOR):
                        for match in pattern.finditer(prop_value):
                            self._record_color(
                                match.group(0).lower(), file_path, line_num, context
                            )

                for match in PX_VALUE.finditer(prop_value):
                    self._record_spacing(f"{match.group(1)}px", file_path, line_num, context)

                for match in REM_VALUE.finditer(prop_value):
                    self._record_spacing(f"{match.group(1)}rem", file_path, line_num, context)

                if "fontSize" in prop_name or "fontWeight" in prop_name:
                    self._record_font(prop_value, file_path, line_num, prop_name, context)

    def _record_color(self, value: str, file: str, line: int, context: str) -> None:
        """Record a color value occurrence under its normalized form."""
        occurrence = ValueOccurrence(
            value=value, file=file, line=line, property="color", context=context
        )
        key = self._normalize_color(value)
        self.color_values[key].append(occurrence)
        self.values[key].append(occurrence)

    def _record_spacing(self, value: str, file: str, line: int, context: str) -> None:
        """Record a spacing/dimension value occurrence."""
        occurrence = ValueOccurrence(
            value=value, file=file, line=line, property="spacing", context=context
        )
        self.spacing_values[value].append(occurrence)
        self.values[value].append(occurrence)

    def _record_font(self, value: str, file: str, line: int, prop: str, context: str) -> None:
        """Record a font-related value occurrence for property ``prop``."""
        occurrence = ValueOccurrence(
            value=value, file=file, line=line, property=prop, context=context
        )
        self.font_values[value].append(occurrence)
        self.values[value].append(occurrence)

    def _record_value(self, value: str, file: str, line: int, prop: str, context: str) -> None:
        """Record a generic value occurrence (only in the combined collector)."""
        self.values[value].append(
            ValueOccurrence(value=value, file=file, line=line, property=prop, context=context)
        )

    def _normalize_color(self, color: str) -> str:
        """
        Normalize a color value so equivalent spellings compare equal.

        - Lower-cases and strips the value.
        - Expands short hex (``#rgb`` / ``#rgba``) to ``#rrggbb`` / ``#rrggbbaa``.
        - Canonicalizes whitespace in functional notation, so ``rgb(0,0,0)``
          and ``rgb( 0 , 0 , 0 )`` both become ``rgb(0, 0, 0)``.
        """
        color = color.lower().strip()

        if self._SHORT_HEX_RE.fullmatch(color):
            return "#" + "".join(digit * 2 for digit in color[1:])

        if "(" in color:
            color = re.sub(r"\s+", " ", color)
            color = re.sub(r"\s*,\s*", ", ", color)
            color = re.sub(r"\s*\(\s*", "(", color)
            color = re.sub(r"\s*\)", ")", color)

        return color

    def _find_duplicates(self) -> List[Dict[str, Any]]:
        """
        Find values that appear multiple times.

        Returns:
            Up to 50 entries, most frequent first. Each entry lists the value,
            its occurrence count, up to five files (alphabetical, so the result
            is deterministic), the property of the first occurrence as
            ``category``, and up to five locations.
        """
        duplicates = [
            {
                "value": value,
                "count": len(occurrences),
                "files": sorted({o.file for o in occurrences})[:5],
                "category": occurrences[0].property,
                "locations": [{"file": o.file, "line": o.line} for o in occurrences[:5]],
            }
            for value, occurrences in self.values.items()
            if len(occurrences) >= 2
        ]

        # Most duplicated first; the stable sort keeps discovery order for ties.
        duplicates.sort(key=lambda entry: entry["count"], reverse=True)

        return duplicates[:50]

    def _generate_token_candidates(self) -> "List[TokenCandidate]":
        """
        Generate token suggestions for repeated values.

        Colours qualify from two occurrences, spacing values from three.
        Confidence grows with the occurrence count and is capped at 0.9 for
        colours and 0.8 for spacing.

        Returns:
            Up to 30 candidates, highest confidence first.
        """
        candidates = []

        for value, occurrences in self.color_values.items():
            count = len(occurrences)
            if count >= 2:
                candidates.append(
                    TokenCandidate(
                        value=value,
                        suggested_name=self._suggest_color_name(value),
                        category="colors",
                        occurrences=count,
                        locations=[Location(o.file, o.line) for o in occurrences[:5]],
                        confidence=min(0.9, 0.3 + (count * 0.1)),
                    )
                )

        for value, occurrences in self.spacing_values.items():
            count = len(occurrences)
            if count >= 3:  # Higher threshold for spacing
                candidates.append(
                    TokenCandidate(
                        value=value,
                        suggested_name=self._suggest_spacing_name(value),
                        category="spacing",
                        occurrences=count,
                        locations=[Location(o.file, o.line) for o in occurrences[:5]],
                        confidence=min(0.8, 0.2 + (count * 0.05)),
                    )
                )

        candidates.sort(key=lambda candidate: candidate.confidence, reverse=True)

        return candidates[:30]

    def _suggest_color_name(self, color: str) -> str:
        """
        Suggest a token name for a (normalized) color value.

        Well-known colours get a fixed name; other hex colours are named after
        their digits; any other notation is named after a slug of its text, so
        distinct colours receive distinct names.
        """
        known = self._COMMON_COLOR_NAMES.get(color)
        if known is not None:
            return known

        if color.startswith("#"):
            # All digits are kept so colours differing only in alpha stay distinct.
            return f"color.custom.{color[1:]}"

        slug = re.sub(r"[^0-9a-z]+", "-", color.lower()).strip("-")
        return f"color.custom.{slug}" if slug else "color.custom.value"

    def _suggest_spacing_name(self, value: str) -> str:
        """
        Suggest a token name for a spacing value.

        Values on the common scale get its step name; anything else becomes
        ``spacing.custom.<number>`` (with an ``r`` suffix for rem values).
        """
        known = self._SPACING_NAMES.get(value)
        if known is not None:
            return known

        return f"spacing.custom.{value.replace('px', '').replace('rem', 'r')}"

    async def find_unused_styles(self) -> List[Dict[str, Any]]:
        """
        Find CSS classes/selectors that are not used in the codebase.

        A class counts as used when it appears in a string-literal
        ``className="..."`` attribute, or as ``styles.name`` /
        ``styles["name"]`` in a JS/TS file. Dynamically built class names are
        not detected, so results are candidates, not proof.

        Returns list of potentially unused styles (at most 50, sorted by class
        name), each with the class name and a file that defines it.
        """
        # Class name -> file that defines it (the last file seen wins).
        class_locations: Dict[str, str] = {}
        for rel_path, content in self._iter_project_files(self._CLASS_SOURCE_SUFFIXES):
            for match in self._CLASS_DEF_RE.finditer(content):
                class_locations[match.group(1)] = rel_path

        used_classes = set()
        for _rel_path, content in self._iter_project_files(self._SCRIPT_SUFFIXES):
            for match in self._CLASS_NAME_ATTR_RE.finditer(content):
                used_classes.update(match.group(1).split())
            used_classes.update(self._CSS_MODULE_DOT_RE.findall(content))
            used_classes.update(self._CSS_MODULE_ITEM_RE.findall(content))

        unused = sorted(class_locations.keys() - used_classes)

        return [{"class": cls, "file": class_locations[cls]} for cls in unused[:50]]

    @staticmethod
    def _classify_class_name(name: str) -> Optional[str]:
        """Return the naming pattern of ``name``, or None for a plain lowercase word."""
        if "__" in name or "--" in name:
            return "BEM"
        if "_" in name:
            return "snake_case"
        if "-" in name:
            return "kebab-case"
        if name != name.lower():
            return "camelCase"
        return None

    async def analyze_naming_consistency(self) -> Dict[str, Any]:
        """
        Analyze naming consistency across style files.

        Every ``.name`` token in CSS/SCSS files is classified as BEM,
        snake_case, kebab-case or camelCase; plain lowercase words fit every
        convention and are not counted.

        Returns analysis of naming patterns and inconsistencies:
        ``pattern_counts``, the most frequent ``primary_pattern`` (None when
        nothing was classified) and up to 20 ``inconsistencies`` (names using
        another pattern, at most 10 per pattern).
        """
        patterns: Dict[str, List[Dict[str, Any]]] = {
            "kebab-case": [],  # my-class-name
            "camelCase": [],  # myClassName
            "snake_case": [],  # my_class_name
            "BEM": [],  # block__element--modifier
        }

        for rel_path, content in self._iter_project_files(self._CLASS_SOURCE_SUFFIXES):
            # Track the line incrementally instead of recounting from the
            # start of the file for every match.
            line, cursor = 1, 0
            for match in self._CLASS_TOKEN_RE.finditer(content):
                line += content.count("\n", cursor, match.start())
                cursor = match.start()

                name = match.group(1)
                pattern_type = self._classify_class_name(name)
                if pattern_type is not None:
                    patterns[pattern_type].append(
                        {"name": name, "file": rel_path, "line": line}
                    )

        pattern_counts = {k: len(v) for k, v in patterns.items()}
        primary = (
            max(pattern_counts, key=pattern_counts.get) if any(pattern_counts.values()) else None
        )

        # Inconsistencies are names that follow any pattern other than the primary one.
        inconsistencies: List[Dict[str, Any]] = []
        if primary:
            for pattern_type, items in patterns.items():
                if pattern_type != primary and items:
                    inconsistencies.extend(items[:10])

        return {
            "pattern_counts": pattern_counts,
            "primary_pattern": primary,
            "inconsistencies": inconsistencies[:20],
        }