Files
dss/dss/analyze/styles.py
2025-12-11 07:13:06 -03:00

549 lines
20 KiB
Python

"""
Style Pattern Analyzer.
Detects and analyzes style patterns in code to identify:
- Hardcoded values that should be tokens
- Duplicate values across files
- Inconsistent naming patterns
- Unused styles
"""
import re
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, List, Tuple

from .base import Location, TokenCandidate
# Color patterns
# Hex colors with 3 or 6 digits (the trailing \b rejects 4/5/7/8-digit runs).
HEX_COLOR = re.compile(r"#(?:[0-9a-fA-F]{3}){1,2}\b")
# Legacy comma-separated rgb()/rgba() with integer channels and optional alpha.
RGB_COLOR = re.compile(r"rgba?\s*\(\s*\d+\s*,\s*\d+\s*,\s*\d+(?:\s*,\s*[\d.]+)?\s*\)")
# Legacy comma-separated hsl()/hsla() with percentage saturation/lightness.
HSL_COLOR = re.compile(r"hsla?\s*\(\s*\d+\s*,\s*[\d.]+%\s*,\s*[\d.]+%(?:\s*,\s*[\d.]+)?\s*\)")
# oklch() with any argument list (contents are not validated).
OKLCH_COLOR = re.compile(r"oklch\s*\([^)]+\)")

# Dimension patterns (group 1 is the numeric part, without the unit)
PX_VALUE = re.compile(r"\b(\d+(?:\.\d+)?)\s*px\b")
REM_VALUE = re.compile(r"\b(\d+(?:\.\d+)?)\s*rem\b")
EM_VALUE = re.compile(r"\b(\d+(?:\.\d+)?)\s*em\b")
# No trailing \b here: "%" is not a word character, so "%\b" would only match
# when a letter/digit/underscore directly follows the percent sign, which
# rejects ordinary values such as "50%;" or "50% ".
PERCENT_VALUE = re.compile(r"\b(\d+(?:\.\d+)?)\s*%")

# Font patterns (group 1 is the declaration value up to the next ";")
FONT_SIZE = re.compile(r"font-size\s*:\s*([^;]+)")
FONT_FAMILY = re.compile(r"font-family\s*:\s*([^;]+)")
FONT_WEIGHT = re.compile(r"font-weight\s*:\s*(\d+|normal|bold|lighter|bolder)")
LINE_HEIGHT = re.compile(r"line-height\s*:\s*([^;]+)")

# Spacing patterns
MARGIN_PADDING = re.compile(r"(?:margin|padding)(?:-(?:top|right|bottom|left))?\s*:\s*([^;]+)")
GAP = re.compile(r"gap\s*:\s*([^;]+)")

# Border patterns
BORDER_RADIUS = re.compile(r"border-radius\s*:\s*([^;]+)")
BORDER_WIDTH = re.compile(r"border(?:-(?:top|right|bottom|left))?-width\s*:\s*([^;]+)")

# Shadow patterns
BOX_SHADOW = re.compile(r"box-shadow\s*:\s*([^;]+)")

# Z-index (integer values only)
Z_INDEX = re.compile(r"z-index\s*:\s*(\d+)")
@dataclass
class ValueOccurrence:
    """
    One sighting of a style value in the scanned project.

    Attributes:
        value: The value text as it was recorded.
        file: File in which the value was seen.
        line: Line number of the sighting.
        property: CSS property name (or a category label such as "color").
        context: Surrounding code, kept for display.
    """

    value: str
    file: str
    line: int
    property: str
    context: str
class StyleAnalyzer:
    """
    Analyzes style files and inline styles to find:
    - Hardcoded values that should be tokens
    - Duplicate values
    - Inconsistent patterns

    Scan results accumulate in ``values`` (every recorded value) and in the
    per-category maps ``color_values``, ``spacing_values`` and ``font_values``.
    Each map goes from a value string to the list of places it was seen.
    """

    # Directory names, relative to the project root, that are never scanned.
    _SKIP_DIRS = frozenset({"node_modules", ".git", "dist", "build"})

    # rglob() is already recursive, so patterns need no "**/" prefix.
    _STYLE_GLOBS = ("*.css", "*.scss", "*.sass", "*.less")
    _CLASS_SOURCE_GLOBS = ("*.css", "*.scss")
    _SCRIPT_GLOBS = ("*.jsx", "*.tsx", "*.js", "*.ts")

    # /* ... */ comments, possibly spanning several lines.
    _BLOCK_COMMENT = re.compile(r"/\*.*?\*/", re.DOTALL)
    # JSX ``style={{ ... }}`` blocks; group 1 is the object body.
    _INLINE_STYLE = re.compile(r"style\s*=\s*\{\s*\{([^}]+)\}\s*\}", re.DOTALL)
    # ``name: value`` pairs inside a style object. Quoted values are captured
    # whole (groups 2/3) so commas inside them, as in "rgba(0, 0, 0, 0.5)",
    # do not cut the value short; bare values (group 4) stop at , or newline.
    _INLINE_PROP = re.compile(r"""(\w+)\s*:\s*(?:"([^"\n]*)"|'([^'\n]*)'|([^,\n"']+))""")
    # ``.name`` followed by "{" or ",": a class appearing in a selector list.
    _CLASS_DEFINITION = re.compile(r"\.([a-zA-Z_][\w-]*)\s*[{,]")
    # Any ``.name`` token in a stylesheet.
    _CLASS_SELECTOR = re.compile(r"\.([a-zA-Z_][\w-]*)")
    # Static ``className="a b"`` attributes.
    _CLASSNAME_ATTR = re.compile(r'className\s*=\s*["\']([^"\']+)["\']')
    # ``styles.foo`` references (CSS modules).
    _CSS_MODULE_REF = re.compile(r"styles\.(\w+)")

    def __init__(self, root_path: str):
        """Create an analyzer for the project at ``root_path`` (resolved to absolute)."""
        self.root = Path(root_path).resolve()
        self.values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.color_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.spacing_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.font_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)

    async def analyze(
        self, include_inline: bool = True, include_css: bool = True
    ) -> Dict[str, Any]:
        """
        Analyze all styles in the project.

        Args:
            include_inline: Scan JS/TS files for inline ``style={{...}}`` values.
            include_css: Scan CSS/SCSS/Sass/Less files.

        Returns:
            Dict with ``total_values_found``, ``unique_colors``,
            ``unique_spacing``, ``duplicates`` and ``token_candidates``.
        """
        # Reset collectors so repeated calls do not accumulate results.
        self.values.clear()
        self.color_values.clear()
        self.spacing_values.clear()
        self.font_values.clear()

        if include_css:
            await self._scan_style_files()
        if include_inline:
            await self._scan_inline_styles()

        duplicates = self._find_duplicates()
        candidates = self._generate_token_candidates()
        return {
            "total_values_found": sum(len(v) for v in self.values.values()),
            "unique_colors": len(self.color_values),
            "unique_spacing": len(self.spacing_values),
            "duplicates": duplicates,
            "token_candidates": candidates,
        }

    def _iter_sources(self, globs: Iterable[str]) -> Iterator[Tuple[str, str]]:
        """
        Yield ``(relative_path, content)`` for every readable file matching ``globs``.

        Files under a skipped directory are ignored. The check uses the path
        relative to the project root, so a root that itself lives below a
        directory called e.g. ``build`` is still scanned. Unreadable files are
        skipped silently (best effort).
        """
        for glob in globs:
            # Sorted so results do not depend on filesystem enumeration order.
            for file_path in sorted(self.root.rglob(glob)):
                rel_path = file_path.relative_to(self.root)
                if any(part in self._SKIP_DIRS for part in rel_path.parts):
                    continue
                try:
                    content = file_path.read_text(encoding="utf-8", errors="ignore")
                except OSError:
                    # Includes directories whose name happens to match the glob.
                    continue
                yield str(rel_path), content

    async def _scan_style_files(self) -> None:
        """Scan CSS, SCSS, Sass and Less files for values."""
        for rel_path, content in self._iter_sources(self._STYLE_GLOBS):
            self._extract_values_from_css(content, rel_path)

    async def _scan_inline_styles(self) -> None:
        """Scan JS/TS files for inline style values."""
        for rel_path, content in self._iter_sources(self._SCRIPT_GLOBS):
            self._extract_values_from_jsx(content, rel_path)

    def _extract_values_from_css(self, content: str, file_path: str) -> None:
        """
        Extract style values from CSS content, line by line.

        Records colors, px/rem dimensions, font-size, font-weight and z-index.
        """
        # Blank out /* ... */ comments but keep their newlines, so values in
        # commented-out rules are ignored and line numbers stay accurate.
        content = self._BLOCK_COMMENT.sub(
            lambda m: "\n" * m.group(0).count("\n"), content
        )
        color_patterns = (HEX_COLOR, RGB_COLOR, HSL_COLOR, OKLCH_COLOR)

        for line_num, line in enumerate(content.split("\n"), 1):
            stripped = line.strip()
            # Skip empty lines, // comments and any unterminated /* comment.
            if not stripped or stripped.startswith(("//", "/*")):
                continue

            for pattern in color_patterns:
                for match in pattern.finditer(line):
                    self._record_color(match.group(0).lower(), file_path, line_num, stripped)

            # NOTE(review): a px/rem font-size is recorded here as spacing and
            # again below as a font value, so it lands in ``values`` twice.
            for match in PX_VALUE.finditer(line):
                self._record_spacing(f"{match.group(1)}px", file_path, line_num, stripped)
            for match in REM_VALUE.finditer(line):
                self._record_spacing(f"{match.group(1)}rem", file_path, line_num, stripped)

            for match in FONT_SIZE.finditer(line):
                self._record_font(
                    match.group(1).strip(), file_path, line_num, "font-size", stripped
                )
            for match in FONT_WEIGHT.finditer(line):
                self._record_font(
                    match.group(1).strip(), file_path, line_num, "font-weight", stripped
                )

            for match in Z_INDEX.finditer(line):
                self._record_value(
                    f"z-{match.group(1)}", file_path, line_num, "z-index", stripped
                )

    def _extract_values_from_jsx(self, content: str, file_path: str) -> None:
        """
        Extract style values from JSX inline ``style={{ ... }}`` blocks.

        Every value found in a block is attributed to the line on which the
        ``style=`` attribute starts. Colors are only looked for in properties
        whose name contains "color" or "background".
        """
        color_patterns = (HEX_COLOR, RGB_COLOR, HSL_COLOR, OKLCH_COLOR)
        line_num = 1
        counted_to = 0

        for block in self._INLINE_STYLE.finditer(content):
            # Count newlines incrementally instead of rescanning from offset 0.
            line_num += content.count("\n", counted_to, block.start())
            counted_to = block.start()

            style_content = block.group(1)
            context = style_content[:100]

            for prop in self._INLINE_PROP.finditer(style_content):
                prop_name = prop.group(1)
                raw_value = next((g for g in prop.groups()[1:] if g is not None), "")
                prop_value = raw_value.strip()
                if not prop_value:
                    continue

                lowered_name = prop_name.lower()
                if "color" in lowered_name or "background" in lowered_name:
                    # Record each color token, not the whole property value,
                    # to match what the CSS extractor records.
                    for pattern in color_patterns:
                        for match in pattern.finditer(prop_value):
                            self._record_color(
                                match.group(0).lower(), file_path, line_num, context
                            )

                for match in PX_VALUE.finditer(prop_value):
                    self._record_spacing(
                        f"{match.group(1)}px", file_path, line_num, context
                    )

                if "fontSize" in prop_name or "fontWeight" in prop_name:
                    self._record_font(prop_value, file_path, line_num, prop_name, context)

    def _append_occurrence(
        self,
        buckets: "Iterable[Dict[str, List[ValueOccurrence]]]",
        key: str,
        value: str,
        file: str,
        line: int,
        prop: str,
        context: str,
    ) -> None:
        """Append a fresh ``ValueOccurrence`` under ``key`` to each bucket."""
        for bucket in buckets:
            bucket[key].append(
                ValueOccurrence(
                    value=value,
                    file=file,
                    line=line,
                    property=prop,
                    context=context,
                )
            )

    def _record_color(self, value: str, file: str, line: int, context: str) -> None:
        """Record a color occurrence, keyed by its normalized form."""
        normalized = self._normalize_color(value)
        self._append_occurrence(
            (self.color_values, self.values), normalized, value, file, line, "color", context
        )

    def _record_spacing(self, value: str, file: str, line: int, context: str) -> None:
        """Record a spacing/dimension value occurrence."""
        self._append_occurrence(
            (self.spacing_values, self.values), value, value, file, line, "spacing", context
        )

    def _record_font(self, value: str, file: str, line: int, prop: str, context: str) -> None:
        """Record a font-related value occurrence under property ``prop``."""
        self._append_occurrence(
            (self.font_values, self.values), value, value, file, line, prop, context
        )

    def _record_value(self, value: str, file: str, line: int, prop: str, context: str) -> None:
        """Record a generic value occurrence (only in ``values``)."""
        self._append_occurrence((self.values,), value, value, file, line, prop, context)

    def _normalize_color(self, color: str) -> str:
        """
        Normalize a color value for comparison.

        Lower-cases and trims the value, expands 3-digit hex to 6 digits, and
        removes insignificant whitespace from functional notation so that
        ``rgb(0,0,0)`` and ``rgb( 0, 0, 0 )`` compare equal.
        """
        color = color.lower().strip()
        if re.match(r"^#[0-9a-f]{3}$", color):
            return "#" + "".join(digit * 2 for digit in color[1:])
        if "(" in color:
            # Drop whitespace around delimiters, collapse the rest to one space
            # (space-separated syntaxes such as oklch() keep their separators).
            color = re.sub(r"\s*([(),/])\s*", r"\1", color)
            color = re.sub(r"\s+", " ", color)
        return color

    def _find_duplicates(self) -> List[Dict[str, Any]]:
        """
        Find values that appear multiple times.

        Returns:
            Up to 50 entries, most frequent first, each with ``value``,
            ``count``, ``files`` (at most 5, sorted), ``category`` (property of
            the first occurrence) and ``locations`` (first 5 occurrences).
        """
        duplicates = []
        for value, occurrences in self.values.items():
            if len(occurrences) < 2:
                continue
            # Sorted so the five files kept are the same on every run.
            files = sorted({o.file for o in occurrences})
            duplicates.append(
                {
                    "value": value,
                    "count": len(occurrences),
                    "files": files[:5],
                    "category": occurrences[0].property,
                    "locations": [{"file": o.file, "line": o.line} for o in occurrences[:5]],
                }
            )
        duplicates.sort(key=lambda entry: entry["count"], reverse=True)
        return duplicates[:50]

    def _generate_token_candidates(self) -> "List[TokenCandidate]":
        """
        Generate token suggestions for repeated values.

        Colors qualify from 2 occurrences, spacing values from 3. Returns the
        30 candidates with the highest confidence.
        """
        candidates = []

        for value, occurrences in self.color_values.items():
            if len(occurrences) >= 2:
                candidates.append(
                    TokenCandidate(
                        value=value,
                        suggested_name=self._suggest_color_name(value),
                        category="colors",
                        occurrences=len(occurrences),
                        locations=[Location(o.file, o.line) for o in occurrences[:5]],
                        confidence=min(0.9, 0.3 + (len(occurrences) * 0.1)),
                    )
                )

        for value, occurrences in self.spacing_values.items():
            if len(occurrences) >= 3:  # Higher threshold for spacing
                candidates.append(
                    TokenCandidate(
                        value=value,
                        suggested_name=self._suggest_spacing_name(value),
                        category="spacing",
                        occurrences=len(occurrences),
                        locations=[Location(o.file, o.line) for o in occurrences[:5]],
                        confidence=min(0.8, 0.2 + (len(occurrences) * 0.05)),
                    )
                )

        candidates.sort(key=lambda candidate: candidate.confidence, reverse=True)
        return candidates[:30]

    def _suggest_color_name(self, color: str) -> str:
        """Suggest a token name for a (normalized) color value."""
        common_colors = {
            "#ffffff": "color.white",
            "#000000": "color.black",
            "#f3f4f6": "color.neutral.100",
            "#e5e7eb": "color.neutral.200",
            "#d1d5db": "color.neutral.300",
            "#9ca3af": "color.neutral.400",
            "#6b7280": "color.neutral.500",
            "#4b5563": "color.neutral.600",
            "#374151": "color.neutral.700",
            "#1f2937": "color.neutral.800",
            "#111827": "color.neutral.900",
        }
        if color in common_colors:
            return common_colors[color]
        if color.startswith("#"):
            return f"color.custom.{color[1:7]}"
        # NOTE(review): every non-hex color gets this same name, so distinct
        # rgb()/hsl() candidates collide - confirm whether that is intended.
        return "color.custom.value"

    def _suggest_spacing_name(self, value: str) -> str:
        """Suggest a token name for a spacing value."""
        # NOTE(review): "16px" and "20px" both map to spacing.lg - confirm
        # which scale step 20px is meant to use.
        spacing_map = {
            "0px": "spacing.0",
            "4px": "spacing.xs",
            "8px": "spacing.sm",
            "12px": "spacing.md",
            "16px": "spacing.lg",
            "20px": "spacing.lg",
            "24px": "spacing.xl",
            "32px": "spacing.2xl",
            "48px": "spacing.3xl",
            "64px": "spacing.4xl",
            "0.25rem": "spacing.xs",
            "0.5rem": "spacing.sm",
            "0.75rem": "spacing.md",
            "1rem": "spacing.lg",
            "1.5rem": "spacing.xl",
            "2rem": "spacing.2xl",
        }
        if value in spacing_map:
            return spacing_map[value]
        return f"spacing.custom.{value.replace('px', '').replace('rem', 'r')}"

    async def find_unused_styles(self) -> List[Dict[str, Any]]:
        """
        Find CSS classes/selectors that are not used in the codebase.

        A class counts as used when it appears in a static ``className="..."``
        attribute or as ``styles.<name>`` in a JS/TS file.

        Returns:
            Up to 50 ``{"class": ..., "file": ...}`` entries sorted by class
            name. ``file`` is the last stylesheet (in scan order) defining it.
        """
        class_locations: Dict[str, str] = {}
        for rel_path, content in self._iter_sources(self._CLASS_SOURCE_GLOBS):
            for match in self._CLASS_DEFINITION.finditer(content):
                class_locations[match.group(1)] = rel_path

        used_classes = set()
        for _, content in self._iter_sources(self._SCRIPT_GLOBS):
            for match in self._CLASSNAME_ATTR.finditer(content):
                used_classes.update(match.group(1).split())
            used_classes.update(self._CSS_MODULE_REF.findall(content))

        unused = sorted(set(class_locations) - used_classes)
        return [
            {"class": cls, "file": class_locations.get(cls, "unknown")}
            for cls in unused[:50]  # Limit results
        ]

    async def analyze_naming_consistency(self) -> Dict[str, Any]:
        """
        Analyze naming consistency across style files.

        Each ``.name`` token in CSS/SCSS files is classified as BEM (contains
        ``__`` or ``--``), snake_case (``_``), kebab-case (``-``) or camelCase
        (has an upper-case letter). Plain lower-case names are not counted.

        Returns:
            Dict with ``pattern_counts``, ``primary_pattern`` (most common
            pattern, or None when nothing was classified) and up to 20
            ``inconsistencies`` (names that do not follow the primary pattern).
        """
        patterns: Dict[str, List[Dict[str, Any]]] = {
            "kebab-case": [],  # my-class-name
            "camelCase": [],  # myClassName
            "snake_case": [],  # my_class_name
            "BEM": [],  # block__element--modifier
        }

        for rel_path, content in self._iter_sources(self._CLASS_SOURCE_GLOBS):
            line = 1
            counted_to = 0
            for match in self._CLASS_SELECTOR.finditer(content):
                # Count newlines incrementally instead of rescanning from 0.
                line += content.count("\n", counted_to, match.start())
                counted_to = match.start()

                name = match.group(1)
                if "__" in name or "--" in name:
                    style = "BEM"
                elif "_" in name:
                    style = "snake_case"
                elif "-" in name:
                    style = "kebab-case"
                elif name != name.lower():
                    style = "camelCase"
                else:
                    continue
                patterns[style].append({"name": name, "file": rel_path, "line": line})

        pattern_counts = {style: len(items) for style, items in patterns.items()}
        primary = (
            max(pattern_counts, key=pattern_counts.get) if any(pattern_counts.values()) else None
        )

        # Names using any pattern other than the primary one (10 per pattern).
        inconsistencies: List[Dict[str, Any]] = []
        if primary:
            for style, items in patterns.items():
                if style != primary and items:
                    inconsistencies.extend(items[:10])

        return {
            "pattern_counts": pattern_counts,
            "primary_pattern": primary,
            "inconsistencies": inconsistencies[:20],
        }