"""
Style Pattern Analyzer.

Detects and analyzes style patterns in code to identify:
- Hardcoded values that should be tokens
- Duplicate values across files
- Inconsistent naming patterns
- Unused styles
"""

import re
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, List, Tuple

from .base import Location, TokenCandidate

# Color patterns
HEX_COLOR = re.compile(r"#(?:[0-9a-fA-F]{3}){1,2}\b")
RGB_COLOR = re.compile(r"rgba?\s*\(\s*\d+\s*,\s*\d+\s*,\s*\d+(?:\s*,\s*[\d.]+)?\s*\)")
HSL_COLOR = re.compile(r"hsla?\s*\(\s*\d+\s*,\s*[\d.]+%\s*,\s*[\d.]+%(?:\s*,\s*[\d.]+)?\s*\)")
OKLCH_COLOR = re.compile(r"oklch\s*\([^)]+\)")

# Dimension patterns
# A number may be written "12", "1.5" or ".5". The lookbehind stops the match
# from starting in the middle of an identifier or after a decimal point, so
# ".5rem" is captured as ".5" rather than being misread as "5".
_NUMBER = r"(?<![\w.])(\d+(?:\.\d+)?|\.\d+)"
PX_VALUE = re.compile(_NUMBER + r"\s*px\b")
REM_VALUE = re.compile(_NUMBER + r"\s*rem\b")
EM_VALUE = re.compile(_NUMBER + r"\s*em\b")
# No trailing \b here: "%" is not a word character, so "%\b" would only match
# when a letter or digit follows the percent sign (never for "50%;").
PERCENT_VALUE = re.compile(_NUMBER + r"\s*%")

# Font patterns
FONT_SIZE = re.compile(r"font-size\s*:\s*([^;]+)")
FONT_FAMILY = re.compile(r"font-family\s*:\s*([^;]+)")
# "bolder" must come before "bold": alternation takes the first branch that
# matches, so the shorter keyword would otherwise win.
FONT_WEIGHT = re.compile(r"font-weight\s*:\s*(\d+|normal|bolder|bold|lighter)")
LINE_HEIGHT = re.compile(r"line-height\s*:\s*([^;]+)")

# Spacing patterns
MARGIN_PADDING = re.compile(r"(?:margin|padding)(?:-(?:top|right|bottom|left))?\s*:\s*([^;]+)")
GAP = re.compile(r"gap\s*:\s*([^;]+)")

# Border patterns
BORDER_RADIUS = re.compile(r"border-radius\s*:\s*([^;]+)")
BORDER_WIDTH = re.compile(r"border(?:-(?:top|right|bottom|left))?-width\s*:\s*([^;]+)")

# Shadow patterns
BOX_SHADOW = re.compile(r"box-shadow\s*:\s*([^;]+)")

# Z-index
Z_INDEX = re.compile(r"z-index\s*:\s*(\d+)")


@dataclass
class ValueOccurrence:
    """Tracks where a value appears."""

    value: str  # The style value as it was recorded (e.g. "#fff", "16px")
    file: str  # File the value was found in
    line: int  # Line number of the occurrence
    property: str  # CSS property name, or a category label such as "color"/"spacing"
    context: str  # Surrounding code


class StyleAnalyzer:
    """
    Analyzes style files and inline styles to find:

    - Hardcoded values that should be tokens
    - Duplicate values
    - Inconsistent patterns

    Occurrences are collected into per-category maps (colors, spacing, fonts)
    and into ``values``, which holds every recorded occurrence keyed by value.
    """

    # Directory names that are never scanned. They are compared against path
    # components *below* the project root.
    _SKIP_DIRS = frozenset({"node_modules", ".git", "dist", "build"})

    # File name globs; Path.rglob() already searches recursively.
    _STYLESHEET_GLOBS = ("*.css", "*.scss", "*.sass", "*.less")
    _CLASS_SHEET_GLOBS = ("*.css", "*.scss")
    _SCRIPT_GLOBS = ("*.jsx", "*.tsx", "*.js", "*.ts")

    # style={{ ... }} blocks in JSX.
    _INLINE_STYLE = re.compile(r"style\s*=\s*\{\s*\{([^}]+)\}\s*\}", re.DOTALL)
    # name: value pairs inside a style object. Quoted values are captured whole
    # so commas inside them (e.g. "rgb(0, 0, 0)") do not truncate the value;
    # unquoted values end at the next comma or newline.
    _STYLE_PROP = re.compile(r"""(\w+)\s*:\s*(?:"([^"\n]*)"|'([^'\n]*)'|([^,\n"']+))""")

    # ".name" followed by "{" or "," (treated as a class definition).
    _CLASS_DEFINITION = re.compile(r"\.([a-zA-Z_][\w-]*)\s*[{,]")
    # Any ".name" token in a stylesheet.
    _CLASS_REFERENCE = re.compile(r"\.([a-zA-Z_][\w-]*)")
    # className="a b c" with a plain string literal.
    _CLASS_NAME_ATTR = re.compile(r'className\s*=\s*["\']([^"\']+)["\']')
    # styles.xxx usage (CSS modules).
    _CSS_MODULE_REF = re.compile(r"styles\.(\w+)")

    _COMMON_COLOR_NAMES = {
        "#ffffff": "color.white",
        "#000000": "color.black",
        "#f3f4f6": "color.neutral.100",
        "#e5e7eb": "color.neutral.200",
        "#d1d5db": "color.neutral.300",
        "#9ca3af": "color.neutral.400",
        "#6b7280": "color.neutral.500",
        "#4b5563": "color.neutral.600",
        "#374151": "color.neutral.700",
        "#1f2937": "color.neutral.800",
        "#111827": "color.neutral.900",
    }

    # NOTE(review): "16px" and "20px" both map to "spacing.lg", so two different
    # values share one token name. Kept as-is; confirm the intended scale.
    _COMMON_SPACING_NAMES = {
        "0px": "spacing.0",
        "4px": "spacing.xs",
        "8px": "spacing.sm",
        "12px": "spacing.md",
        "16px": "spacing.lg",
        "20px": "spacing.lg",
        "24px": "spacing.xl",
        "32px": "spacing.2xl",
        "48px": "spacing.3xl",
        "64px": "spacing.4xl",
        "0.25rem": "spacing.xs",
        "0.5rem": "spacing.sm",
        "0.75rem": "spacing.md",
        "1rem": "spacing.lg",
        "1.5rem": "spacing.xl",
        "2rem": "spacing.2xl",
    }

    def __init__(self, root_path: str):
        """Create an analyzer for the project rooted at ``root_path``."""
        self.root = Path(root_path).resolve()
        self.values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.color_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.spacing_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
        self.font_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)

    async def analyze(
        self, include_inline: bool = True, include_css: bool = True
    ) -> Dict[str, Any]:
        """
        Analyze all styles in the project.

        Args:
            include_inline: Scan ``style={{ ... }}`` objects in JS/TS files.
            include_css: Scan CSS/SCSS/Sass/Less files.

        Returns:
            Dict with the keys ``total_values_found``, ``unique_colors``,
            ``unique_spacing``, ``duplicates`` and ``token_candidates``.
        """
        # Reset collectors so repeated calls do not accumulate results.
        self.values.clear()
        self.color_values.clear()
        self.spacing_values.clear()
        self.font_values.clear()

        if include_css:
            await self._scan_style_files()

        if include_inline:
            await self._scan_inline_styles()

        duplicates = self._find_duplicates()
        candidates = self._generate_token_candidates()

        return {
            "total_values_found": sum(len(v) for v in self.values.values()),
            "unique_colors": len(self.color_values),
            "unique_spacing": len(self.spacing_values),
            "duplicates": duplicates,
            "token_candidates": candidates,
        }

    def _iter_files(self, globs: Iterable[str]) -> Iterator[Tuple[str, str]]:
        """
        Yield ``(relative_path, text)`` for each readable file under the root.

        Files inside a skipped directory are ignored. The check uses the path
        relative to the root, so a project that itself lives below a directory
        named e.g. ``build`` is still scanned. Files that cannot be read are
        skipped, keeping the scan best-effort.
        """
        for glob in globs:
            for file_path in self.root.rglob(glob):
                relative = file_path.relative_to(self.root)
                if not self._SKIP_DIRS.isdisjoint(relative.parts):
                    continue
                try:
                    text = file_path.read_text(encoding="utf-8", errors="ignore")
                except OSError:
                    # Unreadable entry (permissions, a directory named *.css, ...).
                    continue
                yield str(relative), text

    async def _scan_style_files(self) -> None:
        """Scan CSS, SCSS, Sass and Less files for values."""
        for rel_path, text in self._iter_files(self._STYLESHEET_GLOBS):
            self._extract_values_from_css(text, rel_path)

    async def _scan_inline_styles(self) -> None:
        """Scan JS/TS files for inline style values."""
        for rel_path, text in self._iter_files(self._SCRIPT_GLOBS):
            self._extract_values_from_jsx(text, rel_path)

    def _extract_values_from_css(self, content: str, file_path: str) -> None:
        """
        Extract style values from CSS content, one source line at a time.

        Records colors, px/rem dimensions, font-size/font-weight values and
        z-index values. Line numbers are 1-based.
        """
        for line_num, raw_line in enumerate(content.split("\n"), 1):
            line = raw_line.strip()
            # Skip empty lines and lines that start a comment.
            if not line or line.startswith(("//", "/*")):
                continue

            for pattern in (HEX_COLOR, RGB_COLOR, HSL_COLOR, OKLCH_COLOR):
                for match in pattern.finditer(line):
                    self._record_color(match.group(0).lower(), file_path, line_num, line)

            # NOTE(review): a declaration such as "font-size: 16px" is recorded
            # here as a spacing value and again below as a font value, so it
            # adds two entries under the same key in self.values.
            for pattern, unit in ((PX_VALUE, "px"), (REM_VALUE, "rem")):
                for match in pattern.finditer(line):
                    self._record_spacing(f"{match.group(1)}{unit}", file_path, line_num, line)

            for pattern, prop in ((FONT_SIZE, "font-size"), (FONT_WEIGHT, "font-weight")):
                for match in pattern.finditer(line):
                    self._record_font(match.group(1).strip(), file_path, line_num, prop, line)

            for match in Z_INDEX.finditer(line):
                self._record_value(f"z-{match.group(1)}", file_path, line_num, "z-index", line)

    def _extract_values_from_jsx(self, content: str, file_path: str) -> None:
        """
        Extract style values from JSX inline ``style={{ ... }}`` objects.

        Every occurrence is attributed to the line on which its style block
        starts; the context is the first 100 characters of the block. Colors
        and px values are recorded per match, in the same form the CSS scan
        uses, so identical values from both sources share a key.
        """
        line_num, cursor = 1, 0
        for block in self._INLINE_STYLE.finditer(content):
            # Matches arrive in order, so only newlines since the previous
            # block need to be counted.
            line_num += content.count("\n", cursor, block.start())
            cursor = block.start()

            style_content = block.group(1)
            context = style_content[:100]

            for prop in self._STYLE_PROP.finditer(style_content):
                prop_name = prop.group(1)
                # Exactly one of the three value alternatives participates.
                raw_value = next(g for g in prop.groups()[1:] if g is not None)
                prop_value = raw_value.strip()

                lowered_name = prop_name.lower()
                if "color" in lowered_name or "background" in lowered_name:
                    for pattern in (HEX_COLOR, RGB_COLOR, HSL_COLOR, OKLCH_COLOR):
                        for hit in pattern.finditer(prop_value):
                            self._record_color(
                                hit.group(0).lower(), file_path, line_num, context
                            )

                for hit in PX_VALUE.finditer(prop_value):
                    self._record_spacing(f"{hit.group(1)}px", file_path, line_num, context)

                if "fontSize" in prop_name or "fontWeight" in prop_name:
                    self._record_font(prop_value, file_path, line_num, prop_name, context)

    def _record_color(self, value: str, file: str, line: int, context: str) -> None:
        """Record a color occurrence under its normalized color key."""
        key = self._normalize_color(value)
        occurrence = ValueOccurrence(
            value=value, file=file, line=line, property="color", context=context
        )
        self.color_values[key].append(occurrence)
        self.values[key].append(occurrence)

    def _record_spacing(self, value: str, file: str, line: int, context: str) -> None:
        """Record a spacing/dimension value occurrence."""
        occurrence = ValueOccurrence(
            value=value, file=file, line=line, property="spacing", context=context
        )
        self.spacing_values[value].append(occurrence)
        self.values[value].append(occurrence)

    def _record_font(self, value: str, file: str, line: int, prop: str, context: str) -> None:
        """Record a font-related value occurrence for property ``prop``."""
        occurrence = ValueOccurrence(
            value=value, file=file, line=line, property=prop, context=context
        )
        self.font_values[value].append(occurrence)
        self.values[value].append(occurrence)

    def _record_value(self, value: str, file: str, line: int, prop: str, context: str) -> None:
        """Record a generic value occurrence (only in ``values``)."""
        self.values[value].append(
            ValueOccurrence(value=value, file=file, line=line, property=prop, context=context)
        )

    def _normalize_color(self, color: str) -> str:
        """
        Normalize a color value for comparison.

        Lowercases and trims the value, and expands 3-digit hex colors to
        their 6-digit form (``#abc`` -> ``#aabbcc``).
        """
        color = color.lower().strip()
        if re.match(r"^#[0-9a-f]{3}$", color):
            color = "#" + "".join(digit * 2 for digit in color[1:])
        return color

    def _find_duplicates(self) -> List[Dict[str, Any]]:
        """
        Find values that appear at least twice.

        Returns:
            Up to 50 entries, most frequent first. Each entry has the value,
            its occurrence count, up to five distinct files (sorted, so the
            output is deterministic), the category of the first occurrence
            and up to five locations.
        """
        duplicates = []

        for value, occurrences in self.values.items():
            if len(occurrences) < 2:
                continue

            duplicates.append(
                {
                    "value": value,
                    "count": len(occurrences),
                    "files": sorted({o.file for o in occurrences})[:5],
                    "category": occurrences[0].property,
                    "locations": [{"file": o.file, "line": o.line} for o in occurrences[:5]],
                }
            )

        # Most duplicated first; the sort is stable, so ties keep insertion order.
        duplicates.sort(key=lambda entry: entry["count"], reverse=True)
        return duplicates[:50]

    def _generate_token_candidates(self) -> "List[TokenCandidate]":
        """
        Generate token suggestions for repeated values.

        Colors qualify from two occurrences, spacing values from three.

        Returns:
            Up to 30 candidates, highest confidence first.
        """
        candidates = []

        for value, occurrences in self.color_values.items():
            count = len(occurrences)
            if count >= 2:
                candidates.append(
                    TokenCandidate(
                        value=value,
                        suggested_name=self._suggest_color_name(value),
                        category="colors",
                        occurrences=count,
                        locations=[Location(o.file, o.line) for o in occurrences[:5]],
                        confidence=min(0.9, 0.3 + (count * 0.1)),
                    )
                )

        for value, occurrences in self.spacing_values.items():
            count = len(occurrences)
            if count >= 3:  # Higher threshold for spacing
                candidates.append(
                    TokenCandidate(
                        value=value,
                        suggested_name=self._suggest_spacing_name(value),
                        category="spacing",
                        occurrences=count,
                        locations=[Location(o.file, o.line) for o in occurrences[:5]],
                        confidence=min(0.8, 0.2 + (count * 0.05)),
                    )
                )

        candidates.sort(key=lambda candidate: candidate.confidence, reverse=True)
        return candidates[:30]

    def _suggest_color_name(self, color: str) -> str:
        """
        Suggest a token name for a color value.

        Known colors get a fixed name, other hex colors are named after their
        hex digits, and everything else gets ``color.custom.value``.
        """
        known = self._COMMON_COLOR_NAMES.get(color)
        if known is not None:
            return known

        if color.startswith("#"):
            return f"color.custom.{color[1:7]}"

        return "color.custom.value"

    def _suggest_spacing_name(self, value: str) -> str:
        """
        Suggest a token name for a spacing value.

        Common px/rem values map to a named step; any other value gets a
        ``spacing.custom.*`` name derived from the value itself.
        """
        known = self._COMMON_SPACING_NAMES.get(value)
        if known is not None:
            return known

        return f"spacing.custom.{value.replace('px', '').replace('rem', 'r')}"

    async def find_unused_styles(self) -> List[Dict[str, Any]]:
        """
        Find CSS classes/selectors that are not used in the codebase.

        Class definitions are collected from CSS/SCSS files. Usage is
        collected from ``className="..."`` string literals and ``styles.xxx``
        references in JS/TS files.

        Returns:
            Up to 50 ``{"class", "file"}`` entries sorted by class name. When
            a class is defined in several files, the last one scanned is
            reported.
        """
        defined_in: Dict[str, str] = {}
        for rel_path, text in self._iter_files(self._CLASS_SHEET_GLOBS):
            for match in self._CLASS_DEFINITION.finditer(text):
                defined_in[match.group(1)] = rel_path

        used_classes = set()
        for _, text in self._iter_files(self._SCRIPT_GLOBS):
            for match in self._CLASS_NAME_ATTR.finditer(text):
                used_classes.update(match.group(1).split())
            used_classes.update(self._CSS_MODULE_REF.findall(text))

        unused = sorted(defined_in.keys() - used_classes)
        return [{"class": name, "file": defined_in[name]} for name in unused[:50]]

    async def analyze_naming_consistency(self) -> Dict[str, Any]:
        """
        Analyze naming consistency across CSS/SCSS files.

        Every ``.name`` token is classified as BEM, snake_case, kebab-case or
        camelCase; all-lowercase names without separators are not counted.

        NOTE(review): the pattern matches any ``.name`` token, so file
        extensions inside ``url(...)`` or ``@import`` are classified too.

        Returns:
            Dict with ``pattern_counts``, ``primary_pattern`` (the most
            frequent style, or ``None`` when nothing was classified) and up
            to 20 ``inconsistencies`` (names not following the primary style).
        """
        patterns: Dict[str, List[Dict[str, Any]]] = {
            "kebab-case": [],  # my-class-name
            "camelCase": [],  # myClassName
            "snake_case": [],  # my_class_name
            "BEM": [],  # block__element--modifier
        }

        for rel_path, text in self._iter_files(self._CLASS_SHEET_GLOBS):
            line, cursor = 1, 0
            for match in self._CLASS_REFERENCE.finditer(text):
                # Keep a running line number instead of re-counting from the top.
                line += text.count("\n", cursor, match.start())
                cursor = match.start()
                name = match.group(1)

                if "__" in name or "--" in name:
                    style = "BEM"
                elif "_" in name:
                    style = "snake_case"
                elif "-" in name:
                    style = "kebab-case"
                elif name != name.lower():
                    style = "camelCase"
                else:
                    continue

                patterns[style].append({"name": name, "file": rel_path, "line": line})

        pattern_counts = {style: len(items) for style, items in patterns.items()}
        primary = (
            max(pattern_counts, key=pattern_counts.get) if any(pattern_counts.values()) else None
        )

        # Names that follow a different style than the primary one
        # (at most 10 per style, 20 overall).
        inconsistencies: List[Dict[str, Any]] = []
        if primary:
            for style, items in patterns.items():
                if style != primary and items:
                    inconsistencies.extend(items[:10])

        return {
            "pattern_counts": pattern_counts,
            "primary_pattern": primary,
            "inconsistencies": inconsistencies[:20],
        }