Files
dss/demo/tools/analyze/styles.py
Digital Production Factory 276ed71f31 Initial commit: Clean DSS implementation
Migrated from design-system-swarm with fresh git history.
Old project history preserved in /home/overbits/apps/design-system-swarm

Core components:
- MCP Server (Python FastAPI with mcp 1.23.1)
- Claude Plugin (agents, commands, skills, strategies, hooks, core)
- DSS Backend (dss-mvp1 - token translation, Figma sync)
- Admin UI (Node.js/React)
- Server (Node.js/Express)
- Storybook integration (dss-mvp1/.storybook)

Self-contained configuration:
- All paths relative or use DSS_BASE_PATH=/home/overbits/dss
- PYTHONPATH configured for dss-mvp1 and dss-claude-plugin
- .env file with all configuration
- Claude plugin uses ${CLAUDE_PLUGIN_ROOT} for portability

Migration completed: $(date)
🤖 Clean migration with full functionality preserved
2025-12-09 18:45:48 -03:00

528 lines
19 KiB
Python

"""
Style Pattern Analyzer
Detects and analyzes style patterns in code to identify:
- Hardcoded values that should be tokens
- Duplicate values across files
- Inconsistent naming patterns
- Unused styles
"""
import re
from pathlib import Path
from typing import List, Dict, Any, Optional, Set, Tuple
from collections import defaultdict
from dataclasses import dataclass, field
from .base import (
Location,
TokenCandidate,
StylePattern,
StylingApproach,
)
# Color patterns
HEX_COLOR = re.compile(r'#(?:[0-9a-fA-F]{3}){1,2}\b')
RGB_COLOR = re.compile(r'rgba?\s*\(\s*\d+\s*,\s*\d+\s*,\s*\d+(?:\s*,\s*[\d.]+)?\s*\)')
HSL_COLOR = re.compile(r'hsla?\s*\(\s*\d+\s*,\s*[\d.]+%\s*,\s*[\d.]+%(?:\s*,\s*[\d.]+)?\s*\)')
OKLCH_COLOR = re.compile(r'oklch\s*\([^)]+\)')
# Dimension patterns
PX_VALUE = re.compile(r'\b(\d+(?:\.\d+)?)\s*px\b')
REM_VALUE = re.compile(r'\b(\d+(?:\.\d+)?)\s*rem\b')
EM_VALUE = re.compile(r'\b(\d+(?:\.\d+)?)\s*em\b')
PERCENT_VALUE = re.compile(r'\b(\d+(?:\.\d+)?)\s*%\b')
# Font patterns
FONT_SIZE = re.compile(r'font-size\s*:\s*([^;]+)')
FONT_FAMILY = re.compile(r'font-family\s*:\s*([^;]+)')
FONT_WEIGHT = re.compile(r'font-weight\s*:\s*(\d+|normal|bold|lighter|bolder)')
LINE_HEIGHT = re.compile(r'line-height\s*:\s*([^;]+)')
# Spacing patterns
MARGIN_PADDING = re.compile(r'(?:margin|padding)(?:-(?:top|right|bottom|left))?\s*:\s*([^;]+)')
GAP = re.compile(r'gap\s*:\s*([^;]+)')
# Border patterns
BORDER_RADIUS = re.compile(r'border-radius\s*:\s*([^;]+)')
BORDER_WIDTH = re.compile(r'border(?:-(?:top|right|bottom|left))?-width\s*:\s*([^;]+)')
# Shadow patterns
BOX_SHADOW = re.compile(r'box-shadow\s*:\s*([^;]+)')
# Z-index
Z_INDEX = re.compile(r'z-index\s*:\s*(\d+)')
@dataclass
class ValueOccurrence:
"""Tracks where a value appears."""
value: str
file: str
line: int
property: str # CSS property name
context: str # Surrounding code
class StyleAnalyzer:
"""
Analyzes style files and inline styles to find:
- Hardcoded values that should be tokens
- Duplicate values
- Inconsistent patterns
"""
def __init__(self, root_path: str):
self.root = Path(root_path).resolve()
self.values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
self.color_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
self.spacing_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
self.font_values: Dict[str, List[ValueOccurrence]] = defaultdict(list)
async def analyze(
self,
include_inline: bool = True,
include_css: bool = True
) -> Dict[str, Any]:
"""
Analyze all styles in the project.
Returns:
Dict with analysis results including duplicates and candidates
"""
# Reset collectors
self.values.clear()
self.color_values.clear()
self.spacing_values.clear()
self.font_values.clear()
# Scan CSS/SCSS files
if include_css:
await self._scan_style_files()
# Scan inline styles in JS/TS files
if include_inline:
await self._scan_inline_styles()
# Analyze results
duplicates = self._find_duplicates()
candidates = self._generate_token_candidates()
return {
'total_values_found': sum(len(v) for v in self.values.values()),
'unique_colors': len(self.color_values),
'unique_spacing': len(self.spacing_values),
'duplicates': duplicates,
'token_candidates': candidates,
}
async def _scan_style_files(self) -> None:
"""Scan CSS and SCSS files for values."""
skip_dirs = {'node_modules', '.git', 'dist', 'build'}
for pattern in ['**/*.css', '**/*.scss', '**/*.sass', '**/*.less']:
for file_path in self.root.rglob(pattern):
if any(skip in file_path.parts for skip in skip_dirs):
continue
try:
content = file_path.read_text(encoding='utf-8', errors='ignore')
rel_path = str(file_path.relative_to(self.root))
self._extract_values_from_css(content, rel_path)
except Exception:
continue
async def _scan_inline_styles(self) -> None:
"""Scan JS/TS files for inline style values."""
skip_dirs = {'node_modules', '.git', 'dist', 'build'}
for pattern in ['**/*.jsx', '**/*.tsx', '**/*.js', '**/*.ts']:
for file_path in self.root.rglob(pattern):
if any(skip in file_path.parts for skip in skip_dirs):
continue
try:
content = file_path.read_text(encoding='utf-8', errors='ignore')
rel_path = str(file_path.relative_to(self.root))
self._extract_values_from_jsx(content, rel_path)
except Exception:
continue
def _extract_values_from_css(self, content: str, file_path: str) -> None:
"""Extract style values from CSS content."""
lines = content.split('\n')
for line_num, line in enumerate(lines, 1):
# Skip comments and empty lines
if not line.strip() or line.strip().startswith('//') or line.strip().startswith('/*'):
continue
# Extract colors
for pattern in [HEX_COLOR, RGB_COLOR, HSL_COLOR, OKLCH_COLOR]:
for match in pattern.finditer(line):
value = match.group(0).lower()
self._record_color(value, file_path, line_num, line.strip())
# Extract dimensions
for match in PX_VALUE.finditer(line):
value = f"{match.group(1)}px"
self._record_spacing(value, file_path, line_num, line.strip())
for match in REM_VALUE.finditer(line):
value = f"{match.group(1)}rem"
self._record_spacing(value, file_path, line_num, line.strip())
# Extract font properties
for match in FONT_SIZE.finditer(line):
value = match.group(1).strip()
self._record_font(value, file_path, line_num, 'font-size', line.strip())
for match in FONT_WEIGHT.finditer(line):
value = match.group(1).strip()
self._record_font(value, file_path, line_num, 'font-weight', line.strip())
# Extract z-index
for match in Z_INDEX.finditer(line):
value = match.group(1)
self._record_value(f"z-{value}", file_path, line_num, 'z-index', line.strip())
def _extract_values_from_jsx(self, content: str, file_path: str) -> None:
"""Extract style values from JSX inline styles."""
# Find style={{ ... }} blocks
style_pattern = re.compile(r'style\s*=\s*\{\s*\{([^}]+)\}\s*\}', re.DOTALL)
for match in style_pattern.finditer(content):
style_content = match.group(1)
line_num = content[:match.start()].count('\n') + 1
# Parse the style object
# Look for property: value patterns
prop_pattern = re.compile(r'(\w+)\s*:\s*["\']?([^,\n"\']+)["\']?')
for prop_match in prop_pattern.finditer(style_content):
prop_name = prop_match.group(1)
prop_value = prop_match.group(2).strip()
# Check for colors
if any(c in prop_name.lower() for c in ['color', 'background']):
if HEX_COLOR.search(prop_value) or RGB_COLOR.search(prop_value):
self._record_color(prop_value.lower(), file_path, line_num, style_content[:100])
# Check for dimensions
if PX_VALUE.search(prop_value):
self._record_spacing(prop_value, file_path, line_num, style_content[:100])
if 'fontSize' in prop_name or 'fontWeight' in prop_name:
self._record_font(prop_value, file_path, line_num, prop_name, style_content[:100])
def _record_color(self, value: str, file: str, line: int, context: str) -> None:
"""Record a color value occurrence."""
normalized = self._normalize_color(value)
self.color_values[normalized].append(ValueOccurrence(
value=value,
file=file,
line=line,
property='color',
context=context,
))
self.values[normalized].append(ValueOccurrence(
value=value,
file=file,
line=line,
property='color',
context=context,
))
def _record_spacing(self, value: str, file: str, line: int, context: str) -> None:
"""Record a spacing/dimension value occurrence."""
self.spacing_values[value].append(ValueOccurrence(
value=value,
file=file,
line=line,
property='spacing',
context=context,
))
self.values[value].append(ValueOccurrence(
value=value,
file=file,
line=line,
property='spacing',
context=context,
))
def _record_font(self, value: str, file: str, line: int, prop: str, context: str) -> None:
"""Record a font-related value occurrence."""
self.font_values[value].append(ValueOccurrence(
value=value,
file=file,
line=line,
property=prop,
context=context,
))
self.values[value].append(ValueOccurrence(
value=value,
file=file,
line=line,
property=prop,
context=context,
))
def _record_value(self, value: str, file: str, line: int, prop: str, context: str) -> None:
"""Record a generic value occurrence."""
self.values[value].append(ValueOccurrence(
value=value,
file=file,
line=line,
property=prop,
context=context,
))
def _normalize_color(self, color: str) -> str:
"""Normalize color value for comparison."""
color = color.lower().strip()
# Expand 3-digit hex to 6-digit
if re.match(r'^#[0-9a-f]{3}$', color):
color = f"#{color[1]*2}{color[2]*2}{color[3]*2}"
return color
def _find_duplicates(self) -> List[Dict[str, Any]]:
"""Find values that appear multiple times."""
duplicates = []
for value, occurrences in self.values.items():
if len(occurrences) >= 2:
# Get unique files
files = list(set(o.file for o in occurrences))
duplicates.append({
'value': value,
'count': len(occurrences),
'files': files[:5], # Limit to 5 files
'category': occurrences[0].property,
'locations': [
{'file': o.file, 'line': o.line}
for o in occurrences[:5]
],
})
# Sort by count (most duplicated first)
duplicates.sort(key=lambda x: x['count'], reverse=True)
return duplicates[:50] # Return top 50
def _generate_token_candidates(self) -> List[TokenCandidate]:
"""Generate token suggestions for repeated values."""
candidates = []
# Color candidates
for value, occurrences in self.color_values.items():
if len(occurrences) >= 2:
suggested_name = self._suggest_color_name(value)
candidates.append(TokenCandidate(
value=value,
suggested_name=suggested_name,
category='colors',
occurrences=len(occurrences),
locations=[
Location(o.file, o.line) for o in occurrences[:5]
],
confidence=min(0.9, 0.3 + (len(occurrences) * 0.1)),
))
# Spacing candidates
for value, occurrences in self.spacing_values.items():
if len(occurrences) >= 3: # Higher threshold for spacing
suggested_name = self._suggest_spacing_name(value)
candidates.append(TokenCandidate(
value=value,
suggested_name=suggested_name,
category='spacing',
occurrences=len(occurrences),
locations=[
Location(o.file, o.line) for o in occurrences[:5]
],
confidence=min(0.8, 0.2 + (len(occurrences) * 0.05)),
))
# Sort by confidence
candidates.sort(key=lambda x: x.confidence, reverse=True)
return candidates[:30] # Return top 30
def _suggest_color_name(self, color: str) -> str:
"""Suggest a token name for a color value."""
# Common color mappings
common_colors = {
'#ffffff': 'color.white',
'#000000': 'color.black',
'#f3f4f6': 'color.neutral.100',
'#e5e7eb': 'color.neutral.200',
'#d1d5db': 'color.neutral.300',
'#9ca3af': 'color.neutral.400',
'#6b7280': 'color.neutral.500',
'#4b5563': 'color.neutral.600',
'#374151': 'color.neutral.700',
'#1f2937': 'color.neutral.800',
'#111827': 'color.neutral.900',
}
if color in common_colors:
return common_colors[color]
# Detect color family by hue (simplified)
if color.startswith('#'):
return f"color.custom.{color[1:7]}"
return f"color.custom.value"
def _suggest_spacing_name(self, value: str) -> str:
"""Suggest a token name for a spacing value."""
# Common spacing values
spacing_map = {
'0px': 'spacing.0',
'4px': 'spacing.xs',
'8px': 'spacing.sm',
'12px': 'spacing.md',
'16px': 'spacing.lg',
'20px': 'spacing.lg',
'24px': 'spacing.xl',
'32px': 'spacing.2xl',
'48px': 'spacing.3xl',
'64px': 'spacing.4xl',
'0.25rem': 'spacing.xs',
'0.5rem': 'spacing.sm',
'0.75rem': 'spacing.md',
'1rem': 'spacing.lg',
'1.5rem': 'spacing.xl',
'2rem': 'spacing.2xl',
}
if value in spacing_map:
return spacing_map[value]
return f"spacing.custom.{value.replace('px', '').replace('rem', 'r')}"
async def find_unused_styles(self) -> List[Dict[str, Any]]:
"""
Find CSS classes/selectors that are not used in the codebase.
Returns list of potentially unused styles.
"""
# Collect all CSS class definitions
css_classes = set()
class_locations = {}
skip_dirs = {'node_modules', '.git', 'dist', 'build'}
for pattern in ['**/*.css', '**/*.scss']:
for file_path in self.root.rglob(pattern):
if any(skip in file_path.parts for skip in skip_dirs):
continue
try:
content = file_path.read_text(encoding='utf-8', errors='ignore')
rel_path = str(file_path.relative_to(self.root))
# Find class definitions
for match in re.finditer(r'\.([a-zA-Z_][\w-]*)\s*[{,]', content):
class_name = match.group(1)
css_classes.add(class_name)
class_locations[class_name] = rel_path
except Exception:
continue
# Collect all class usage in JS/JSX/TS/TSX
used_classes = set()
for pattern in ['**/*.jsx', '**/*.tsx', '**/*.js', '**/*.ts']:
for file_path in self.root.rglob(pattern):
if any(skip in file_path.parts for skip in skip_dirs):
continue
try:
content = file_path.read_text(encoding='utf-8', errors='ignore')
# Find className usage
for match in re.finditer(r'className\s*=\s*["\']([^"\']+)["\']', content):
classes = match.group(1).split()
used_classes.update(classes)
# Find styles.xxx usage (CSS modules)
for match in re.finditer(r'styles\.(\w+)', content):
used_classes.add(match.group(1))
except Exception:
continue
# Find unused
unused = css_classes - used_classes
return [
{
'class': cls,
'file': class_locations.get(cls, 'unknown'),
}
for cls in sorted(unused)
][:50] # Limit results
async def analyze_naming_consistency(self) -> Dict[str, Any]:
"""
Analyze naming consistency across style files.
Returns analysis of naming patterns and inconsistencies.
"""
patterns = {
'kebab-case': [], # my-class-name
'camelCase': [], # myClassName
'snake_case': [], # my_class_name
'BEM': [], # block__element--modifier
}
skip_dirs = {'node_modules', '.git', 'dist', 'build'}
for pattern in ['**/*.css', '**/*.scss']:
for file_path in self.root.rglob(pattern):
if any(skip in file_path.parts for skip in skip_dirs):
continue
try:
content = file_path.read_text(encoding='utf-8', errors='ignore')
rel_path = str(file_path.relative_to(self.root))
# Find class names
for match in re.finditer(r'\.([a-zA-Z_][\w-]*)', content):
name = match.group(1)
line = content[:match.start()].count('\n') + 1
# Classify naming pattern
if '__' in name or '--' in name:
patterns['BEM'].append({'name': name, 'file': rel_path, 'line': line})
elif '_' in name:
patterns['snake_case'].append({'name': name, 'file': rel_path, 'line': line})
elif '-' in name:
patterns['kebab-case'].append({'name': name, 'file': rel_path, 'line': line})
elif name != name.lower():
patterns['camelCase'].append({'name': name, 'file': rel_path, 'line': line})
except Exception:
continue
# Calculate primary pattern
pattern_counts = {k: len(v) for k, v in patterns.items()}
primary = max(pattern_counts, key=pattern_counts.get) if any(pattern_counts.values()) else None
# Find inconsistencies (patterns different from primary)
inconsistencies = []
if primary:
for pattern_type, items in patterns.items():
if pattern_type != primary and items:
inconsistencies.extend(items[:10])
return {
'pattern_counts': pattern_counts,
'primary_pattern': primary,
'inconsistencies': inconsistencies[:20],
}