""" Style Pattern Analyzer Detects and analyzes style patterns in code to identify: - Hardcoded values that should be tokens - Duplicate values across files - Inconsistent naming patterns - Unused styles """ import re from pathlib import Path from typing import List, Dict, Any, Optional, Set, Tuple from collections import defaultdict from dataclasses import dataclass, field from .base import ( Location, TokenCandidate, StylePattern, StylingApproach, ) # Color patterns HEX_COLOR = re.compile(r'#(?:[0-9a-fA-F]{3}){1,2}\b') RGB_COLOR = re.compile(r'rgba?\s*\(\s*\d+\s*,\s*\d+\s*,\s*\d+(?:\s*,\s*[\d.]+)?\s*\)') HSL_COLOR = re.compile(r'hsla?\s*\(\s*\d+\s*,\s*[\d.]+%\s*,\s*[\d.]+%(?:\s*,\s*[\d.]+)?\s*\)') OKLCH_COLOR = re.compile(r'oklch\s*\([^)]+\)') # Dimension patterns PX_VALUE = re.compile(r'\b(\d+(?:\.\d+)?)\s*px\b') REM_VALUE = re.compile(r'\b(\d+(?:\.\d+)?)\s*rem\b') EM_VALUE = re.compile(r'\b(\d+(?:\.\d+)?)\s*em\b') PERCENT_VALUE = re.compile(r'\b(\d+(?:\.\d+)?)\s*%\b') # Font patterns FONT_SIZE = re.compile(r'font-size\s*:\s*([^;]+)') FONT_FAMILY = re.compile(r'font-family\s*:\s*([^;]+)') FONT_WEIGHT = re.compile(r'font-weight\s*:\s*(\d+|normal|bold|lighter|bolder)') LINE_HEIGHT = re.compile(r'line-height\s*:\s*([^;]+)') # Spacing patterns MARGIN_PADDING = re.compile(r'(?:margin|padding)(?:-(?:top|right|bottom|left))?\s*:\s*([^;]+)') GAP = re.compile(r'gap\s*:\s*([^;]+)') # Border patterns BORDER_RADIUS = re.compile(r'border-radius\s*:\s*([^;]+)') BORDER_WIDTH = re.compile(r'border(?:-(?:top|right|bottom|left))?-width\s*:\s*([^;]+)') # Shadow patterns BOX_SHADOW = re.compile(r'box-shadow\s*:\s*([^;]+)') # Z-index Z_INDEX = re.compile(r'z-index\s*:\s*(\d+)') @dataclass class ValueOccurrence: """Tracks where a value appears.""" value: str file: str line: int property: str # CSS property name context: str # Surrounding code class StyleAnalyzer: """ Analyzes style files and inline styles to find: - Hardcoded values that should be tokens - Duplicate values - Inconsistent patterns """ def __init__(self, root_path: str): self.root = Path(root_path).resolve() self.values: Dict[str, List[ValueOccurrence]] = defaultdict(list) self.color_values: Dict[str, List[ValueOccurrence]] = defaultdict(list) self.spacing_values: Dict[str, List[ValueOccurrence]] = defaultdict(list) self.font_values: Dict[str, List[ValueOccurrence]] = defaultdict(list) async def analyze( self, include_inline: bool = True, include_css: bool = True ) -> Dict[str, Any]: """ Analyze all styles in the project. Returns: Dict with analysis results including duplicates and candidates """ # Reset collectors self.values.clear() self.color_values.clear() self.spacing_values.clear() self.font_values.clear() # Scan CSS/SCSS files if include_css: await self._scan_style_files() # Scan inline styles in JS/TS files if include_inline: await self._scan_inline_styles() # Analyze results duplicates = self._find_duplicates() candidates = self._generate_token_candidates() return { 'total_values_found': sum(len(v) for v in self.values.values()), 'unique_colors': len(self.color_values), 'unique_spacing': len(self.spacing_values), 'duplicates': duplicates, 'token_candidates': candidates, } async def _scan_style_files(self) -> None: """Scan CSS and SCSS files for values.""" skip_dirs = {'node_modules', '.git', 'dist', 'build'} for pattern in ['**/*.css', '**/*.scss', '**/*.sass', '**/*.less']: for file_path in self.root.rglob(pattern): if any(skip in file_path.parts for skip in skip_dirs): continue try: content = file_path.read_text(encoding='utf-8', errors='ignore') rel_path = str(file_path.relative_to(self.root)) self._extract_values_from_css(content, rel_path) except Exception: continue async def _scan_inline_styles(self) -> None: """Scan JS/TS files for inline style values.""" skip_dirs = {'node_modules', '.git', 'dist', 'build'} for pattern in ['**/*.jsx', '**/*.tsx', '**/*.js', '**/*.ts']: for file_path in self.root.rglob(pattern): if any(skip in file_path.parts for skip in skip_dirs): continue try: content = file_path.read_text(encoding='utf-8', errors='ignore') rel_path = str(file_path.relative_to(self.root)) self._extract_values_from_jsx(content, rel_path) except Exception: continue def _extract_values_from_css(self, content: str, file_path: str) -> None: """Extract style values from CSS content.""" lines = content.split('\n') for line_num, line in enumerate(lines, 1): # Skip comments and empty lines if not line.strip() or line.strip().startswith('//') or line.strip().startswith('/*'): continue # Extract colors for pattern in [HEX_COLOR, RGB_COLOR, HSL_COLOR, OKLCH_COLOR]: for match in pattern.finditer(line): value = match.group(0).lower() self._record_color(value, file_path, line_num, line.strip()) # Extract dimensions for match in PX_VALUE.finditer(line): value = f"{match.group(1)}px" self._record_spacing(value, file_path, line_num, line.strip()) for match in REM_VALUE.finditer(line): value = f"{match.group(1)}rem" self._record_spacing(value, file_path, line_num, line.strip()) # Extract font properties for match in FONT_SIZE.finditer(line): value = match.group(1).strip() self._record_font(value, file_path, line_num, 'font-size', line.strip()) for match in FONT_WEIGHT.finditer(line): value = match.group(1).strip() self._record_font(value, file_path, line_num, 'font-weight', line.strip()) # Extract z-index for match in Z_INDEX.finditer(line): value = match.group(1) self._record_value(f"z-{value}", file_path, line_num, 'z-index', line.strip()) def _extract_values_from_jsx(self, content: str, file_path: str) -> None: """Extract style values from JSX inline styles.""" # Find style={{ ... }} blocks style_pattern = re.compile(r'style\s*=\s*\{\s*\{([^}]+)\}\s*\}', re.DOTALL) for match in style_pattern.finditer(content): style_content = match.group(1) line_num = content[:match.start()].count('\n') + 1 # Parse the style object # Look for property: value patterns prop_pattern = re.compile(r'(\w+)\s*:\s*["\']?([^,\n"\']+)["\']?') for prop_match in prop_pattern.finditer(style_content): prop_name = prop_match.group(1) prop_value = prop_match.group(2).strip() # Check for colors if any(c in prop_name.lower() for c in ['color', 'background']): if HEX_COLOR.search(prop_value) or RGB_COLOR.search(prop_value): self._record_color(prop_value.lower(), file_path, line_num, style_content[:100]) # Check for dimensions if PX_VALUE.search(prop_value): self._record_spacing(prop_value, file_path, line_num, style_content[:100]) if 'fontSize' in prop_name or 'fontWeight' in prop_name: self._record_font(prop_value, file_path, line_num, prop_name, style_content[:100]) def _record_color(self, value: str, file: str, line: int, context: str) -> None: """Record a color value occurrence.""" normalized = self._normalize_color(value) self.color_values[normalized].append(ValueOccurrence( value=value, file=file, line=line, property='color', context=context, )) self.values[normalized].append(ValueOccurrence( value=value, file=file, line=line, property='color', context=context, )) def _record_spacing(self, value: str, file: str, line: int, context: str) -> None: """Record a spacing/dimension value occurrence.""" self.spacing_values[value].append(ValueOccurrence( value=value, file=file, line=line, property='spacing', context=context, )) self.values[value].append(ValueOccurrence( value=value, file=file, line=line, property='spacing', context=context, )) def _record_font(self, value: str, file: str, line: int, prop: str, context: str) -> None: """Record a font-related value occurrence.""" self.font_values[value].append(ValueOccurrence( value=value, file=file, line=line, property=prop, context=context, )) self.values[value].append(ValueOccurrence( value=value, file=file, line=line, property=prop, context=context, )) def _record_value(self, value: str, file: str, line: int, prop: str, context: str) -> None: """Record a generic value occurrence.""" self.values[value].append(ValueOccurrence( value=value, file=file, line=line, property=prop, context=context, )) def _normalize_color(self, color: str) -> str: """Normalize color value for comparison.""" color = color.lower().strip() # Expand 3-digit hex to 6-digit if re.match(r'^#[0-9a-f]{3}$', color): color = f"#{color[1]*2}{color[2]*2}{color[3]*2}" return color def _find_duplicates(self) -> List[Dict[str, Any]]: """Find values that appear multiple times.""" duplicates = [] for value, occurrences in self.values.items(): if len(occurrences) >= 2: # Get unique files files = list(set(o.file for o in occurrences)) duplicates.append({ 'value': value, 'count': len(occurrences), 'files': files[:5], # Limit to 5 files 'category': occurrences[0].property, 'locations': [ {'file': o.file, 'line': o.line} for o in occurrences[:5] ], }) # Sort by count (most duplicated first) duplicates.sort(key=lambda x: x['count'], reverse=True) return duplicates[:50] # Return top 50 def _generate_token_candidates(self) -> List[TokenCandidate]: """Generate token suggestions for repeated values.""" candidates = [] # Color candidates for value, occurrences in self.color_values.items(): if len(occurrences) >= 2: suggested_name = self._suggest_color_name(value) candidates.append(TokenCandidate( value=value, suggested_name=suggested_name, category='colors', occurrences=len(occurrences), locations=[ Location(o.file, o.line) for o in occurrences[:5] ], confidence=min(0.9, 0.3 + (len(occurrences) * 0.1)), )) # Spacing candidates for value, occurrences in self.spacing_values.items(): if len(occurrences) >= 3: # Higher threshold for spacing suggested_name = self._suggest_spacing_name(value) candidates.append(TokenCandidate( value=value, suggested_name=suggested_name, category='spacing', occurrences=len(occurrences), locations=[ Location(o.file, o.line) for o in occurrences[:5] ], confidence=min(0.8, 0.2 + (len(occurrences) * 0.05)), )) # Sort by confidence candidates.sort(key=lambda x: x.confidence, reverse=True) return candidates[:30] # Return top 30 def _suggest_color_name(self, color: str) -> str: """Suggest a token name for a color value.""" # Common color mappings common_colors = { '#ffffff': 'color.white', '#000000': 'color.black', '#f3f4f6': 'color.neutral.100', '#e5e7eb': 'color.neutral.200', '#d1d5db': 'color.neutral.300', '#9ca3af': 'color.neutral.400', '#6b7280': 'color.neutral.500', '#4b5563': 'color.neutral.600', '#374151': 'color.neutral.700', '#1f2937': 'color.neutral.800', '#111827': 'color.neutral.900', } if color in common_colors: return common_colors[color] # Detect color family by hue (simplified) if color.startswith('#'): return f"color.custom.{color[1:7]}" return f"color.custom.value" def _suggest_spacing_name(self, value: str) -> str: """Suggest a token name for a spacing value.""" # Common spacing values spacing_map = { '0px': 'spacing.0', '4px': 'spacing.xs', '8px': 'spacing.sm', '12px': 'spacing.md', '16px': 'spacing.lg', '20px': 'spacing.lg', '24px': 'spacing.xl', '32px': 'spacing.2xl', '48px': 'spacing.3xl', '64px': 'spacing.4xl', '0.25rem': 'spacing.xs', '0.5rem': 'spacing.sm', '0.75rem': 'spacing.md', '1rem': 'spacing.lg', '1.5rem': 'spacing.xl', '2rem': 'spacing.2xl', } if value in spacing_map: return spacing_map[value] return f"spacing.custom.{value.replace('px', '').replace('rem', 'r')}" async def find_unused_styles(self) -> List[Dict[str, Any]]: """ Find CSS classes/selectors that are not used in the codebase. Returns list of potentially unused styles. """ # Collect all CSS class definitions css_classes = set() class_locations = {} skip_dirs = {'node_modules', '.git', 'dist', 'build'} for pattern in ['**/*.css', '**/*.scss']: for file_path in self.root.rglob(pattern): if any(skip in file_path.parts for skip in skip_dirs): continue try: content = file_path.read_text(encoding='utf-8', errors='ignore') rel_path = str(file_path.relative_to(self.root)) # Find class definitions for match in re.finditer(r'\.([a-zA-Z_][\w-]*)\s*[{,]', content): class_name = match.group(1) css_classes.add(class_name) class_locations[class_name] = rel_path except Exception: continue # Collect all class usage in JS/JSX/TS/TSX used_classes = set() for pattern in ['**/*.jsx', '**/*.tsx', '**/*.js', '**/*.ts']: for file_path in self.root.rglob(pattern): if any(skip in file_path.parts for skip in skip_dirs): continue try: content = file_path.read_text(encoding='utf-8', errors='ignore') # Find className usage for match in re.finditer(r'className\s*=\s*["\']([^"\']+)["\']', content): classes = match.group(1).split() used_classes.update(classes) # Find styles.xxx usage (CSS modules) for match in re.finditer(r'styles\.(\w+)', content): used_classes.add(match.group(1)) except Exception: continue # Find unused unused = css_classes - used_classes return [ { 'class': cls, 'file': class_locations.get(cls, 'unknown'), } for cls in sorted(unused) ][:50] # Limit results async def analyze_naming_consistency(self) -> Dict[str, Any]: """ Analyze naming consistency across style files. Returns analysis of naming patterns and inconsistencies. """ patterns = { 'kebab-case': [], # my-class-name 'camelCase': [], # myClassName 'snake_case': [], # my_class_name 'BEM': [], # block__element--modifier } skip_dirs = {'node_modules', '.git', 'dist', 'build'} for pattern in ['**/*.css', '**/*.scss']: for file_path in self.root.rglob(pattern): if any(skip in file_path.parts for skip in skip_dirs): continue try: content = file_path.read_text(encoding='utf-8', errors='ignore') rel_path = str(file_path.relative_to(self.root)) # Find class names for match in re.finditer(r'\.([a-zA-Z_][\w-]*)', content): name = match.group(1) line = content[:match.start()].count('\n') + 1 # Classify naming pattern if '__' in name or '--' in name: patterns['BEM'].append({'name': name, 'file': rel_path, 'line': line}) elif '_' in name: patterns['snake_case'].append({'name': name, 'file': rel_path, 'line': line}) elif '-' in name: patterns['kebab-case'].append({'name': name, 'file': rel_path, 'line': line}) elif name != name.lower(): patterns['camelCase'].append({'name': name, 'file': rel_path, 'line': line}) except Exception: continue # Calculate primary pattern pattern_counts = {k: len(v) for k, v in patterns.items()} primary = max(pattern_counts, key=pattern_counts.get) if any(pattern_counts.values()) else None # Find inconsistencies (patterns different from primary) inconsistencies = [] if primary: for pattern_type, items in patterns.items(): if pattern_type != primary and items: inconsistencies.extend(items[:10]) return { 'pattern_counts': pattern_counts, 'primary_pattern': primary, 'inconsistencies': inconsistencies[:20], }