dss/apps/api/metrics.py
Commit 9dbd56271e
feat: Enterprise DSS architecture implementation
Complete implementation of enterprise design system validation:

Phase 1 - @dss/rules npm package:
- CLI with validate and init commands
- 16 rules across 5 categories (colors, spacing, typography, components, a11y)
- dss-ignore support (inline and next-line)
- Break-glass [dss-skip] for emergency merges
- CI workflow templates (Gitea, GitHub, GitLab)

Phase 2 - Metrics dashboard:
- FastAPI metrics API with SQLite storage
- Portfolio-wide metrics aggregation
- Project drill-down with file:line:column violations
- Trend charts and history tracking

Phase 3 - Local analysis cache:
- LocalAnalysisCache for offline-capable validation
- Mode detection (LOCAL/REMOTE/CI)
- Stale cache warnings with recommendations

Phase 4 - Project onboarding:
- dss-init command for project setup
- Creates ds.config.json, .dss/ folder structure
- Updates .gitignore and package.json scripts
- Optional CI workflow setup

Architecture decisions:
- No commit-back: CI uploads to dashboard, not git
- Three-tier: Dashboard (read-only) → CI (authoritative) → Local (advisory)
- Pull-based rules via npm for version control

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-11 09:41:36 -03:00


"""
DSS Metrics API Module.
Handles metrics collection from CI pipelines and provides dashboard data
for UI designers to view portfolio-wide design system adoption.
Enterprise Architecture:
- Tier 1 (Dashboard): Read-only aggregated metrics for UI designers
- Receives uploads from Tier 2 (CI/CD pipelines)
- No write operations from dashboard - only CI uploads
"""
import json
import os
import sqlite3
from pathlib import Path
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Header, HTTPException, Query
from pydantic import BaseModel
# Router for metrics endpoints
router = APIRouter(prefix="/api/metrics", tags=["metrics"])
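# The parent FastAPI app is expected to mount this router, e.g.
# app.include_router(router) in the application entry point (assumption;
# the entry point is not part of this module).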
# Database path
DB_PATH = Path(os.getenv("DSS_DB_PATH", Path.home() / ".dss" / "metrics.db"))
# === Pydantic Models ===
class ViolationLocation(BaseModel):
"""Location of a rule violation in source code."""
rule: str
line: int
column: Optional[int] = None
file: Optional[str] = None
class FileMetrics(BaseModel):
"""Metrics for a single file."""
file: str
errors: int
warnings: int
violations: List[ViolationLocation] = []
class MetricsUpload(BaseModel):
"""Metrics payload uploaded from CI."""
project: str
branch: str
commit: str
timestamp: Optional[str] = None
metrics: Dict[str, Any]
fileResults: Optional[List[FileMetrics]] = []
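# Example MetricsUpload body (illustrative values; the metric keys match what
# upload_metrics() reads below):
# {
#   "project": "web-app", "branch": "main", "commit": "abc123",
#   "metrics": {"totalFiles": 120, "passedFiles": 110, "failedFiles": 10,
#               "totalErrors": 14, "totalWarnings": 32, "rulesVersion": "1.2.0"},
#   "fileResults": [{"file": "src/Button.tsx", "errors": 1, "warnings": 0,
#                    "violations": [{"rule": "colors/no-hex", "line": 42, "column": 7}]}]
# }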
class ProjectMetricsSummary(BaseModel):
"""Summary metrics for a project."""
project: str
total_files: int
passed_files: int
failed_files: int
total_errors: int
total_warnings: int
rules_version: str
last_updated: str
adoption_score: float
class PortfolioMetrics(BaseModel):
"""Portfolio-wide metrics aggregation."""
total_projects: int
projects_passing: int
projects_failing: int
total_errors: int
total_warnings: int
average_adoption_score: float
projects: List[ProjectMetricsSummary]
# === Database Setup ===
def init_db():
"""Initialize the metrics database."""
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(DB_PATH))
cursor = conn.cursor()
# Metrics uploads table
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS metrics_uploads (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
branch TEXT NOT NULL,
commit_sha TEXT NOT NULL,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
total_files INTEGER DEFAULT 0,
passed_files INTEGER DEFAULT 0,
failed_files INTEGER DEFAULT 0,
total_errors INTEGER DEFAULT 0,
total_warnings INTEGER DEFAULT 0,
rules_version TEXT,
raw_data JSON
)
"""
)
# Violations table for detailed tracking
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS violations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
upload_id INTEGER NOT NULL,
project TEXT NOT NULL,
file_path TEXT NOT NULL,
rule TEXT NOT NULL,
line INTEGER,
column_num INTEGER,
severity TEXT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (upload_id) REFERENCES metrics_uploads(id)
)
"""
)
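    # Note: SQLite enforces the FOREIGN KEY above only when PRAGMA foreign_keys=ON
    # is set per connection; get_db() below does not enable it.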
# Component usage tracking for UI designers
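    # (schema only for now: no endpoint in this module writes to component_usage)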
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS component_usage (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
component_name TEXT NOT NULL,
file_path TEXT NOT NULL,
line INTEGER,
import_source TEXT,
is_ds_component BOOLEAN DEFAULT 0,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
)
"""
)
# Indexes for performance
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_uploads_project ON metrics_uploads(project)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_uploads_timestamp ON metrics_uploads(timestamp)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_violations_project ON violations(project)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_component_project ON component_usage(project)"
)
conn.commit()
conn.close()
# Initialize on module load
init_db()
# === Helper Functions ===
def get_db():
"""Get database connection."""
return sqlite3.connect(str(DB_PATH))
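# Worked example for the score below: 90 of 100 files passing with 5 errors
# -> base 90.0, penalty min(5 * 2, 50) = 10 -> adoption score 80.0.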
def calculate_adoption_score(passed: int, total: int, errors: int) -> float:
"""Calculate adoption score (0-100)."""
if total == 0:
return 100.0
base_score = (passed / total) * 100
# Penalty for errors
penalty = min(errors * 2, 50)
    return max(0.0, base_score - penalty)
# === API Endpoints ===
@router.post("/upload")
async def upload_metrics(
payload: MetricsUpload,
authorization: Optional[str] = Header(None),
):
"""
Upload metrics from CI pipeline.
This is the only write endpoint - called by CI after validation runs.
Authentication via DSS_API_TOKEN in Authorization header.
"""
# Validate token (if configured)
expected_token = os.getenv("DSS_API_TOKEN")
if expected_token:
if not authorization:
raise HTTPException(status_code=401, detail="Authorization required")
        # Strip the "Bearer " scheme prefix (str.replace would also alter tokens
        # that happen to contain that substring)
        token = authorization.removeprefix("Bearer ")
if token != expected_token:
raise HTTPException(status_code=403, detail="Invalid token")
conn = get_db()
cursor = conn.cursor()
try:
# Extract metrics
metrics = payload.metrics
total_files = metrics.get("totalFiles", 0)
passed_files = metrics.get("passedFiles", 0)
failed_files = metrics.get("failedFiles", 0)
total_errors = metrics.get("totalErrors", 0)
total_warnings = metrics.get("totalWarnings", 0)
rules_version = metrics.get("rulesVersion", "unknown")
# Insert main metrics record
cursor.execute(
"""
INSERT INTO metrics_uploads
(project, branch, commit_sha, total_files, passed_files, failed_files,
total_errors, total_warnings, rules_version, raw_data)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
payload.project,
payload.branch,
payload.commit,
total_files,
passed_files,
failed_files,
total_errors,
total_warnings,
rules_version,
json.dumps(payload.model_dump()),
),
)
upload_id = cursor.lastrowid
# Insert violations with file locations
if payload.fileResults:
for file_result in payload.fileResults:
for violation in file_result.violations:
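                    # Severity is inferred from the rule name below:
                    # "error" anywhere in the name maps to error, otherwise warning.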
cursor.execute(
"""
INSERT INTO violations
(upload_id, project, file_path, rule, line, column_num, severity)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
upload_id,
payload.project,
file_result.file,
violation.rule,
violation.line,
violation.column,
"error" if "error" in violation.rule.lower() else "warning",
),
)
conn.commit()
return {
"status": "success",
"upload_id": upload_id,
"project": payload.project,
"metrics": {
"files": total_files,
"errors": total_errors,
"warnings": total_warnings,
},
}
except Exception as e:
conn.rollback()
        raise HTTPException(status_code=500, detail=f"Failed to store metrics: {e}")
finally:
conn.close()
@router.get("/portfolio")
async def get_portfolio_metrics(
days: int = Query(default=30, description="Number of days to include"),
):
"""
Get portfolio-wide metrics aggregation.
Returns summary for all projects - designed for UI designer dashboard.
"""
conn = get_db()
cursor = conn.cursor()
try:
# Get latest metrics for each project
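        # SQLite's bare-column rule for MIN/MAX aggregates means the non-aggregated
        # columns come from the row holding MAX(timestamp), i.e. the latest upload.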
cursor.execute(
"""
SELECT
project,
total_files,
passed_files,
failed_files,
total_errors,
total_warnings,
rules_version,
MAX(timestamp) as last_updated
FROM metrics_uploads
WHERE timestamp > datetime('now', ?)
GROUP BY project
ORDER BY last_updated DESC
""",
(f"-{days} days",),
)
rows = cursor.fetchall()
projects = []
total_errors = 0
total_warnings = 0
projects_passing = 0
for row in rows:
(
project,
total_files,
passed_files,
failed_files,
errors,
warnings,
rules_version,
last_updated,
) = row
adoption_score = calculate_adoption_score(passed_files, total_files, errors)
projects.append(
ProjectMetricsSummary(
project=project,
total_files=total_files,
passed_files=passed_files,
failed_files=failed_files,
total_errors=errors,
total_warnings=warnings,
rules_version=rules_version or "unknown",
last_updated=last_updated,
adoption_score=adoption_score,
)
)
total_errors += errors
total_warnings += warnings
if errors == 0:
projects_passing += 1
avg_score = (
sum(p.adoption_score for p in projects) / len(projects) if projects else 0
)
return PortfolioMetrics(
total_projects=len(projects),
projects_passing=projects_passing,
projects_failing=len(projects) - projects_passing,
total_errors=total_errors,
total_warnings=total_warnings,
average_adoption_score=round(avg_score, 1),
projects=projects,
)
finally:
conn.close()
@router.get("/projects/{project_name}")
async def get_project_metrics(
project_name: str,
limit: int = Query(default=10, description="Number of recent builds"),
):
"""
Get detailed metrics for a specific project.
Includes historical data and violation breakdown.
"""
conn = get_db()
cursor = conn.cursor()
try:
# Get recent builds
cursor.execute(
"""
SELECT
id, branch, commit_sha, timestamp,
total_files, passed_files, failed_files,
total_errors, total_warnings, rules_version
FROM metrics_uploads
WHERE project = ?
ORDER BY timestamp DESC
LIMIT ?
""",
(project_name, limit),
)
builds = cursor.fetchall()
if not builds:
raise HTTPException(status_code=404, detail="Project not found")
# Get violation breakdown for latest build
latest_id = builds[0][0]
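        # Row tuples follow the SELECT order above: 0=id, 1=branch, 2=commit_sha,
        # 3=timestamp, 4=total_files, 5=passed_files, 6=failed_files,
        # 7=total_errors, 8=total_warnings, 9=rules_version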
cursor.execute(
"""
SELECT rule, COUNT(*) as count
FROM violations
WHERE upload_id = ?
GROUP BY rule
ORDER BY count DESC
""",
(latest_id,),
)
violations_by_rule = dict(cursor.fetchall())
# Get file locations for violations (for UI designer "where is this used?")
cursor.execute(
"""
SELECT file_path, rule, line, column_num
FROM violations
WHERE upload_id = ?
ORDER BY file_path, line
""",
(latest_id,),
)
violation_locations = [
{
"file": row[0],
"rule": row[1],
"line": row[2],
"column": row[3],
}
for row in cursor.fetchall()
]
return {
"project": project_name,
"latest": {
"branch": builds[0][1],
"commit": builds[0][2],
"timestamp": builds[0][3],
"total_files": builds[0][4],
"passed_files": builds[0][5],
"failed_files": builds[0][6],
"total_errors": builds[0][7],
"total_warnings": builds[0][8],
"rules_version": builds[0][9],
"adoption_score": calculate_adoption_score(
builds[0][5], builds[0][4], builds[0][7]
),
},
"violations_by_rule": violations_by_rule,
"violation_locations": violation_locations,
"history": [
{
"branch": b[1],
"commit": b[2],
"timestamp": b[3],
"errors": b[7],
"warnings": b[8],
}
for b in builds
],
}
finally:
conn.close()
@router.get("/projects/{project_name}/violations")
async def get_project_violations(
project_name: str,
rule: Optional[str] = Query(default=None, description="Filter by rule"),
file_pattern: Optional[str] = Query(default=None, description="Filter by file pattern"),
):
"""
Get detailed violation locations for a project.
Designed for UI designers to answer "Where is Button component used?"
"""
conn = get_db()
cursor = conn.cursor()
try:
# Get latest upload for project
cursor.execute(
"""
SELECT id FROM metrics_uploads
WHERE project = ?
ORDER BY timestamp DESC
LIMIT 1
""",
(project_name,),
)
row = cursor.fetchone()
if not row:
raise HTTPException(status_code=404, detail="Project not found")
upload_id = row[0]
# Build query with optional filters
query = """
SELECT file_path, rule, line, column_num, severity
FROM violations
WHERE upload_id = ?
"""
params = [upload_id]
if rule:
query += " AND rule LIKE ?"
params.append(f"%{rule}%")
if file_pattern:
query += " AND file_path LIKE ?"
params.append(f"%{file_pattern}%")
query += " ORDER BY file_path, line"
cursor.execute(query, params)
return {
"project": project_name,
"violations": [
{
"file": row[0],
"rule": row[1],
"line": row[2],
"column": row[3],
"severity": row[4],
}
for row in cursor.fetchall()
],
}
finally:
conn.close()
@router.get("/trends")
async def get_trends(
project: Optional[str] = Query(default=None, description="Filter by project"),
days: int = Query(default=30, description="Number of days"),
):
"""
Get trend data for charts.
Shows error/warning counts over time for portfolio or specific project.
"""
conn = get_db()
cursor = conn.cursor()
try:
if project:
cursor.execute(
"""
SELECT
DATE(timestamp) as date,
SUM(total_errors) as errors,
SUM(total_warnings) as warnings,
AVG(passed_files * 100.0 / NULLIF(total_files, 0)) as pass_rate
FROM metrics_uploads
WHERE project = ? AND timestamp > datetime('now', ?)
GROUP BY DATE(timestamp)
ORDER BY date
""",
(project, f"-{days} days"),
)
else:
cursor.execute(
"""
SELECT
DATE(timestamp) as date,
SUM(total_errors) as errors,
SUM(total_warnings) as warnings,
AVG(passed_files * 100.0 / NULLIF(total_files, 0)) as pass_rate
FROM metrics_uploads
WHERE timestamp > datetime('now', ?)
GROUP BY DATE(timestamp)
ORDER BY date
""",
(f"-{days} days",),
)
return {
"project": project or "portfolio",
"days": days,
"data": [
{
"date": row[0],
"errors": row[1] or 0,
"warnings": row[2] or 0,
"pass_rate": round(row[3] or 0, 1),
}
for row in cursor.fetchall()
],
}
finally:
conn.close()
@router.get("/rules/usage")
async def get_rules_usage(
days: int = Query(default=30, description="Number of days"),
):
"""
Get rule violation statistics across all projects.
Shows which rules are violated most often - useful for identifying
common patterns and potential training needs.
"""
conn = get_db()
cursor = conn.cursor()
try:
cursor.execute(
"""
SELECT
rule,
COUNT(*) as total_violations,
COUNT(DISTINCT project) as affected_projects
FROM violations v
JOIN metrics_uploads m ON v.upload_id = m.id
WHERE m.timestamp > datetime('now', ?)
GROUP BY rule
ORDER BY total_violations DESC
""",
(f"-{days} days",),
)
return {
"days": days,
"rules": [
{
"rule": row[0],
"total_violations": row[1],
"affected_projects": row[2],
}
for row in cursor.fetchall()
],
}
finally:
conn.close()