# dss/dss-claude-plugin/strategies/local/browser.py

"""
Local Browser Strategy implementation using Playwright.

Provides direct, local control over a browser for tasks like DOM inspection,
screenshotting, and running audits. This is the LOCAL mode counterpart to
RemoteBrowserStrategy which uses Shadow State pattern.
"""

import asyncio
import json
import logging
import os
import tempfile
from typing import Any, Dict, List, Optional, Type

from ..base import BrowserStrategy

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# CDN location of the axe-core accessibility testing library
AXE_CORE_SCRIPT_URL = "https://cdnjs.cloudflare.com/ajax/libs/axe-core/4.8.4/axe.min.js"

# Playwright is an optional dependency: the module must stay importable
# without it, so a failed import only flips PLAYWRIGHT_AVAILABLE to False.
try:
    from playwright.async_api import (
        Browser,
        ConsoleMessage,
        Error as PlaywrightError,
        Page,
        Playwright,
        TimeoutError as PlaywrightTimeoutError,
        async_playwright,
    )
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
    # Stand-in names so annotations and `except` clauses elsewhere in this
    # module still resolve when Playwright is not installed.
    Playwright = Browser = Page = ConsoleMessage = Type[Any]
    PlaywrightError = PlaywrightTimeoutError = Exception
else:
    PLAYWRIGHT_AVAILABLE = True


class LocalBrowserStrategy(BrowserStrategy):
    """
    Implements the BrowserStrategy using Playwright for local browser automation.

    This strategy manages a single shared browser instance to perform actions
    directly on the local machine. It is ideal for development environments
    where direct access to a browser is possible.

    Features:
    - Shared browser instance (reused across strategy instances)
    - Console log and page error capture via Playwright page events
    - Screenshot capture (element or full page)
    - DOM snapshot retrieval
    - Accessibility auditing via axe-core injection
    - Core Web Vitals and performance metrics

    Note: This class requires Playwright to be installed.
    Run `pip install "playwright"` and `playwright install chromium`.
    """

    # Class-level browser state (shared across instances)
    _playwright: Optional[Playwright] = None
    _browser: Optional[Browser] = None
    # NOTE(review): this lock is created at import time and shared by every
    # instance; if the application ever runs more than one event loop,
    # confirm the lock is never contended across loops.
    _browser_lock = asyncio.Lock()

    # Playwright reports console warnings with type "warning", while the
    # public API of get_console_logs() documents the level as "warn".
    _LEVEL_ALIASES = {"warn": "warning"}

    def __init__(self, context: Any):
        """
        Initialize the LocalBrowserStrategy.

        Only logs a warning when Playwright is missing; the hard failure is
        deferred to launch() so that constructing the strategy never raises.

        Args:
            context: The DSSContext providing configuration and session info.
        """
        self.context = context
        self.page: Optional[Page] = None
        self._console_logs: List[Any] = []
        self._page_errors: List[Any] = []

        if not PLAYWRIGHT_AVAILABLE:
            logger.warning(
                "Playwright not found. LocalBrowserStrategy will be non-functional. "
                "Please run 'pip install \"playwright\"' and 'playwright install chromium'."
            )

    def _check_playwright(self) -> None:
        """
        Ensure Playwright is available.

        Raises:
            NotImplementedError: If Playwright could not be imported.
        """
        if not PLAYWRIGHT_AVAILABLE:
            raise NotImplementedError(
                "Playwright is not installed. Cannot use LocalBrowserStrategy. "
                "Install with: pip install playwright && playwright install chromium"
            )

    @staticmethod
    def _tail(items: List[Dict[str, Any]], limit: int) -> List[Dict[str, Any]]:
        """Return the last `limit` items, or an empty list if limit <= 0."""
        # A plain items[-limit:] would return *everything* for limit == 0
        # (because -0 == 0) and drop items from the front for negative limits.
        return items[-limit:] if limit > 0 else []

    @staticmethod
    async def _stop_playwright_quietly(playwright: Any) -> None:
        """Best-effort stop of a Playwright driver that is being discarded."""
        try:
            await playwright.stop()
        except Exception as exc:
            # The driver is being thrown away anyway; a failure to stop it
            # must not mask the operation that triggered the cleanup.
            logger.debug("Ignoring error while stopping Playwright: %s", exc)

    async def launch(self, headless: bool = True) -> None:
        """
        Launch and initialize the shared Playwright browser instance.

        This method is idempotent: if a connected browser already exists it is
        reused and `headless` is ignored. If the previous browser has
        disconnected, its Playwright driver is stopped before a new one is
        started so the old driver is not leaked.

        Args:
            headless: Whether to run browser in headless mode (default: True)

        Raises:
            NotImplementedError: If Playwright is not installed.
        """
        self._check_playwright()

        # Fast path: browser is already running
        if LocalBrowserStrategy._browser and LocalBrowserStrategy._browser.is_connected():
            logger.debug("Browser already running, reusing existing instance.")
            return

        async with LocalBrowserStrategy._browser_lock:
            # Re-check under the lock: another task may have launched the
            # browser while this one was waiting.
            if LocalBrowserStrategy._browser and LocalBrowserStrategy._browser.is_connected():
                return

            # Any remaining handles are stale (browser disconnected). Drop
            # them and stop the old driver instead of overwriting it.
            LocalBrowserStrategy._browser = None
            stale_playwright = LocalBrowserStrategy._playwright
            LocalBrowserStrategy._playwright = None
            if stale_playwright is not None:
                await self._stop_playwright_quietly(stale_playwright)

            logger.info("Starting Playwright...")
            playwright = await async_playwright().start()

            logger.info("Launching new browser instance...")
            try:
                browser = await playwright.chromium.launch(headless=headless)
            except Exception:
                # Do not leave a started driver behind when the launch fails.
                await self._stop_playwright_quietly(playwright)
                raise

            LocalBrowserStrategy._playwright = playwright
            LocalBrowserStrategy._browser = browser
            logger.info("Browser instance launched successfully.")

    async def navigate(self, url: str, wait_until: str = "domcontentloaded") -> None:
        """
        Navigate the browser to a specific URL.

        This creates a new page for the session, replacing (and closing) any
        existing page. Previously captured console logs and page errors are
        discarded, and fresh listeners are attached to the new page.

        Args:
            url: The URL to navigate to.
            wait_until: The navigation event to wait for
                       ('load', 'domcontentloaded', 'networkidle').

        Raises:
            RuntimeError: If no browser is available after launch().
        """
        await self.launch()

        # Close existing page if any
        if self.page and not self.page.is_closed():
            await self.page.close()

        if not LocalBrowserStrategy._browser:
            raise RuntimeError("Browser is not launched. Call launch() first.")

        # Create new page and start from an empty capture state
        self.page = await LocalBrowserStrategy._browser.new_page()
        self._console_logs.clear()
        self._page_errors.clear()

        # Listeners must be attached before goto() so that messages emitted
        # during page load are captured too.
        self.page.on("console", self._on_console_message)
        self.page.on("pageerror", self._on_page_error)

        logger.info("Navigating to %s...", url)
        await self.page.goto(url, wait_until=wait_until)
        logger.info("Navigation to %s complete.", url)

    def _on_console_message(self, msg: Any) -> None:
        """Store a console message emitted by the current page."""
        self._console_logs.append(msg)

    def _on_page_error(self, error: Any) -> None:
        """Store an uncaught page error emitted by the current page."""
        self._page_errors.append(error)

    async def get_console_logs(
        self,
        session_id: Optional[str] = None,
        limit: int = 100,
        level: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Retrieve captured console logs from the current page.

        Args:
            session_id: Ignored in LOCAL mode (used for API compatibility).
            limit: Maximum number of logs to return. Values <= 0 return an
                   empty list.
            level: Filter by log level ('log', 'warn', 'error', 'info', 'debug').
                   'warn' also matches Playwright's native 'warning' type.

        Returns:
            List of the most recent log entries (oldest first), each with
            level, message, timestamp (always None), category and
            data.location. The level is reported exactly as Playwright
            provides it.
        """
        if not self.page:
            logger.warning("No active page. Returning empty logs.")
            return []

        logs = []
        for msg in self._console_logs:
            # Best effort: a message that cannot be read is skipped rather
            # than failing the whole retrieval.
            try:
                logs.append(
                    {
                        "level": msg.type,
                        "message": msg.text,
                        "timestamp": None,  # Playwright doesn't provide timestamp directly
                        "category": "console",
                        "data": {
                            "location": getattr(msg, "location", None),
                        },
                    }
                )
            except Exception as e:
                logger.debug("Error processing console message: %s", e)

        if level:
            wanted = {level, self._LEVEL_ALIASES.get(level, level)}
            logs = [entry for entry in logs if entry["level"] in wanted]

        # Return most recent logs up to limit
        return self._tail(logs, limit)

    async def capture_screenshot(
        self, selector: Optional[str] = None, full_page: bool = False
    ) -> str:
        """
        Capture a screenshot of the current page or a specific element.

        Every call writes to a new, uniquely named PNG file in the system
        temporary directory, so earlier screenshots are never overwritten.
        The caller owns the returned file and is responsible for deleting it.

        Args:
            selector: CSS selector to capture a specific element.
                     If None, captures the viewport.
            full_page: If True, captures the full scrollable page content.
                      Ignored if selector is provided.

        Returns:
            Path to the saved screenshot file.

        Raises:
            RuntimeError: If no active page is available.
        """
        if not self.page or self.page.is_closed():
            raise RuntimeError("No active page to capture screenshot from.")

        # Reserve a unique file; mkstemp creates it atomically, which also
        # avoids writing to a predictable path in a shared temp directory.
        session_id = getattr(self.context, 'session_id', 'local')
        fd, path = tempfile.mkstemp(
            prefix=f"dss_screenshot_{session_id}_", suffix=".png"
        )
        os.close(fd)  # Playwright writes to the path itself

        try:
            if selector:
                element = self.page.locator(selector)
                await element.screenshot(path=path, timeout=10000)
                logger.info("Element screenshot saved to %s", path)
            else:
                await self.page.screenshot(path=path, full_page=full_page, timeout=10000)
                logger.info("Page screenshot saved to %s", path)
        except Exception as e:
            logger.error("Failed to capture screenshot: %s", e)
            # Do not leave the empty placeholder file behind.
            try:
                os.remove(path)
            except OSError:
                pass
            raise
        return path

    async def get_dom_snapshot(self) -> str:
        """
        Get the current DOM state as an HTML string.

        Returns:
            The page's HTML content, or an HTML comment placeholder when
            there is no open page (this method does not raise in that case).
        """
        if not self.page or self.page.is_closed():
            return "<!-- No active page to get DOM snapshot from. -->"
        return await self.page.content()

    async def get_errors(
        self, severity: Optional[str] = None, limit: int = 50
    ) -> List[Dict[str, Any]]:
        """
        Retrieve captured page errors (e.g., uncaught exceptions).

        Args:
            severity: Filter by severity (not yet implemented; ignored).
            limit: Maximum number of errors to return. Values <= 0 return an
                   empty list.

        Returns:
            List of the most recent errors (oldest first), each with level,
            category, message and data.name / data.stack.
        """
        errors = []
        for err in self._page_errors:
            # Best effort: an error object that cannot be read is skipped.
            try:
                errors.append(
                    {
                        "level": "error",
                        "category": "uncaughtError",
                        "message": str(err),
                        "data": {
                            "name": getattr(err, 'name', 'Error'),
                            "stack": getattr(err, 'stack', None),
                        },
                    }
                )
            except Exception as e:
                logger.debug("Error processing page error: %s", e)

        return self._tail(errors, limit)

    async def run_accessibility_audit(
        self, selector: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run an accessibility audit on the current page using axe-core.

        The axe-core library is injected from AXE_CORE_SCRIPT_URL unless the
        page already exposes a global `axe` object (for example from an
        earlier audit on the same page), so repeated audits do not pile up
        duplicate script tags.

        Args:
            selector: A CSS selector to limit the audit to a specific element.
                      If None, audits the entire page.

        Returns:
            A dictionary containing the axe-core audit results with:
            - violations: List of accessibility violations
            - passes: List of passing rules
            - incomplete: List of rules that need review
            - inapplicable: List of rules that don't apply

        Raises:
            RuntimeError: If no active page is available.
        """
        if not self.page or self.page.is_closed():
            raise RuntimeError("No active page to run accessibility audit on.")

        axe_present = await self.page.evaluate(
            "() => typeof window.axe !== 'undefined'"
        )
        if not axe_present:
            logger.info("Injecting axe-core library...")
            await self.page.add_script_tag(url=AXE_CORE_SCRIPT_URL)

            # Wait for axe to be available
            await self.page.wait_for_function("typeof axe !== 'undefined'", timeout=5000)

        scope = " on " + selector if selector else ""
        logger.info("Running accessibility audit%s...", scope)

        # Run axe with selector context if provided
        if selector:
            result = await self.page.evaluate(
                "(selector) => axe.run(selector)", selector
            )
        else:
            result = await self.page.evaluate("() => axe.run()")

        violations_count = len(result.get('violations', []))
        logger.info(
            "Accessibility audit complete. Found %s violations.", violations_count
        )

        return result

    async def get_performance_metrics(self) -> Dict[str, Any]:
        """
        Get performance metrics, including Navigation Timing and Core Web Vitals.

        The in-page collection script waits 500 ms for buffered
        PerformanceObserver entries before resolving, so this call takes at
        least that long when PerformanceObserver is usable.

        Returns:
            Dictionary containing:
            - navigation_timing: Raw Navigation Timing API data
            - core_web_vitals: dict with fcp, lcp, cls and ttfb
              (fcp/lcp/ttfb are None when the page did not report them)

        Raises:
            RuntimeError: If no active page is available.
        """
        if not self.page or self.page.is_closed():
            raise RuntimeError("No active page to get performance metrics from.")

        # 1. Get Navigation Timing API metrics
        # Serialized in the page and parsed here so a plain dict is returned.
        timing_raw = await self.page.evaluate(
            "() => JSON.stringify(window.performance.timing)"
        )
        nav_timing = json.loads(timing_raw)

        # 2. Get Core Web Vitals via PerformanceObserver
        # This script collects buffered entries and waits briefly for new ones
        metrics_script = """
        () => new Promise((resolve) => {
            const metrics = { lcp: null, cls: 0, fcp: null, ttfb: null };

            // Get TTFB from navigation timing
            const navEntry = performance.getEntriesByType('navigation')[0];
            if (navEntry) {
                metrics.ttfb = navEntry.responseStart - navEntry.requestStart;
            }

            // Get FCP from paint entries
            const paintEntries = performance.getEntriesByType('paint');
            for (const entry of paintEntries) {
                if (entry.name === 'first-contentful-paint') {
                    metrics.fcp = entry.startTime;
                }
            }

            // Set up observer for LCP and CLS
            try {
                const observer = new PerformanceObserver((list) => {
                    for (const entry of list.getEntries()) {
                        if (entry.entryType === 'largest-contentful-paint') {
                            metrics.lcp = entry.startTime;
                        }
                        if (entry.entryType === 'layout-shift' && !entry.hadRecentInput) {
                            metrics.cls += entry.value;
                        }
                    }
                });

                observer.observe({
                    type: 'largest-contentful-paint',
                    buffered: true
                });
                observer.observe({
                    type: 'layout-shift',
                    buffered: true
                });

                // Give some time for metrics to be collected
                setTimeout(() => {
                    observer.disconnect();
                    resolve(metrics);
                }, 500);
            } catch (e) {
                // PerformanceObserver may not be fully supported
                resolve(metrics);
            }
        })
        """
        core_web_vitals = await self.page.evaluate(metrics_script)

        return {
            "navigation_timing": nav_timing,
            "core_web_vitals": core_web_vitals
        }

    async def close(self) -> None:
        """
        Close the current page. The shared browser instance is kept for reuse.

        The page reference and captured logs/errors are always reset, even if
        the page was already closed or closing it raises.

        To fully close the browser, use close_browser() class method.
        """
        page, self.page = self.page, None
        try:
            if page is not None and not page.is_closed():
                await page.close()
                logger.info("Page closed.")
        finally:
            self._console_logs.clear()
            self._page_errors.clear()

    @classmethod
    async def close_browser(cls) -> None:
        """
        Close the browser and stop the Playwright instance.

        This is a class method that closes the shared browser instance.
        Should be called during application shutdown. The shared references
        are cleared up front and Playwright is stopped even if closing the
        browser raises; the exception still propagates to the caller.
        """
        async with cls._browser_lock:
            browser, cls._browser = cls._browser, None
            playwright, cls._playwright = cls._playwright, None

            try:
                if browser:
                    await browser.close()
                    logger.info("Browser instance closed.")
            finally:
                if playwright:
                    await playwright.stop()
                    logger.info("Playwright stopped.")