Some checks failed
DSS Project Analysis / dss-context-update (push) Has been cancelled
This reverts commit 72cb7319f5.
258 lines
8.5 KiB
Python
258 lines
8.5 KiB
Python
"""
|
|
Remote Browser Strategy implementation.
|
|
Connects to the DSS API to retrieve browser state and logs via Shadow State pattern.
|
|
"""
|
|
|
|
import aiohttp
|
|
import asyncio
|
|
import logging
|
|
import base64
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
from ..base import BrowserStrategy
|
|
from ...core.context import DSSContext
|
|
|
|
# Configure module logger
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class RemoteBrowserStrategy(BrowserStrategy):
    """
    Implements browser interaction via remote API calls.

    Relies on the browser-side Logger to sync state to the server
    (Shadow State pattern); every getter is a best-effort read of that
    synced log stream and degrades to an empty/placeholder result rather
    than raising.
    """

    def __init__(self, context: DSSContext):
        """Initialize with the DSS context used to resolve session and API URL."""
        self.context = context

    @staticmethod
    def _timestamp(entry: Dict[str, Any]) -> Any:
        """Sort key for log entries: the entry's timestamp, defaulting to 0 when absent."""
        return entry.get("timestamp", 0)

    async def _get_logs_from_api(self, session_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Fetch all logs for a specific session from the remote API.

        Args:
            session_id: The session ID to query. Uses the context's session
                (or "latest") if None.

        Returns:
            List of log entries; an empty list on 404, non-200, network
            error, or any unexpected failure (best-effort boundary).
        """
        if session_id is None:
            session_id = self.context.session_id or "latest"

        # Strip any trailing slash so concatenation below yields a clean URL.
        base_url = self.context.get_api_url().rstrip('/')
        url = f"{base_url}/api/browser-logs/{session_id}"

        try:
            timeout = aiohttp.ClientTimeout(total=10.0)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(url) as response:
                    if response.status == 404:
                        logger.warning("Session %s not found on remote server.", session_id)
                        return []

                    if response.status != 200:
                        logger.error("Failed to fetch logs: %s %s", response.status, response.reason)
                        return []

                    data = await response.json()

                    # The API is expected to return the exportJSON() structure
                    # from browser-logger.js:
                    #   { sessionId: "...", logs: [...], diagnostic: {...} }
                    return data.get("logs", [])

        except aiohttp.ClientError as e:
            logger.error("Network error fetching browser logs: %s", e)
            return []
        except Exception:
            # Top-level boundary: never let an unexpected failure crash the
            # caller; logger.exception records the traceback for diagnosis.
            logger.exception("Unexpected error in RemoteBrowserStrategy")
            return []

    async def get_console_logs(
        self,
        session_id: Optional[str] = None,
        limit: int = 100,
        level: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Get browser console logs from the remote API.

        Args:
            session_id: The session ID to retrieve logs for.
            limit: Maximum number of logs to return.
            level: Filter by log level (log, info, warn, error).

        Returns:
            Up to `limit` console-category entries, newest first.
        """
        logs = await self._get_logs_from_api(session_id)

        # Console category plus uncaught errors / unhandled rejections,
        # which callers of this method also want to see.
        console_logs = [
            entry for entry in logs
            if entry.get("category") in ["console", "uncaughtError", "unhandledRejection"]
        ]

        # Optional level filter.
        if level:
            console_logs = [entry for entry in console_logs if entry.get("level") == level]

        # Newest first, then apply the limit.
        console_logs.sort(key=self._timestamp, reverse=True)

        return console_logs[:limit]

    async def capture_screenshot(
        self,
        selector: Optional[str] = None,
        full_page: bool = False
    ) -> str:
        """
        Capture a screenshot.

        In REMOTE mode, this requests the server to perform the capture or returns
        a placeholder URL if the server capability isn't available.

        Args:
            selector: Currently ignored — placeholder implementation.
            full_page: Currently ignored — placeholder implementation.

        Returns:
            URL to screenshot or placeholder message (currently a 1x1
            transparent GIF data URL).
        """
        # Placeholder implementation until server-side rendering/CDP proxy is ready.
        # Ideally, we would POST to /api/commands/{session_id}/screenshot
        logger.warning("Remote screenshot capture is not yet fully implemented on server.")

        # 1x1 transparent GIF as a placeholder data URL.
        return "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"

    async def get_dom_snapshot(self) -> str:
        """
        Get the current DOM snapshot via Shadow State.

        Retrieves the latest log entry with category='snapshot' containing
        the full HTML state captured by the browser.

        Returns:
            The snapshot HTML, or an HTML-comment placeholder when no
            (valid) snapshot exists for the session.
        """
        logs = await self._get_logs_from_api()

        # Only entries that carry an actual snapshot payload.
        snapshots = [
            entry for entry in logs
            if entry.get("category") == "snapshot" and "snapshot" in entry.get("data", {})
        ]

        if not snapshots:
            return "<!-- No Shadow State snapshot available for this session -->"

        # Most recent snapshot wins.
        latest = max(snapshots, key=self._timestamp)

        # Extract HTML from the snapshot data object safely.
        # Structure in browser-logger: entry.data.snapshot.html
        try:
            html = latest.get("data", {}).get("snapshot", {}).get("html", "")
            if html:
                return html
            return "<!-- Corrupted Shadow State snapshot data -->"
        except (KeyError, AttributeError):
            # A non-dict where a dict was expected raises AttributeError on .get().
            return "<!-- Corrupted or unexpected snapshot data format -->"

    async def get_errors(
        self,
        severity: Optional[str] = None,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """
        Get error logs from the remote API.

        Args:
            severity: Filter by severity (not implemented yet — currently ignored).
            limit: Maximum number of errors to return.

        Returns:
            Up to `limit` entries with level == "error", newest first.
        """
        logs = await self._get_logs_from_api()

        # All entries the browser logged at error level.
        # NOTE(review): `severity` is accepted but unused; the log schema
        # visible here only exposes "level" — confirm intended mapping
        # before implementing.
        errors = [entry for entry in logs if entry.get("level") == "error"]

        # Newest first, then apply the limit.
        errors.sort(key=self._timestamp, reverse=True)

        return errors[:limit]

    async def run_accessibility_audit(
        self,
        selector: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Get accessibility audit results from Shadow State.

        In REMOTE mode, this retrieves the accessibility data captured by the
        browser-side logger using the captureAccessibilitySnapshot() method.

        Args:
            selector: Not used in REMOTE mode (filter not supported).

        Returns:
            Accessibility audit results if available in Shadow State, or an
            empty-results dict with a hint message when none were captured.
        """
        logs = await self._get_logs_from_api()

        # Either category name may carry audit data depending on logger version.
        audits = [
            entry for entry in logs
            if entry.get("category") in ("accessibility", "accessibilitySnapshot")
        ]

        if not audits:
            return {
                "violations": [],
                "passes": [],
                "incomplete": [],
                "message": "No accessibility audit found in Shadow State. Trigger audit from browser console using __DSS_BROWSER_LOGS.audit()"
            }

        # Most recent audit wins.
        latest = max(audits, key=self._timestamp)
        data = latest.get("data", {})

        # Extract results from whichever key the logger version used,
        # falling back to the raw data payload.
        if "results" in data:
            return data["results"]
        elif "accessibility" in data:
            return data["accessibility"]
        else:
            return data

    async def get_performance_metrics(self) -> Dict[str, Any]:
        """
        Get performance metrics from Shadow State.

        In REMOTE mode, this retrieves Core Web Vitals and performance data
        captured by the browser-side logger.

        Returns:
            Dictionary with performance metrics if available; otherwise a
            dict with "error" and "message" keys explaining the absence.
        """
        logs = await self._get_logs_from_api()

        # Performance data may ride along in an accessibilitySnapshot entry.
        perf_logs = [
            entry for entry in logs
            if entry.get("category") in ["performance", "accessibilitySnapshot"]
        ]

        if not perf_logs:
            return {
                "error": "No performance data found in Shadow State.",
                "message": "Performance metrics are captured automatically during page load."
            }

        # Most recent performance entry wins.
        latest = max(perf_logs, key=self._timestamp)
        data = latest.get("data", {})

        # Prefer the explicit "performance" payload (Core Web Vitals);
        # otherwise return the raw entry data.
        if "performance" in data:
            return {"core_web_vitals": data["performance"]}
        else:
            return {"performance_data": data}