Some checks failed
DSS Project Analysis / dss-context-update (push) Has been cancelled
This reverts commit 72cb7319f5.
456 lines
16 KiB
Python
456 lines
16 KiB
Python
"""
|
|
Local Browser Strategy implementation using Playwright.
|
|
|
|
Provides direct, local control over a browser for tasks like DOM inspection,
|
|
screenshotting, and running audits. This is the LOCAL mode counterpart to
|
|
RemoteBrowserStrategy which uses Shadow State pattern.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
import tempfile
|
|
from typing import Any, Dict, List, Optional, Type
|
|
|
|
from ..base import BrowserStrategy
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# URL of the axe-core accessibility testing library injected into audited pages.
AXE_CORE_SCRIPT_URL = "https://cdnjs.cloudflare.com/ajax/libs/axe-core/4.8.4/axe.min.js"

# Playwright is an optional dependency: import it if present, otherwise fall
# back to placeholders so this module can still be imported.
try:
    from playwright.async_api import (
        Browser,
        ConsoleMessage,
        Error as PlaywrightError,
        Page,
        Playwright,
        TimeoutError as PlaywrightTimeoutError,
        async_playwright,
    )

    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False

    # Placeholder names used only in type hints when Playwright is missing.
    # `Any` (rather than `Type[Any]`) is used because the annotated values are
    # instances, not class objects.
    Playwright = Any
    Browser = Any
    Page = Any
    ConsoleMessage = Any

    # Exception placeholders so `except PlaywrightError:` clauses stay valid.
    PlaywrightError = Exception
    PlaywrightTimeoutError = Exception

    # Defined so the name always exists at module level; callers are expected
    # to check PLAYWRIGHT_AVAILABLE before using it.
    async_playwright = None
|
|
|
|
|
|
class LocalBrowserStrategy(BrowserStrategy):
    """
    Implements the BrowserStrategy using Playwright for local browser automation.

    This strategy manages a single browser instance, shared at class level,
    to perform actions directly on the local machine. It is ideal for
    development environments where direct access to a browser is possible.

    Features:
    - Browser pool pattern (one browser reused by every instance)
    - Console log and page error capture via Playwright page events
    - Screenshot capture (element, viewport or full page)
    - DOM snapshot retrieval
    - Accessibility auditing via axe-core injection
    - Navigation Timing and Core Web Vitals metrics

    Note: This class requires Playwright to be installed.
    Run `pip install "playwright"` and `playwright install chromium`.
    """

    # Class-level browser pool (shared across instances).
    _playwright: Optional[Playwright] = None
    _browser: Optional[Browser] = None
    # Created once at class-definition time; serializes launch and shutdown.
    _browser_lock = asyncio.Lock()

    def __init__(self, context: Any):
        """
        Initialize the LocalBrowserStrategy.

        Only logs a warning when Playwright is missing; the error is raised
        later, when a browser is actually requested.

        Args:
            context: The DSSContext providing configuration and session info.
        """
        self.context = context
        self.page: Optional[Page] = None
        # Raw objects received from the page's "console" / "pageerror" events.
        self._console_logs: List[Any] = []
        self._page_errors: List[Any] = []

        if not PLAYWRIGHT_AVAILABLE:
            logger.warning(
                "Playwright not found. LocalBrowserStrategy will be non-functional. "
                "Please run 'pip install \"playwright\"' and 'playwright install chromium'."
            )

    def _check_playwright(self) -> None:
        """
        Ensure Playwright is available.

        Raises:
            NotImplementedError: If Playwright could not be imported.
        """
        if not PLAYWRIGHT_AVAILABLE:
            raise NotImplementedError(
                "Playwright is not installed. Cannot use LocalBrowserStrategy. "
                "Install with: pip install playwright && playwright install chromium"
            )

    @staticmethod
    def _most_recent(entries: List[Dict[str, Any]], limit: int) -> List[Dict[str, Any]]:
        """Return the last `limit` entries, or an empty list if `limit` <= 0."""
        # A plain `entries[-limit:]` would return everything for limit == 0.
        if limit <= 0:
            return []
        return entries[-limit:]

    async def launch(self, headless: bool = True) -> None:
        """
        Launch and initialize the shared Playwright browser instance.

        This method is idempotent: if a connected browser already exists it is
        reused and `headless` is ignored.

        Args:
            headless: Whether to run browser in headless mode (default: True).
                Only applied when a new browser is actually launched.

        Raises:
            NotImplementedError: If Playwright is not installed.
        """
        self._check_playwright()

        pool = LocalBrowserStrategy

        # Fast path without taking the lock.
        if pool._browser and pool._browser.is_connected():
            logger.debug("Browser already running, reusing existing instance.")
            return

        async with pool._browser_lock:
            # Re-check under the lock: another task may have launched meanwhile.
            if pool._browser and pool._browser.is_connected():
                return

            # A Playwright driver left over from a browser that has since
            # disconnected is stopped first, so it is not leaked when a new
            # one is started below.
            stale_playwright = pool._playwright
            if stale_playwright is not None:
                pool._playwright = None
                try:
                    await stale_playwright.stop()
                except Exception as exc:
                    logger.debug(f"Ignoring error while stopping stale Playwright: {exc}")

            logger.info("Starting Playwright...")
            pool._playwright = await async_playwright().start()

            logger.info("Launching new browser instance...")
            pool._browser = await pool._playwright.chromium.launch(headless=headless)
            logger.info("Browser instance launched successfully.")

    async def navigate(self, url: str, wait_until: str = "domcontentloaded") -> None:
        """
        Navigate the browser to a specific URL.

        Launches the browser if needed, closes this instance's current page
        (if any), opens a new page, clears previously captured logs and
        errors, and attaches listeners for console messages and page errors.

        Args:
            url: The URL to navigate to.
            wait_until: The navigation event to wait for
                ('load', 'domcontentloaded', 'networkidle').

        Raises:
            RuntimeError: If no browser is available after launch().
        """
        await self.launch()

        # Close existing page if any
        if self.page and not self.page.is_closed():
            await self.page.close()

        browser = LocalBrowserStrategy._browser
        if not browser:
            raise RuntimeError("Browser is not launched. Call launch() first.")

        # Create new page
        self.page = await browser.new_page()
        self._console_logs.clear()
        self._page_errors.clear()

        # Set up event listeners for log capture
        self.page.on("console", self._on_console_message)
        self.page.on("pageerror", self._on_page_error)

        logger.info(f"Navigating to {url}...")
        await self.page.goto(url, wait_until=wait_until)
        logger.info(f"Navigation to {url} complete.")

    def _on_console_message(self, msg: Any) -> None:
        """Store a console message event for later retrieval."""
        self._console_logs.append(msg)

    def _on_page_error(self, error: Any) -> None:
        """Store a page error event for later retrieval."""
        self._page_errors.append(error)

    async def get_console_logs(
        self,
        session_id: Optional[str] = None,
        limit: int = 100,
        level: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Retrieve captured console logs from the current page.

        Args:
            session_id: Ignored in LOCAL mode (used for API compatibility).
            limit: Maximum number of logs to return. Values <= 0 yield an
                empty list.
            level: Filter by log level ('log', 'warn', 'error', 'info', 'debug').

        Returns:
            The most recent matching entries, oldest first. Each entry has the
            keys 'level', 'message', 'timestamp' (always None), 'category'
            ('console') and 'data' (containing 'location'). Returns an empty
            list when there is no active page.
        """
        if not self.page:
            logger.warning("No active page. Returning empty logs.")
            return []

        logs: List[Dict[str, Any]] = []
        for msg in self._console_logs:
            try:
                entry = {
                    "level": msg.type,
                    "message": msg.text,
                    "timestamp": None,  # Playwright doesn't provide timestamp directly
                    "category": "console",
                    "data": {
                        "location": getattr(msg, "location", None),
                    },
                }
            except Exception as e:
                # Best effort: one malformed message must not hide the others.
                logger.debug(f"Error processing console message: {e}")
                continue
            logs.append(entry)

        if level:
            logs = [entry for entry in logs if entry["level"] == level]

        # Return most recent logs up to limit
        return self._most_recent(logs, limit)

    async def capture_screenshot(
        self, selector: Optional[str] = None, full_page: bool = False
    ) -> str:
        """
        Capture a screenshot of the current page or a specific element.

        Args:
            selector: CSS selector to capture a specific element.
                If None, captures the viewport.
            full_page: If True, captures the full scrollable page content.
                Ignored if selector is provided.

        Returns:
            Path to the saved screenshot file.

        Raises:
            RuntimeError: If no active page is available.
            Exception: Any error raised while taking the screenshot is logged
                and re-raised.
        """
        if not self.page or self.page.is_closed():
            raise RuntimeError("No active page to capture screenshot from.")

        # The file name depends only on the session id, so repeated captures
        # in the same session overwrite the previous screenshot.
        session_id = getattr(self.context, 'session_id', 'local')
        path = os.path.join(
            tempfile.gettempdir(), f"dss_screenshot_{session_id}.png"
        )

        try:
            if selector:
                element = self.page.locator(selector)
                await element.screenshot(path=path, timeout=10000)
                logger.info(f"Element screenshot saved to {path}")
            else:
                await self.page.screenshot(path=path, full_page=full_page, timeout=10000)
                logger.info(f"Page screenshot saved to {path}")
            return path
        except Exception as e:
            logger.error(f"Failed to capture screenshot: {e}")
            raise

    async def get_dom_snapshot(self) -> str:
        """
        Get the current DOM state as an HTML string.

        Returns:
            The page content as HTML, or an HTML comment placeholder when
            there is no active page.
        """
        if not self.page or self.page.is_closed():
            return "<!-- No active page to get DOM snapshot from. -->"
        return await self.page.content()

    async def get_errors(
        self, severity: Optional[str] = None, limit: int = 50
    ) -> List[Dict[str, Any]]:
        """
        Retrieve captured page errors (e.g., uncaught exceptions).

        Args:
            severity: Filter by severity (not yet implemented; currently ignored).
            limit: Maximum number of errors to return. Values <= 0 yield an
                empty list.

        Returns:
            The most recent errors, oldest first. Each entry has the keys
            'level' ('error'), 'category' ('uncaughtError'), 'message' and
            'data' (containing 'name' and 'stack').
        """
        errors: List[Dict[str, Any]] = []
        for err in self._page_errors:
            try:
                entry = {
                    "level": "error",
                    "category": "uncaughtError",
                    "message": str(err),
                    "data": {
                        "name": getattr(err, 'name', 'Error'),
                        "stack": getattr(err, 'stack', None),
                    },
                }
            except Exception as e:
                # Best effort: skip an error object that cannot be rendered.
                logger.debug(f"Error processing page error: {e}")
                continue
            errors.append(entry)

        return self._most_recent(errors, limit)

    async def run_accessibility_audit(
        self, selector: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run an accessibility audit on the current page using axe-core.

        axe-core is injected from AXE_CORE_SCRIPT_URL unless the page already
        exposes `axe.run`, so repeated audits of the same page do not append
        duplicate script tags.

        Args:
            selector: A CSS selector to limit the audit to a specific element.
                If None, audits the entire page.

        Returns:
            The result of `axe.run()` as returned by the page. The violation
            count is read from its 'violations' key.

        Raises:
            RuntimeError: If no active page is available.
        """
        if not self.page or self.page.is_closed():
            raise RuntimeError("No active page to run accessibility audit on.")

        axe_loaded = await self.page.evaluate(
            "() => typeof axe !== 'undefined' && typeof axe.run === 'function'"
        )
        if not axe_loaded:
            logger.info("Injecting axe-core library...")
            await self.page.add_script_tag(url=AXE_CORE_SCRIPT_URL)

            # Wait for axe to be available
            await self.page.wait_for_function("typeof axe !== 'undefined'", timeout=5000)

        logger.info(f"Running accessibility audit{' on ' + selector if selector else ''}...")

        # Run axe with selector context if provided
        if selector:
            result = await self.page.evaluate(
                "(selector) => axe.run(selector)", selector
            )
        else:
            result = await self.page.evaluate("() => axe.run()")

        violations_count = len(result.get('violations', []))
        logger.info(f"Accessibility audit complete. Found {violations_count} violations.")

        return result

    async def get_performance_metrics(self) -> Dict[str, Any]:
        """
        Get performance metrics, including Navigation Timing and Core Web Vitals.

        Returns:
            Dictionary containing:
            - navigation_timing: `window.performance.timing` as a dict
            - core_web_vitals: dict with 'lcp', 'cls', 'fcp' and 'ttfb'

        Raises:
            RuntimeError: If no active page is available.
        """
        if not self.page or self.page.is_closed():
            raise RuntimeError("No active page to get performance metrics from.")

        # 1. Get Navigation Timing API metrics. The object is serialized in
        #    the page and parsed here.
        timing_raw = await self.page.evaluate(
            "() => JSON.stringify(window.performance.timing)"
        )
        nav_timing = json.loads(timing_raw)

        # 2. Get Core Web Vitals via PerformanceObserver.
        #    The script reads buffered entries and resolves after 500 ms.
        metrics_script = """
        () => new Promise((resolve) => {
            const metrics = { lcp: null, cls: 0, fcp: null, ttfb: null };

            // Get TTFB from navigation timing
            const navEntry = performance.getEntriesByType('navigation')[0];
            if (navEntry) {
                metrics.ttfb = navEntry.responseStart - navEntry.requestStart;
            }

            // Get FCP from paint entries
            const paintEntries = performance.getEntriesByType('paint');
            for (const entry of paintEntries) {
                if (entry.name === 'first-contentful-paint') {
                    metrics.fcp = entry.startTime;
                }
            }

            // Set up observer for LCP and CLS
            try {
                const observer = new PerformanceObserver((list) => {
                    for (const entry of list.getEntries()) {
                        if (entry.entryType === 'largest-contentful-paint') {
                            metrics.lcp = entry.startTime;
                        }
                        if (entry.entryType === 'layout-shift' && !entry.hadRecentInput) {
                            metrics.cls += entry.value;
                        }
                    }
                });

                observer.observe({
                    type: 'largest-contentful-paint',
                    buffered: true
                });
                observer.observe({
                    type: 'layout-shift',
                    buffered: true
                });

                // Give some time for metrics to be collected
                setTimeout(() => {
                    observer.disconnect();
                    resolve(metrics);
                }, 500);
            } catch (e) {
                // PerformanceObserver may not be fully supported
                resolve(metrics);
            }
        })
        """
        core_web_vitals = await self.page.evaluate(metrics_script)

        return {
            "navigation_timing": nav_timing,
            "core_web_vitals": core_web_vitals
        }

    async def close(self) -> None:
        """
        Close the current page. Browser instance is kept in pool for reuse.

        Also discards the captured console logs and page errors.
        To fully close the browser, use close_browser() class method.
        """
        if self.page and not self.page.is_closed():
            await self.page.close()
        self.page = None
        self._console_logs.clear()
        self._page_errors.clear()
        logger.info("Page closed.")

    @classmethod
    async def close_browser(cls) -> None:
        """
        Close the browser and stop the Playwright instance.

        This is a class method that closes the shared browser pool.
        Should be called during application shutdown. The pooled references
        are cleared even if closing the browser raises, and Playwright is
        still stopped in that case.
        """
        async with cls._browser_lock:
            # Detach both references first so the pool never keeps pointing
            # at a half-closed browser or driver.
            browser, cls._browser = cls._browser, None
            playwright, cls._playwright = cls._playwright, None

            try:
                if browser:
                    await browser.close()
                    logger.info("Browser instance closed.")
            finally:
                if playwright:
                    await playwright.stop()
                    logger.info("Playwright stopped.")
|