diff --git a/.env.example b/.env.example index d7b4f0f..967517c 100644 --- a/.env.example +++ b/.env.example @@ -31,6 +31,7 @@ NEXUS_API_TIMEOUT_SECONDS=30 # LLM analysis (optional — set LLM_ENABLED=1 to activate) LLM_ENABLED=0 +LLM_AUTO_ANALYZE=0 LLM_API_BASE=https://api.openai.com/v1 LLM_API_KEY= LLM_MODEL=gpt-4o-mini diff --git a/AGENTS.md b/AGENTS.md index e462658..f56960f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -197,3 +197,15 @@ curl -X POST http://localhost:8080/webhooks/nexus \ - **No Nexus Pro required:** the system works with Nexus OSS. Webhooks can be triggered manually or via community plugins. - **GuardDog deadlocks:** GuardDog is CPU-intensive. Use `MAX_CONCURRENT_SCANS` to avoid resource exhaustion. - **LLM may be slow:** increase `LLM_TIMEOUT_SECONDS` for large models. Set `LLM_MAX_CONCURRENT_ANALYSES` to limit parallel requests. + +--- + +## Workflow + +**After every change** — follow these steps in order: + +1. **Document** — update `AGENTS.md` if the change introduces a new concept, env var, endpoint, or workflow. +2. **Lint** — `ruff check guarddog_nexus && ruff format guarddog_nexus` +3. **Test** — `python3 -m pytest -v` (must pass 100%) +4. **Commit** — use the existing commit prefix convention (`feat:`, `fix:`, `refactor:`, `docs:`, `ui:`). +5. **Rebuild** — `docker compose up -d --build` to deploy changes. diff --git a/docker-compose.yml b/docker-compose.yml index 264ca1d..216bb6d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,7 @@ services: HOST: "0.0.0.0" PORT: "8080" LLM_ENABLED: "${LLM_ENABLED:-0}" + LLM_AUTO_ANALYZE: "${LLM_AUTO_ANALYZE:-0}" LLM_API_BASE: "${LLM_API_BASE:-https://api.openai.com/v1}" LLM_API_KEY: "${LLM_API_KEY:-}" LLM_MODEL: "${LLM_MODEL:-gpt-4o-mini}" diff --git a/guarddog_nexus/config.py b/guarddog_nexus/config.py index 17f8b95..9afc806 100644 --- a/guarddog_nexus/config.py +++ b/guarddog_nexus/config.py @@ -14,52 +14,59 @@ from guarddog_nexus.constants import ( ) +def _env_int(name: str, default: int) -> int: + val = os.getenv(name) + if val is None: + return default + try: + return int(val) + except ValueError: + return default + + @dataclass class Config: # Nexus connection nexus_url: str = os.getenv("NEXUS_URL", "http://localhost:8081") nexus_username: str = os.getenv("NEXUS_USERNAME", "admin") nexus_password: str = os.getenv("NEXUS_PASSWORD", "admin123") - nexus_download_timeout: int = int( - os.getenv("NEXUS_DOWNLOAD_TIMEOUT_SECONDS", str(HTTP_TIMEOUT_DOWNLOAD)) - ) - nexus_api_timeout: int = int( - os.getenv("NEXUS_API_TIMEOUT_SECONDS", str(HTTP_TIMEOUT_API)) + nexus_download_timeout: int = _env_int( + "NEXUS_DOWNLOAD_TIMEOUT_SECONDS", HTTP_TIMEOUT_DOWNLOAD ) + nexus_api_timeout: int = _env_int("NEXUS_API_TIMEOUT_SECONDS", HTTP_TIMEOUT_API) # Database database_path: str = os.getenv("DATABASE_PATH", "data/guarddog.db") # Server host: str = os.getenv("HOST", "0.0.0.0") - port: int = int(os.getenv("PORT", "8080")) + port: int = _env_int("PORT", 8080) # Logging log_level: str = os.getenv("LOG_LEVEL", "INFO") log_syslog_host: str = os.getenv("LOG_SYSLOG_HOST", "") - log_syslog_port: int = int(os.getenv("LOG_SYSLOG_PORT", "514")) + log_syslog_port: int = _env_int("LOG_SYSLOG_PORT", 514) log_syslog_facility: str = os.getenv("LOG_SYSLOG_FACILITY", "") # Webhooks webhook_secret: str = os.getenv("WEBHOOK_SECRET", "") # Scanner - scan_timeout_seconds: int = int(os.getenv("SCAN_TIMEOUT_SECONDS", "300")) + scan_timeout_seconds: int = _env_int("SCAN_TIMEOUT_SECONDS", 300) temp_dir: str = os.getenv("TEMP_DIR", "/tmp/guarddog-nexus") guarddog_binary: str = os.getenv("GUARDDOG_BINARY", GUARDDOG_BINARY_FALLBACK) - max_concurrent_scans: int = int( - os.getenv("MAX_CONCURRENT_SCANS", str(DEFAULT_MAX_CONCURRENT_SCANS)) + max_concurrent_scans: int = _env_int( + "MAX_CONCURRENT_SCANS", DEFAULT_MAX_CONCURRENT_SCANS ) # LLM analysis llm_enabled: bool = os.getenv("LLM_ENABLED", "").lower() in ("1", "true", "yes") + llm_auto_analyze: bool = os.getenv("LLM_AUTO_ANALYZE", "").lower() in ("1", "true", "yes") llm_api_base: str = os.getenv("LLM_API_BASE", LLM_DEFAULT_API_BASE) llm_api_key: str = os.getenv("LLM_API_KEY", "") llm_model: str = os.getenv("LLM_MODEL", LLM_DEFAULT_MODEL) - llm_timeout: int = int(os.getenv("LLM_TIMEOUT_SECONDS", str(LLM_DEFAULT_TIMEOUT))) - llm_max_concurrent: int = int( - os.getenv("LLM_MAX_CONCURRENT_ANALYSES", "2") - ) + llm_timeout: int = _env_int("LLM_TIMEOUT_SECONDS", LLM_DEFAULT_TIMEOUT) + llm_max_concurrent: int = _env_int("LLM_MAX_CONCURRENT_ANALYSES", 2) config = Config() diff --git a/guarddog_nexus/constants.py b/guarddog_nexus/constants.py index 448ed70..8f28f81 100644 --- a/guarddog_nexus/constants.py +++ b/guarddog_nexus/constants.py @@ -13,9 +13,8 @@ used across the codebase live here to avoid duplication and drift. # harvester uses it to decide whether to download and scan. PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip") -# Prefix used in PyPI-style asset paths ("/packages/name/ver/file") -PYPI_PATH_PREFIX = "packages" -NPM_PATH_PREFIX = "packages" +# Prefix used in PyPI/NPM asset paths ("/packages/name/ver/file") +PKG_PATH_PREFIX = "packages" # Metadata file patterns that should never be scanned METADATA_PATTERNS = ( @@ -39,7 +38,6 @@ DEFAULT_ECOSYSTEM = "pypi" # --------------------------------------------------------------------------- SEVERITY_WARNING = "WARNING" -SEVERITY_ERROR = "ERROR" # --------------------------------------------------------------------------- # Sorting @@ -81,20 +79,9 @@ WEB_PER_PAGE = 50 DASHBOARD_LATEST_FLAGGED_LIMIT = 8 DASHBOARD_LATEST_SCANS_LIMIT = 10 -DASHBOARD_MOST_FLAGGED_LIMIT = 8 TOP_RULES_LIMIT = 10 RECENT_FLAGGED_DAYS = 7 -HEATMAP_DAYS = 14 - -# --------------------------------------------------------------------------- -# Database fields -# --------------------------------------------------------------------------- - -MAX_PACKAGE_NAME_LENGTH = 255 -MAX_PACKAGE_VERSION_LENGTH = 255 -MAX_ECOSYSTEM_LENGTH = 50 -SHA256_HEX_LENGTH = 64 # --------------------------------------------------------------------------- # Scanner @@ -114,8 +101,7 @@ SCAN_ERROR_DOWNLOAD_FAILED = "Download failed" ERROR_MESSAGE_MAX_LENGTH = 1000 SHA256_CHUNK_SIZE = 8192 -# Finding data dict keys -FINDING_KEYS = ("rule", "severity", "message", "location", "code") +# Finding severity default DEFAULT_FINDING_SEVERITY = SEVERITY_WARNING # --------------------------------------------------------------------------- @@ -141,8 +127,6 @@ WEBHOOK_STATUS_IGNORED = "ignored" # API # --------------------------------------------------------------------------- -API_PREFIX_V1 = "/api/v1" -HEALTH_PATH = "/health" STATIC_MOUNT_PATH = "/static" CSV_MEDIA_TYPE = "text/csv" diff --git a/guarddog_nexus/core/harvester.py b/guarddog_nexus/core/harvester.py index 7a9b434..7c27338 100644 --- a/guarddog_nexus/core/harvester.py +++ b/guarddog_nexus/core/harvester.py @@ -63,16 +63,20 @@ async def harvest( return None async with lock: - # Re-check DB in case another task already created and finished a scan - active = await session.scalar( - select(Scan.id).where( - Scan.nexus_asset_url == download_url, - Scan.status.in_([ScanStatus.PENDING.value, ScanStatus.SCANNING.value]), + try: + # Re-check DB in case another task already created and finished a scan + active = await session.scalar( + select(Scan.id).where( + Scan.nexus_asset_url == download_url, + Scan.status.in_([ScanStatus.PENDING.value, ScanStatus.SCANNING.value]), + ) ) - ) - if active: - log.info("Already scanning this URL, skipping") - return None + if active: + log.info("Already scanning this URL, skipping") + return None + finally: + async with _url_lock: + _url_locks.pop(download_url, None) scan = Scan( package_name=package_name, @@ -88,10 +92,9 @@ async def harvest( await session.commit() await session.refresh(scan) - os.makedirs(config.temp_dir, exist_ok=True) - tmpdir = tempfile.mkdtemp(dir=config.temp_dir) - try: + os.makedirs(config.temp_dir, exist_ok=True) + tmpdir = tempfile.mkdtemp(dir=config.temp_dir) scan.status = ScanStatus.SCANNING.value await session.commit() @@ -103,7 +106,7 @@ async def harvest( await session.commit() return scan - scan.sha256 = compute_sha256(downloaded) + scan.sha256 = await compute_sha256(downloaded) await session.commit() existing = await session.scalar( @@ -148,8 +151,12 @@ async def harvest( # Auto-trigger LLM analysis for flagged packages llm_reports = [] - if scan.flagged and config.llm_enabled: - llm_reports = await _run_llm_analysis(created_findings, session) + if scan.flagged and config.llm_enabled and config.llm_auto_analyze: + try: + llm_reports = await _run_llm_analysis(created_findings, session) + except Exception as e: + log.error("LLM analysis failed for %s==%s: %s", package_name, package_version, e) + llm_reports = [] if scan.flagged: extra = { @@ -199,11 +206,18 @@ async def _run_llm_analysis(findings: list[Finding], session: AsyncSession) -> l """Run LLM analysis on findings and persist reports to the database.""" from .llm import analyze_finding + # Mark all as analyzing so the UI shows a spinner + for finding in findings: + finding.report = {"status": "analyzing"} + await session.commit() + reports = [] for finding in findings: report = await analyze_finding(finding.data) if report: finding.report = report reports.append(report) + else: + finding.report = None await session.commit() return reports diff --git a/guarddog_nexus/core/nexus.py b/guarddog_nexus/core/nexus.py index bd207de..ec8dc9e 100644 --- a/guarddog_nexus/core/nexus.py +++ b/guarddog_nexus/core/nexus.py @@ -1,5 +1,6 @@ """Sonatype Nexus REST API client using httpx async.""" +import asyncio import hashlib import os @@ -7,8 +8,7 @@ import httpx from ..config import config from ..constants import ( - NPM_PATH_PREFIX, - PYPI_PATH_PREFIX, + PKG_PATH_PREFIX, SHA256_CHUNK_SIZE, ) from ..logging_setup import log @@ -20,7 +20,7 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None: Path format: packages/requests/2.31.0/requests-2.31.0.tar.gz """ parts = asset_path.strip("/").split("/") - if len(parts) >= 3 and parts[0] == PYPI_PATH_PREFIX: + if len(parts) >= 3 and parts[0] == PKG_PATH_PREFIX: return parts[1], parts[2] return None @@ -35,8 +35,8 @@ def extract_go_info(asset_path: str) -> tuple[str, str] | None: idx = cleaned.find("/@v/") if idx == -1: return None - if cleaned.startswith(PYPI_PATH_PREFIX + "/"): - module = cleaned[len(PYPI_PATH_PREFIX) + 1 : idx] + if cleaned.startswith(PKG_PATH_PREFIX + "/"): + module = cleaned[len(PKG_PATH_PREFIX) + 1 : idx] else: module = cleaned[:idx] if not module: @@ -56,7 +56,7 @@ def extract_npm_info(asset_path: str) -> tuple[str, str] | None: Path format: packages/react/-/react-18.2.0.tgz """ parts = asset_path.strip("/").split("/") - if len(parts) < 4 or parts[0] != NPM_PATH_PREFIX: + if len(parts) < 4 or parts[0] != PKG_PATH_PREFIX: return None name = parts[1] # Last segment: -.tgz @@ -100,14 +100,19 @@ async def download_asset(download_url: str, dest_dir: str) -> str | None: try: response = await client.get(download_url) response.raise_for_status() - with open(dest_path, "wb") as f: - f.write(response.content) + content = response.content + await asyncio.to_thread(_write_file, dest_path, content) return dest_path except Exception as e: log.warning("Failed to download %s: %s", download_url, e) return None +def _write_file(path: str, content: bytes) -> None: + with open(path, "wb") as f: + f.write(content) + + async def nexus_get(path: str) -> httpx.Response: """Make an authenticated GET request to Nexus REST API.""" auth = httpx.BasicAuth(config.nexus_username, config.nexus_password) @@ -117,7 +122,11 @@ async def nexus_get(path: str) -> httpx.Response: return await client.get(f"{config.nexus_url.rstrip('/')}{path}") -def compute_sha256(filepath: str) -> str: +async def compute_sha256(filepath: str) -> str: + return await asyncio.to_thread(_compute_sha256_sync, filepath) + + +def _compute_sha256_sync(filepath: str) -> str: h = hashlib.sha256() with open(filepath, "rb") as f: for chunk in iter(lambda: f.read(SHA256_CHUNK_SIZE), b""): diff --git a/guarddog_nexus/core/scanner.py b/guarddog_nexus/core/scanner.py index 18ac1bf..24639f6 100644 --- a/guarddog_nexus/core/scanner.py +++ b/guarddog_nexus/core/scanner.py @@ -43,6 +43,9 @@ async def scan_package(filepath: str, ecosystem: str = DEFAULT_ECOSYSTEM) -> dic log.error("GuardDog exited %d: %s", proc.returncode, stderr.decode()) return {"findings": [], "errors": [stderr.decode().strip()]} + if proc.returncode == 1 and stderr: + log.warning("GuardDog stderr (exit 1): %s", stderr.decode().strip()) + try: data = json.loads(stdout.decode()) except json.JSONDecodeError: @@ -96,6 +99,17 @@ def _normalize_output(data: dict) -> dict: ) elif isinstance(value, dict) and not value: continue + elif isinstance(value, dict): + # Non-empty dict — treat as a single finding + findings.append( + { + "rule": rule_name, + "severity": value.get("severity", DEFAULT_FINDING_SEVERITY), + "message": value.get("message", ""), + "location": value.get("location", ""), + "code": value.get("code", ""), + } + ) errors = data.get("errors", {}) if isinstance(errors, dict): diff --git a/guarddog_nexus/db/engine.py b/guarddog_nexus/db/engine.py index 6a16bbe..54b6d43 100644 --- a/guarddog_nexus/db/engine.py +++ b/guarddog_nexus/db/engine.py @@ -68,8 +68,25 @@ async def init_db(): async with _engine.begin() as conn: await conn.run_sync(Base.metadata.create_all) await _migrate() + await _ensure_indexes() async def get_session() -> AsyncSession: async with _async_session() as session: yield session + + +async def _ensure_indexes(): + """Create indexes that are not covered by ORM model definitions.""" + indexes = [ + "CREATE INDEX IF NOT EXISTS idx_scans_status ON scans(status)", + "CREATE INDEX IF NOT EXISTS idx_scans_sha256 ON scans(sha256)", + "CREATE INDEX IF NOT EXISTS idx_scans_package_name ON scans(package_name)", + "CREATE INDEX IF NOT EXISTS idx_scans_package_version ON scans(package_version)", + "CREATE INDEX IF NOT EXISTS idx_scans_flagged ON scans(flagged)", + "CREATE INDEX IF NOT EXISTS idx_scans_nexus_asset_url ON scans(nexus_asset_url)", + "CREATE INDEX IF NOT EXISTS idx_findings_scan_id ON findings(scan_id)", + ] + async with _engine.begin() as conn: + for sql in indexes: + await conn.execute(text(sql)) diff --git a/guarddog_nexus/db/queries.py b/guarddog_nexus/db/queries.py index 9692aa0..a83af8d 100644 --- a/guarddog_nexus/db/queries.py +++ b/guarddog_nexus/db/queries.py @@ -5,16 +5,13 @@ Eliminates ~90% duplicated SQL between api/*.py and web/routes.py. import datetime -from sqlalchemy import Integer, cast, func, select, text +from sqlalchemy import func, select, text from sqlalchemy.ext.asyncio import AsyncSession from guarddog_nexus.constants import ( DASHBOARD_LATEST_FLAGGED_LIMIT, DASHBOARD_LATEST_SCANS_LIMIT, - DASHBOARD_MOST_FLAGGED_LIMIT, - HEATMAP_DAYS, JSON_PATH_RULE, - JSON_PATH_SEVERITY, PACKAGE_SORT_FIELDS, RECENT_FLAGGED_DAYS, SCAN_SORT_FIELDS, @@ -143,15 +140,13 @@ async def get_dashboard_stats(session: AsyncSession) -> dict: ) total_findings = await session.scalar(select(func.count(Finding.id))) - warnings_count = await session.scalar( + llm_analyzed = await session.scalar( select(func.count(Finding.id)).where( - func.json_extract(Finding.data, JSON_PATH_SEVERITY) == "WARNING" + func.json_extract(Finding.report, "$.verdict").isnot(None) ) ) - errors_count = await session.scalar( - select(func.count(Finding.id)).where( - func.json_extract(Finding.data, JSON_PATH_SEVERITY) == "ERROR" - ) + llm_pending = await session.scalar( + select(func.count(Finding.id)).where(Finding.report.is_(None)) ) latest_flagged = ( @@ -191,48 +186,15 @@ async def get_dashboard_stats(session: AsyncSession) -> dict: ) ).all() - most_flagged = ( - await session.execute( - select( - Scan.package_name, - Scan.package_version, - func.sum(Scan.total_findings).label("total"), - func.max(Scan.started_at).label("last_scan"), - ) - .where(Scan.flagged == True) - .group_by(Scan.package_name, Scan.package_version) - .order_by(func.sum(Scan.total_findings).desc()) - .limit(DASHBOARD_MOST_FLAGGED_LIMIT) - ) - ).all() - - max_findings = max((r.total for r in most_flagged), default=1) - - days_raw = ( - await session.execute( - select( - func.date(Scan.started_at).label("day"), - func.count(Scan.id).label("cnt"), - func.sum(cast(Scan.flagged, Integer)).label("flagged_cnt"), - ) - .where(Scan.started_at >= func.datetime("now", f"-{HEATMAP_DAYS} days")) - .group_by("day") - .order_by("day") - ) - ).all() - return { "total_scans": total_scans or 0, "flagged_scans": flagged_scans or 0, "recent_flagged": recent_flagged or 0, "total_findings": total_findings or 0, - "warnings_count": warnings_count or 0, - "errors_count": errors_count or 0, + "llm_analyzed": llm_analyzed or 0, + "llm_pending": llm_pending or 0, "latest_flagged": latest_flagged, "latest_scans": latest_scans, "top_rules": [{"rule": r.rule, "count": r.cnt} for r in top_rules], - "most_flagged": most_flagged, - "max_findings": max_findings, - "days": [(d.day, d.cnt, d.flagged_cnt) for d in days_raw], "now": datetime.datetime.now(datetime.timezone.utc), } diff --git a/guarddog_nexus/i18n.py b/guarddog_nexus/i18n.py index ad8533e..98a1306 100644 --- a/guarddog_nexus/i18n.py +++ b/guarddog_nexus/i18n.py @@ -17,14 +17,12 @@ _STRINGS = { "heading_packages": {"en": "Packages", "ru": "Пакеты"}, "heading_latest_flagged": {"en": "Latest Flagged", "ru": "Последние обнаружения"}, "heading_latest_scans": {"en": "Latest Scans", "ru": "Последние сканирования"}, - "heading_findings": {"en": "Findings", "ru": "Находки"}, "heading_findings_count": {"en": "Findings ({})", "ru": "Находки ({})"}, "heading_scans_count": {"en": "Scans ({})", "ru": "Сканирований ({})"}, "col_id": {"en": "ID", "ru": "ID"}, "col_package": {"en": "Package", "ru": "Пакет"}, "col_version": {"en": "Version", "ru": "Версия"}, "col_repo": {"en": "Repo", "ru": "Репозиторий"}, - "col_repository": {"en": "Repository", "ru": "Репозиторий"}, "col_status": {"en": "Status", "ru": "Статус"}, "col_findings": {"en": "Findings", "ru": "Находки"}, "col_time": {"en": "Time", "ru": "Время"}, @@ -82,6 +80,11 @@ _STRINGS = { "ru": "⚠ Анализ сгенерирован AI — может содержать неточности. " "Всегда проверяйте находки перед принятием мер.", }, + "llm_analyzing": {"en": "Analyzing...", "ru": "Анализирую..."}, + "llm_retry": {"en": "Retry", "ru": "Повторить"}, + "llm_analyzed": {"en": "LLM analyzed", "ru": "LLM проанализ."}, + "llm_pending": {"en": "Pending", "ru": "Ожидают"}, + "not_found": {"en": "Not found", "ru": "Не найдено"}, "breadcrumb_home": {"en": "Home", "ru": "Главная"}, "breadcrumb_dashboard": {"en": "Dashboard", "ru": "Панель"}, "breadcrumb_scans": {"en": "Scans", "ru": "Сканирования"}, diff --git a/guarddog_nexus/routes/api_findings.py b/guarddog_nexus/routes/api_findings.py index 823b51c..d3cf7ce 100644 --- a/guarddog_nexus/routes/api_findings.py +++ b/guarddog_nexus/routes/api_findings.py @@ -4,7 +4,6 @@ from fastapi import APIRouter, Depends, Query from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession -from ..config import config from ..constants import ( DEFAULT_OFFSET, DEFAULT_PAGE_SIZE, @@ -55,32 +54,3 @@ async def list_findings( } -@router.post("/{finding_id}/analyze") -async def analyze_finding_endpoint( - finding_id: int, - session: AsyncSession = Depends(get_session), -): - """Manually trigger LLM analysis for a single finding.""" - if not config.llm_enabled: - return {"detail": "LLM analysis is disabled"} - - finding = await session.scalar( - select(Finding).where(Finding.id == finding_id) - ) - if not finding: - return {"detail": "Not found"} - - from ..core.llm import analyze_finding - - report = await analyze_finding(finding.data) - if report is None: - return {"detail": "LLM analysis failed"} - - finding.report = report - await session.commit() - - return { - "id": finding.id, - **finding.data, - "report": report, - } diff --git a/guarddog_nexus/routes/api_scans.py b/guarddog_nexus/routes/api_scans.py index 8d33fcb..8935c24 100644 --- a/guarddog_nexus/routes/api_scans.py +++ b/guarddog_nexus/routes/api_scans.py @@ -127,7 +127,7 @@ async def scan_stats(session: AsyncSession = Depends(get_session)): "total_findings": dashboard["total_findings"], "top_rules": dashboard["top_rules"], "latest_scan_at": dashboard["latest_flagged"][0].started_at.isoformat() - if dashboard["latest_flagged"] + if dashboard["latest_flagged"] and dashboard["latest_flagged"][0].started_at else None, } diff --git a/guarddog_nexus/routes/metrics.py b/guarddog_nexus/routes/metrics.py index 82b6f58..7747b64 100644 --- a/guarddog_nexus/routes/metrics.py +++ b/guarddog_nexus/routes/metrics.py @@ -1,6 +1,6 @@ """Prometheus-compatible metrics endpoint.""" -import time +import calendar from fastapi import APIRouter, Depends, Response from sqlalchemy import func, select @@ -69,7 +69,7 @@ async def metrics(session: AsyncSession = Depends(get_session)): lines.append(f'guarddog_scans_by_ecosystem{{ecosystem="{eco}"}} {count}') if latest: - ts = time.mktime(latest.timetuple()) + ts = calendar.timegm(latest.timetuple()) lines += [ "", "# HELP guarddog_last_scan_timestamp_seconds Unix timestamp of most recent scan.", diff --git a/guarddog_nexus/routes/web.py b/guarddog_nexus/routes/web.py index 706c989..a9c39ec 100644 --- a/guarddog_nexus/routes/web.py +++ b/guarddog_nexus/routes/web.py @@ -1,5 +1,6 @@ """Web UI routes — Jinja2 + htmx pages.""" +import asyncio from urllib.parse import unquote from fastapi import APIRouter, Depends, Request @@ -8,6 +9,7 @@ from jinja2 import Environment, PackageLoader, select_autoescape from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from ..config import config from ..constants import ( APP_PACKAGE, DEFAULT_SORT_BY_PACKAGES, @@ -26,11 +28,15 @@ from ..i18n import t as _t router = APIRouter(tags=["web"]) +_llm_locks: dict[int, asyncio.Lock] = {} +_llm_lock = asyncio.Lock() + _jinja_env = Environment( loader=PackageLoader(APP_PACKAGE, "web/templates"), autoescape=select_autoescape(), ) _jinja_env.globals["t"] = _t +_jinja_env.globals["config"] = config def _render(name: str, **context) -> HTMLResponse: @@ -109,7 +115,7 @@ async def scan_detail( .options(selectinload(Scan.findings)) ) if not scan: - return HTMLResponse("

Not found

", status_code=404) + return HTMLResponse(f"

{_t('not_found', request.state.lang)}

", status_code=404) return _render("scan_detail.html", scan=scan, request=request) @@ -186,7 +192,7 @@ async def package_detail( ) if not scans: - return HTMLResponse("

Not found

", status_code=404) + return HTMLResponse(f"

{_t('not_found', request.state.lang)}

", status_code=404) all_findings = [] for s in scans: @@ -205,31 +211,72 @@ async def package_detail( @router.post("/api/v1/findings/{finding_id}/analyze", response_class=HTMLResponse) async def analyze_finding_htmx( finding_id: int, + request: Request, + retry: bool = False, session: AsyncSession = Depends(get_session), ): """HTMX fragment: trigger LLM analysis and return styled result HTML.""" from ..config import config from ..core.llm import analyze_finding + lang = request.state.lang + if not config.llm_enabled: + msg = _t("llm_disabled", lang) return HTMLResponse( - '
LLM analysis is disabled
' + f'
{msg}
' ) finding = await session.scalar(select(Finding).where(Finding.id == finding_id)) if not finding: + msg = _t("llm_not_found", lang) return HTMLResponse( - '
Finding not found
', + f'
{msg}
', status_code=404, ) - report = await analyze_finding(finding.data) + if not retry and finding.report and finding.report.get("verdict"): + return _render( + "_llm_report_fragment.html", + report=finding.report, + finding_id=finding_id, + request=request, + ) + + if not retry and finding.report and finding.report.get("status") == "analyzing": + return _render("_llm_spinner.html", request=request) + + async with _llm_lock: + if finding_id not in _llm_locks: + _llm_locks[finding_id] = asyncio.Lock() + + lock = _llm_locks[finding_id] + if lock.locked(): + return _render("_llm_spinner.html", request=request) + + async with lock: + try: + finding.report = {"status": "analyzing"} + await session.commit() + report = await analyze_finding(finding.data) + finally: + async with _llm_lock: + _llm_locks.pop(finding_id, None) + if report is None: + finding.report = None + await session.commit() + msg = _t("llm_failed", lang) return HTMLResponse( - '
LLM analysis failed
' + f'
{msg}
' ) finding.report = report await session.commit() - return _render("_llm_report_fragment.html", report=report) + return _render( + "_llm_report_fragment.html", + report=report, + finding_id=finding_id, + request=request, + ) diff --git a/guarddog_nexus/routes/webhooks.py b/guarddog_nexus/routes/webhooks.py index 427d170..d3664f3 100644 --- a/guarddog_nexus/routes/webhooks.py +++ b/guarddog_nexus/routes/webhooks.py @@ -86,10 +86,10 @@ async def nexus_webhook( try: data = json.loads(payload.decode("utf-8")) - except json.JSONDecodeError: - log.warning("Webhook received invalid JSON") + except (json.JSONDecodeError, UnicodeDecodeError): + log.warning("Webhook received invalid body") raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON" + status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request body" ) action = data.get("action", "").upper() @@ -112,6 +112,11 @@ async def nexus_webhook( action, initiator, source_ip) repository = data.get("repositoryName", "") + if not repository: + log.warning("Webhook rejected: missing repositoryName") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail="Missing repository name" + ) asset = data.get("asset") component = data.get("component") diff --git a/guarddog_nexus/web/static/style.css b/guarddog_nexus/web/static/style.css index efd9591..c1c3039 100644 --- a/guarddog_nexus/web/static/style.css +++ b/guarddog_nexus/web/static/style.css @@ -15,25 +15,8 @@ .severity-ERROR { color: var(--pico-color-red-400); } /* ------------------------------------------------------------------ */ -/* Dashboard mini-bar */ +/* Dashboard blocks */ /* ------------------------------------------------------------------ */ -.stat-minibar { - display: flex; - gap: 1.5rem; - padding: 0.6rem 0; - margin-bottom: 1.5rem; - border-bottom: 1px solid var(--pico-color-gray-500); - font-size: 0.9rem; - opacity: 0.9; -} - -/* Dashboard block grid (2 cols → 1 on mobile) */ -.dashboard-grid { - display: grid; - grid-template-columns: 1fr 1fr; - gap: 1rem; - margin-bottom: 1rem; -} .dash-block { padding: 1rem; @@ -57,51 +40,6 @@ table.compact { font-size: 0.82rem; } table.compact th, table.compact td { padding: 0.35rem 0.5rem; } -/* ------------------------------------------------------------------ */ -/* Heatmap */ -/* ------------------------------------------------------------------ */ -.heatmap { - display: flex; - align-items: flex-end; - gap: 2px; - height: 40px; - margin: 0.4rem 0 0 0; -} - -.heatmap-day { - flex: 1; - display: flex; - flex-direction: column; - justify-content: flex-end; - position: relative; -} - -.heatmap-day .bar { - border-radius: 2px 2px 0 0; - opacity: 0.8; - transition: height 0.3s ease, opacity 0.2s; -} - -.heatmap-day:hover .bar { opacity: 1; } - -.heatmap-day .tooltip { - display: none; - position: absolute; - bottom: 100%; - left: 50%; - transform: translateX(-50%); - background: var(--pico-color-gray-700); - color: var(--pico-color-white); - padding: 0.25rem 0.5rem; - border-radius: 4px; - font-size: 0.7rem; - white-space: nowrap; - z-index: 10; - margin-bottom: 4px; -} - -.heatmap-day:hover .tooltip { display: block; } - /* ------------------------------------------------------------------ */ /* Scan info block (detail page) */ /* ------------------------------------------------------------------ */ @@ -167,17 +105,6 @@ table.compact td { padding: 0.35rem 0.5rem; } margin-bottom: 0; } -.finding-header-row { - display: flex; - justify-content: space-between; - align-items: center; - margin-bottom: 0.75rem; -} - -.finding-header-row h2 { - margin-bottom: 0; -} - /* ------------------------------------------------------------------ */ /* LLM report — verdict-based colour scheme */ /* ------------------------------------------------------------------ */ @@ -237,6 +164,15 @@ table.compact td { padding: 0.35rem 0.5rem; } .llm-actions { margin-top: 0.5rem; } .llm-actions button { font-size: 0.8rem; } +.llm-retry { + margin-left: auto; + font-size: 0.7rem; + opacity: 0.5; + cursor: pointer; + border-bottom: 1px dashed; +} +.llm-retry:hover { opacity: 0.8; } + .llm-disclaimer { margin-top: 0.6rem; font-size: 0.72rem; @@ -269,18 +205,6 @@ table.compact td { padding: 0.35rem 0.5rem; } .copy-btn:hover { background: var(--pico-color-gray-600); } .copy-btn.copied { color: var(--pico-color-green-400); border-color: var(--pico-color-green-400); } -.toggle-all-btn { - font-size: 0.8rem; - cursor: pointer; - background: none; - border: 1px solid var(--pico-color-gray-500); - padding: 0.2rem 0.6rem; - border-radius: 3px; - color: var(--pico-color-gray-300); -} - -.toggle-all-btn:hover { background: var(--pico-color-gray-600); } - .htmx-indicator { display: inline; } /* ------------------------------------------------------------------ */ @@ -336,9 +260,7 @@ th.sortable.active .sort-icon { opacity: 1; } /* Responsive */ /* ------------------------------------------------------------------ */ @media (max-width: 768px) { - .dashboard-grid { grid-template-columns: 1fr; } .scan-info-grid { grid-template-columns: 1fr 1fr; } - .stat-minibar { flex-wrap: wrap; gap: 0.75rem; } .filter-bar { flex-direction: column; align-items: stretch; } nav ul { flex-wrap: wrap; } table, table.compact { font-size: 0.78rem; } @@ -347,14 +269,13 @@ th.sortable.active .sort-icon { opacity: 1; } @media (max-width: 480px) { .scan-info-grid { grid-template-columns: 1fr; } - .stat-minibar { font-size: 0.8rem; } } /* ------------------------------------------------------------------ */ /* Print */ /* ------------------------------------------------------------------ */ @media print { - nav, .filter-bar, .copy-btn, .toggle-all-btn, nav.sticky, + nav, .filter-bar, .copy-btn, nav.sticky, .llm-actions, .breadcrumbs { display: none !important; } body { background: white; color: black; } .llm-report { border: 1px solid #ccc; background: none; } diff --git a/guarddog_nexus/web/templates/_llm_report_fragment.html b/guarddog_nexus/web/templates/_llm_report_fragment.html index d311adf..06847b5 100644 --- a/guarddog_nexus/web/templates/_llm_report_fragment.html +++ b/guarddog_nexus/web/templates/_llm_report_fragment.html @@ -4,8 +4,15 @@ {% if report.severity_rating %} {{ report.severity_rating }} {% endif %} + {% if config.llm_enabled and not config.llm_auto_analyze %} + {{ t('llm_retry', request.state.lang) }} + {% endif %}

{{ report.summary }}

{{ report.analysis }}

-

⚠ AI-generated analysis — may contain inaccuracies. Always verify findings before taking action.

+

{{ t('llm_disclaimer', request.state.lang) }}

diff --git a/guarddog_nexus/web/templates/_llm_spinner.html b/guarddog_nexus/web/templates/_llm_spinner.html new file mode 100644 index 0000000..9e56a01 --- /dev/null +++ b/guarddog_nexus/web/templates/_llm_spinner.html @@ -0,0 +1,3 @@ +
+ {{ t('llm_analyzing', request.state.lang) }} +
diff --git a/guarddog_nexus/web/templates/_packages_table.html b/guarddog_nexus/web/templates/_packages_table.html index e466b40..2f40565 100644 --- a/guarddog_nexus/web/templates/_packages_table.html +++ b/guarddog_nexus/web/templates/_packages_table.html @@ -38,14 +38,5 @@ -{% set total_pages = (total // per_page) + (1 if total % per_page else 0) %} -{% if total_pages > 1 %} - -{% endif %} +{% include "_pagination.html" %} {{ t('total_packages', request.state.lang, total) }} diff --git a/guarddog_nexus/web/templates/_pagination.html b/guarddog_nexus/web/templates/_pagination.html new file mode 100644 index 0000000..ac611cd --- /dev/null +++ b/guarddog_nexus/web/templates/_pagination.html @@ -0,0 +1,10 @@ +{% set total_pages = (total // per_page) + (1 if total % per_page else 0) %} +{% if total_pages > 1 %} + +{% endif %} diff --git a/guarddog_nexus/web/templates/_scans_table.html b/guarddog_nexus/web/templates/_scans_table.html index 25f4177..b9c3411 100644 --- a/guarddog_nexus/web/templates/_scans_table.html +++ b/guarddog_nexus/web/templates/_scans_table.html @@ -28,7 +28,7 @@ {{ s.package_version }} {{ s.repository }} - {% if s.status == 'scanning' %}scanning{% else %}{{ s.status }}{% endif %} + {% with status=s.status %}{% include "_status_badge.html" %}{% endwith %} {% if s.flagged %}{{ s.total_findings }}{% else %}0{% endif %} {{ s.started_at.strftime('%Y-%m-%d %H:%M') if s.started_at }} @@ -42,14 +42,5 @@ -{% set total_pages = (total // per_page) + (1 if total % per_page else 0) %} -{% if total_pages > 1 %} - -{% endif %} +{% include "_pagination.html" %} {{ t('total_scans', request.state.lang, total) }} diff --git a/guarddog_nexus/web/templates/_status_badge.html b/guarddog_nexus/web/templates/_status_badge.html new file mode 100644 index 0000000..ee8a38e --- /dev/null +++ b/guarddog_nexus/web/templates/_status_badge.html @@ -0,0 +1 @@ +{% if status == 'scanning' %}scanning{% else %}{{ status }}{% endif %} diff --git a/guarddog_nexus/web/templates/dashboard_stats.html b/guarddog_nexus/web/templates/dashboard_stats.html index 62bf27a..a6565b3 100644 --- a/guarddog_nexus/web/templates/dashboard_stats.html +++ b/guarddog_nexus/web/templates/dashboard_stats.html @@ -1,3 +1,10 @@ +{% if total_findings %} +
+ {{ t('col_findings', request.state.lang) }}: {{ total_findings }} + {{ t('llm_analyzed', request.state.lang) }}: {{ llm_analyzed }} + {{ t('llm_pending', request.state.lang) }}: {{ llm_pending }} +
+{% endif %} {% if latest_flagged %}

{{ t('heading_latest_flagged', request.state.lang) }}

@@ -30,7 +37,7 @@ {{ s.package_version }} {{ s.repository }} - {% if s.status == 'scanning' %}scanning{% else %}{{ s.status }}{% endif %} + {% with status=s.status %}{% include "_status_badge.html" %}{% endwith %} {% if s.flagged %}⚠ {{ s.total_findings }}{% elif s.status == 'completed' %}{% else %}-{% endif %} {{ s.started_at.strftime('%m-%d %H:%M') if s.started_at }} diff --git a/guarddog_nexus/web/templates/package_detail.html b/guarddog_nexus/web/templates/package_detail.html index fd744c5..733365a 100644 --- a/guarddog_nexus/web/templates/package_detail.html +++ b/guarddog_nexus/web/templates/package_detail.html @@ -24,7 +24,7 @@ #{{ s.id }} {{ s.repository }} - {% if s.status == 'scanning' %}scanning{% else %}{{ s.status }}{% endif %} + {% with status=s.status %}{% include "_status_badge.html" %}{% endwith %} {% if s.flagged %}{{ s.total_findings }}{% else %}0{% endif %} {{ s.started_at.strftime('%Y-%m-%d %H:%M') if s.started_at }} @@ -54,19 +54,28 @@
{{ f.data.code }}
{% endif %} - {% if f.report %} + {% if f.report and f.report.status == "analyzing" %} + {% include "_llm_spinner.html" %} + {% elif f.report and f.report.verdict %}
{{ f.report.verdict }} {% if f.report.severity_rating %} {{ f.report.severity_rating }} {% endif %} + {% if config.llm_enabled and not config.llm_auto_analyze %} + {{ t('llm_retry', request.state.lang) }} + {% endif %}

{{ f.report.summary }}

{{ f.report.analysis }}

{{ t('llm_disclaimer', request.state.lang) }}

- {% else %} + {% elif config.llm_enabled and not config.llm_auto_analyze %}
{{ t('scan_info_repository', request.state.lang) }}
{{ scan.repository }}
{{ t('scan_info_status', request.state.lang) }}
- {% if scan.status == 'scanning' %}scanning{% else %}{{ scan.status }}{% endif %} + {% with status=scan.status %}{% include "_status_badge.html" %}{% endwith %}
{{ t('scan_info_sha256', request.state.lang) }}
{{ scan.sha256 or '-' }}
{{ t('scan_info_started', request.state.lang) }}
{{ scan.started_at.strftime('%Y-%m-%d %H:%M') if scan.started_at }}
@@ -50,19 +50,28 @@
{{ f.data.code }}
{% endif %} - {% if f.report %} + {% if f.report and f.report.status == "analyzing" %} + {% include "_llm_spinner.html" %} + {% elif f.report and f.report.verdict %}
{{ f.report.verdict }} {% if f.report.severity_rating %} {{ f.report.severity_rating }} {% endif %} + {% if config.llm_enabled and not config.llm_auto_analyze %} + {{ t('llm_retry', request.state.lang) }} + {% endif %}

{{ f.report.summary }}

{{ f.report.analysis }}

{{ t('llm_disclaimer', request.state.lang) }}

- {% else %} + {% elif config.llm_enabled and not config.llm_auto_analyze %}