From 1341404568e8591d7132abecbd2115618a003651 Mon Sep 17 00:00:00 2001 From: Marker689 Date: Sun, 10 May 2026 10:45:44 +0300 Subject: [PATCH] =?UTF-8?q?fix:=20=D0=B0=D1=83=D0=B4=D0=B8=D1=82=20?= =?UTF-8?q?=E2=80=94=2019=20=D1=84=D0=B8=D0=BA=D1=81=D0=BE=D0=B2=20=D0=B1?= =?UTF-8?q?=D0=B5=D0=B7=D0=BE=D0=BF=D0=B0=D1=81=D0=BD=D0=BE=D1=81=D1=82?= =?UTF-8?q?=D0=B8,=20=D0=BD=D0=B0=D0=B4=D1=91=D0=B6=D0=BD=D0=BE=D1=81?= =?UTF-8?q?=D1=82=D0=B8,=20UI=20=D0=B8=2016=20=D0=BD=D0=BE=D0=B2=D1=8B?= =?UTF-8?q?=D1=85=20=D1=82=D0=B5=D1=81=D1=82=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - S4: bump jinja2>=3.1.4, python-multipart>=0.0.18, httpx>=0.28.0 - S5: _detect_ecosystem — DEFAULT_ECOSYSTEM для неизвестных форматов - S6: harvester — log.exception() вместо log.error() - S8: _scan_component — urlencode параметров - P1: scanner — proc.kill() при таймауте - P3: api_packages — selectinload(Scan.findings), убран N+1 - P4+P5: утечка _url_locks и _llm_locks при early return - P6: DB reaper — сброс {'status':'analyzing'} при старте - UI: htmx-пагинация, фильтры не теряют flagged, 404 с layout - UI: мобильные таблицы overflow-x, полная стата на дашборде - UI: i18n статусов в _status_badge, urlencode package_name - 16 новых тестов: analyze endpoint (6), scanner errors (4), webhook signature (2), llm client (4) --- .gitignore | 2 + README.en.md | 52 +++- README.md | 32 ++- guarddog_nexus/config.py | 8 +- guarddog_nexus/core/harvester.py | 4 +- guarddog_nexus/core/llm.py | 10 +- guarddog_nexus/core/nexus.py | 4 +- guarddog_nexus/core/scanner.py | 5 + guarddog_nexus/db/engine.py | 16 +- guarddog_nexus/db/queries.py | 23 +- guarddog_nexus/i18n.py | 11 +- guarddog_nexus/routes/api_findings.py | 2 - guarddog_nexus/routes/api_packages.py | 25 +- guarddog_nexus/routes/api_scans.py | 25 +- guarddog_nexus/routes/metrics.py | 16 +- guarddog_nexus/routes/web.py | 25 +- guarddog_nexus/routes/webhooks.py | 59 ++--- 
guarddog_nexus/web/static/style.css | 2 +- guarddog_nexus/web/templates/404.html | 6 + guarddog_nexus/web/templates/_pagination.html | 10 +- .../web/templates/_scans_table.html | 2 +- .../web/templates/_status_badge.html | 3 +- .../web/templates/dashboard_stats.html | 16 +- .../web/templates/packages_list.html | 3 +- guarddog_nexus/web/templates/scans_list.html | 5 +- pyproject.toml | 6 +- tests/test_api.py | 4 + tests/test_llm_analysis.py | 232 ++++++++++++++++++ tests/test_nexus.py | 33 +-- tests/test_scanner.py | 51 +++- tests/test_webhooks.py | 35 ++- 31 files changed, 575 insertions(+), 152 deletions(-) create mode 100644 guarddog_nexus/web/templates/404.html create mode 100644 tests/test_llm_analysis.py diff --git a/.gitignore b/.gitignore index 4a8c57a..a106a81 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ data/ .env .venv/ venv/ +.agents/ +skills-lock.json diff --git a/README.en.md b/README.en.md index c0b4ac8..4b7f447 100644 --- a/README.en.md +++ b/README.en.md @@ -63,6 +63,7 @@ After startup: | `TEMP_DIR` | `/tmp/guarddog-nexus` | Temporary download directory | | `MAX_CONCURRENT_SCANS` | `4` | Maximum simultaneous GuardDog processes | | `LLM_ENABLED` | `0` | Set to `1` to enable LLM analysis | +| `LLM_AUTO_ANALYZE` | `0` | Set to `1` to auto-analyze after scan; `0` = manual mode via UI button | | `LLM_API_KEY` | _(empty)_ | API key (OpenAI / Groq / Ollama / etc.) | | `LLM_API_BASE` | `https://api.openai.com/v1` | OpenAI-compatible base URL | | `LLM_MODEL` | `gpt-4o-mini` | Model name | @@ -128,15 +129,54 @@ GuardDog Nexus accepts `UPDATED` webhook events from Nexus. ## LLM Analysis -GuardDog Nexus can automatically analyze each finding through an LLM. When enabled (`LLM_ENABLED=1`), every flagged scan gets an AI breakdown: threat assessment, code analysis, and recommendations. +GuardDog Nexus can analyze findings through an LLM. 
When enabled (`LLM_ENABLED=1`), flagged findings receive an AI breakdown: threat assessment, code analysis, and recommendations. -**Auto mode:** after a flagged scan completes, each finding is sent to the LLM. Reports are saved to the database and included in JSON log output. +### Operating Modes -**Manual mode:** the web UI has an "Analyze with LLM" button next to each finding — click to get an inline verdict. +The `LLM_AUTO_ANALYZE` variable controls the analysis mode: -Supported providers: any OpenAI-compatible API (OpenAI, Groq, Ollama, vLLM, etc.). +- **`LLM_AUTO_ANALYZE=1` (automatic):** each finding is automatically sent to the LLM after a scan completes. Reports are saved to the database and included in JSON log output. No "Analyze" button is shown in the UI. +- **`LLM_AUTO_ANALYZE=0` (manual, default):** an "Analyze with LLM" button is shown next to each finding in the web UI. The user clicks to trigger analysis and see the inline verdict. -LLM response format (JSON): +### finding.report State Machine + +The `finding.report` field transitions through these states: + +| Value | UI | +|-------|----| +| `None` | "Analyze with LLM" button (manual mode only) | +| `{"status": "analyzing"}` | Spinner | +| `{"verdict": ..., "summary": ..., ...}` | Report + "Retry" link | + +### Supported Providers + +Any OpenAI-compatible API. Configuration examples: + +```bash +# OpenAI (manual mode) +LLM_ENABLED=1 +LLM_AUTO_ANALYZE=0 +LLM_API_KEY=sk-... +LLM_API_BASE=https://api.openai.com/v1 +LLM_MODEL=gpt-4o-mini + +# Groq with auto-analysis (faster, free tier) +LLM_ENABLED=1 +LLM_AUTO_ANALYZE=1 +LLM_API_KEY=gsk_... 
+LLM_API_BASE=https://api.groq.com/openai/v1 +LLM_MODEL=llama-3.3-70b-versatile + +# Local Ollama +LLM_ENABLED=1 +LLM_API_KEY=ollama +LLM_API_BASE=http://host.docker.internal:11434/v1 +LLM_MODEL=llama3.2 +``` + +### Response Format + +LLM returns JSON with fields: - `verdict` — `safe` / `suspicious` / `malicious` - `summary` — one-line verdict - `analysis` — detailed breakdown (2–3 paragraphs) @@ -156,7 +196,7 @@ guarddog-nexus/ │ ├── i18n.py # RU/EN translations │ ├── logging_setup.py # JSON structured logging │ └── main.py # FastAPI app entry point -├── tests/ # pytest tests (50+) +├── tests/ # pytest tests (85 tests) ├── scripts/ # Setup scripts ├── docker-compose.yml ├── Dockerfile diff --git a/README.md b/README.md index 395d8f4..528dd63 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,7 @@ python -m guarddog_nexus.main | `MAX_CONCURRENT_SCANS` | `4` | Максимум одновременных сканирований GuardDog | | `LOG_SYSLOG_FACILITY` | `local0` | Syslog facility (local0–local7) | | `LLM_ENABLED` | `0` | `1` — включить LLM-анализ уязвимостей | +| `LLM_AUTO_ANALYZE` | `0` | `1` — автоанализ после скана; `0` = ручной режим через кнопку в UI | | `LLM_API_KEY` | _(пусто)_ | API-ключ (OpenAI / Groq / Ollama / etc.) 
| | `LLM_API_BASE` | `https://api.openai.com/v1` | Базовый URL OpenAI-совместимого API | | `LLM_MODEL` | `gpt-4o-mini` | Название модели | @@ -149,13 +150,14 @@ GuardDog Nexus принимает вебхуки от Nexus при событи | Метод | Путь | Описание | |-------|------|----------| | GET | `/api/v1/findings` | Список уязвимостей (фильтр по правилу, severity, scan_id) | -| POST | `/api/v1/findings/{id}/analyze` | Запустить LLM-анализ уязвимости | +| POST | `/api/v1/findings/{id}/analyze` | Запустить LLM-анализ уязвимости (возвращает HTMX-фрагмент при вызове из веб-интерфейса) | -### Здоровье +### Здоровье и метрики | Метод | Путь | Описание | |-------|------|----------| | GET | `/health` | Проверка работоспособности | +| GET | `/metrics` | Метрики в формате Prometheus | ## Веб-интерфейс @@ -227,26 +229,40 @@ guarddog-nexus/ ## LLM-анализ -GuardDog Nexus может автоматически анализировать каждую найденную уязвимость через LLM (языковую модель). При включении (`LLM_ENABLED=1`) каждый flagged скан получает AI-разбор: насколько угроза реальна, что делает подозрительный код, рекомендации. +GuardDog Nexus может анализировать найденные уязвимости через LLM (языковую модель). При включении (`LLM_ENABLED=1`) уязвимые находки получают AI-разбор: насколько угроза реальна, что делает подозрительный код, рекомендации. -### Как работает +### Режимы работы -1. **Автоматический режим:** после завершения скана с уязвимостями GuardDog Nexus отправляет каждую находку в LLM, сохраняет отчёт в БД и включает его в syslog-событие -2. **Ручной режим:** в веб-интерфейсе на странице сканирования у каждой уязвимости есть кнопка «Analyze with LLM» — нажатие отправляет запрос и показывает вердикт inline +Переменная `LLM_AUTO_ANALYZE` управляет режимом анализа: + +- **`LLM_AUTO_ANALYZE=1` (автоматический):** после завершения скана каждая находка автоматически отправляется в LLM. Отчёт сохраняется в БД и включается в syslog-событие. Кнопка анализа в UI не отображается. 
+- **`LLM_AUTO_ANALYZE=0` (ручной, по умолчанию):** в веб-интерфейсе рядом с каждой уязвимостью отображается кнопка «Analyze with LLM». Пользователь нажимает кнопку — запускается анализ, результат показывается inline. + +### Состояния finding.report + +Поле `finding.report` проходит через конечный автомат: + +| Значение | UI | +|----------|----| +| `None` | Кнопка «Analyze with LLM» (только в ручном режиме) | +| `{"status": "analyzing"}` | Спиннер | +| `{verdict:, summary:, ...}` | Отчёт + ссылка «Retry» | ### Поддерживаемые провайдеры Любой OpenAI-совместимый API. Примеры конфигурации: ```bash -# OpenAI +# OpenAI (ручной режим) LLM_ENABLED=1 +LLM_AUTO_ANALYZE=0 LLM_API_KEY=sk-... LLM_API_BASE=https://api.openai.com/v1 LLM_MODEL=gpt-4o-mini -# Groq (быстрее, бесплатный тир) +# Groq с автоанализом (быстрее, бесплатный тир) LLM_ENABLED=1 +LLM_AUTO_ANALYZE=1 LLM_API_KEY=gsk_... LLM_API_BASE=https://api.groq.com/openai/v1 LLM_MODEL=llama-3.3-70b-versatile diff --git a/guarddog_nexus/config.py b/guarddog_nexus/config.py index 9afc806..e80150f 100644 --- a/guarddog_nexus/config.py +++ b/guarddog_nexus/config.py @@ -30,9 +30,7 @@ class Config: nexus_url: str = os.getenv("NEXUS_URL", "http://localhost:8081") nexus_username: str = os.getenv("NEXUS_USERNAME", "admin") nexus_password: str = os.getenv("NEXUS_PASSWORD", "admin123") - nexus_download_timeout: int = _env_int( - "NEXUS_DOWNLOAD_TIMEOUT_SECONDS", HTTP_TIMEOUT_DOWNLOAD - ) + nexus_download_timeout: int = _env_int("NEXUS_DOWNLOAD_TIMEOUT_SECONDS", HTTP_TIMEOUT_DOWNLOAD) nexus_api_timeout: int = _env_int("NEXUS_API_TIMEOUT_SECONDS", HTTP_TIMEOUT_API) # Database @@ -55,9 +53,7 @@ class Config: scan_timeout_seconds: int = _env_int("SCAN_TIMEOUT_SECONDS", 300) temp_dir: str = os.getenv("TEMP_DIR", "/tmp/guarddog-nexus") guarddog_binary: str = os.getenv("GUARDDOG_BINARY", GUARDDOG_BINARY_FALLBACK) - max_concurrent_scans: int = _env_int( - "MAX_CONCURRENT_SCANS", DEFAULT_MAX_CONCURRENT_SCANS - ) + max_concurrent_scans: int 
= _env_int("MAX_CONCURRENT_SCANS", DEFAULT_MAX_CONCURRENT_SCANS) # LLM analysis llm_enabled: bool = os.getenv("LLM_ENABLED", "").lower() in ("1", "true", "yes") diff --git a/guarddog_nexus/core/harvester.py b/guarddog_nexus/core/harvester.py index 7c27338..ee303ef 100644 --- a/guarddog_nexus/core/harvester.py +++ b/guarddog_nexus/core/harvester.py @@ -60,6 +60,8 @@ async def harvest( lock = _url_locks[download_url] if lock.locked(): log.info("URL already being processed, skipping: %s", download_url) + async with _url_lock: + _url_locks.pop(download_url, None) return None async with lock: @@ -191,7 +193,7 @@ async def harvest( return scan except Exception as e: - log.error("Scan failed for %s==%s: %s", package_name, package_version, e) + log.exception("Scan failed for %s==%s", package_name, package_version) scan.status = ScanStatus.FAILED.value scan.error_message = str(e)[:ERROR_MESSAGE_MAX_LENGTH] scan.finished_at = datetime.datetime.now(datetime.timezone.utc) diff --git a/guarddog_nexus/core/llm.py b/guarddog_nexus/core/llm.py index 74af3cd..cc90c47 100644 --- a/guarddog_nexus/core/llm.py +++ b/guarddog_nexus/core/llm.py @@ -23,11 +23,7 @@ def _build_user_message(finding: dict) -> str: location = finding.get("location", "") code = finding.get("code", "") - prompt = ( - f"Rule: {rule}\n" - f"Severity: {severity}\n" - f"Message: {message}\n" - ) + prompt = f"Rule: {rule}\nSeverity: {severity}\nMessage: {message}\n" if location: prompt += f"Location: {location}\n" if code: @@ -66,9 +62,7 @@ async def analyze_finding(finding_data: dict) -> dict | None: try: async with _llm_semaphore: - async with httpx.AsyncClient( - timeout=config.llm_timeout, headers=headers - ) as client: + async with httpx.AsyncClient(timeout=config.llm_timeout, headers=headers) as client: resp = await client.post(url, json=payload) resp.raise_for_status() body = resp.json() diff --git a/guarddog_nexus/core/nexus.py b/guarddog_nexus/core/nexus.py index ec8dc9e..56ad992 100644 --- 
a/guarddog_nexus/core/nexus.py +++ b/guarddog_nexus/core/nexus.py @@ -116,9 +116,7 @@ def _write_file(path: str, content: bytes) -> None: async def nexus_get(path: str) -> httpx.Response: """Make an authenticated GET request to Nexus REST API.""" auth = httpx.BasicAuth(config.nexus_username, config.nexus_password) - async with httpx.AsyncClient( - auth=auth, timeout=config.nexus_api_timeout - ) as client: + async with httpx.AsyncClient(auth=auth, timeout=config.nexus_api_timeout) as client: return await client.get(f"{config.nexus_url.rstrip('/')}{path}") diff --git a/guarddog_nexus/core/scanner.py b/guarddog_nexus/core/scanner.py index 24639f6..d85f159 100644 --- a/guarddog_nexus/core/scanner.py +++ b/guarddog_nexus/core/scanner.py @@ -34,6 +34,11 @@ async def scan_package(filepath: str, ecosystem: str = DEFAULT_ECOSYSTEM) -> dic ) except asyncio.TimeoutError: log.error("GuardDog scan timed out for %s", filepath) + try: + proc.kill() + await proc.wait() + except (ProcessLookupError, Exception): + pass return {"findings": [], "errors": [SCAN_ERROR_TIMEOUT]} except FileNotFoundError: log.error("GuardDog binary not found at %s", guarddog_bin) diff --git a/guarddog_nexus/db/engine.py b/guarddog_nexus/db/engine.py index 54b6d43..9f3392c 100644 --- a/guarddog_nexus/db/engine.py +++ b/guarddog_nexus/db/engine.py @@ -1,6 +1,5 @@ """Async SQLite database setup via SQLAlchemy.""" - from sqlalchemy import inspect, text from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine from sqlalchemy.orm import DeclarativeBase @@ -69,6 +68,7 @@ async def init_db(): await conn.run_sync(Base.metadata.create_all) await _migrate() await _ensure_indexes() + await _reap_stale_analysis() async def get_session() -> AsyncSession: @@ -90,3 +90,17 @@ async def _ensure_indexes(): async with _engine.begin() as conn: for sql in indexes: await conn.execute(text(sql)) + + +async def _reap_stale_analysis(): + """Reset stuck 'analyzing' statuses left from crashes.""" + 
sql = ( + "UPDATE findings SET report = NULL " + "WHERE report IS NOT NULL " + "AND json_extract(report, '$.status') = 'analyzing'" + ) + async with _engine.begin() as conn: + result = await conn.execute(text(sql)) + count = result.rowcount + if count: + log.warning("Reset %d stale LLM analysis statuses", count) diff --git a/guarddog_nexus/db/queries.py b/guarddog_nexus/db/queries.py index a83af8d..ef277d0 100644 --- a/guarddog_nexus/db/queries.py +++ b/guarddog_nexus/db/queries.py @@ -23,6 +23,7 @@ from guarddog_nexus.db.models import Finding, Scan # Scan list query builder # --------------------------------------------------------------------------- + def build_scan_list_query( flagged: bool | None = None, status: str | None = None, @@ -51,9 +52,7 @@ def build_scan_list_query( count_q = count_q.where(Scan.repository == repository) if search: pattern = f"%{search}%" - condition = Scan.package_name.ilike(pattern) | Scan.package_version.ilike( - pattern - ) + condition = Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern) q = q.where(condition) count_q = count_q.where(condition) @@ -70,6 +69,7 @@ def build_scan_list_query( # Package list query builder # --------------------------------------------------------------------------- + def build_package_list_query( flagged: bool | None = None, ecosystem: str | None = None, @@ -101,9 +101,7 @@ def build_package_list_query( subq = subq.where(Scan.repository == repository) if search: pattern = f"%{search}%" - subq = subq.where( - Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern) - ) + subq = subq.where(Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)) if flagged is not None: subq = subq.having(func.max(Scan.flagged) == flagged) @@ -112,9 +110,7 @@ def build_package_list_query( sort_field_name = PACKAGE_SORT_FIELDS.get(sort_by, "started_at") sort_col_from = getattr(Scan, sort_field_name, Scan.started_at) sort_col = func.max(sort_col_from) - subq = subq.order_by( - 
sort_col.desc() if sort_dir == "desc" else sort_col.asc() - ) + subq = subq.order_by(sort_col.desc() if sort_dir == "desc" else sort_col.asc()) sq = subq.subquery() total_q = select(func.count()).select_from(sq) @@ -126,12 +122,11 @@ def build_package_list_query( # Dashboard stats (shared between API /stats and web dashboard) # --------------------------------------------------------------------------- + async def get_dashboard_stats(session: AsyncSession) -> dict: """Return all dashboard statistics as a single dict.""" total_scans = await session.scalar(select(func.count(Scan.id))) - flagged_scans = await session.scalar( - select(func.count(Scan.id)).where(Scan.flagged == True) - ) + flagged_scans = await session.scalar(select(func.count(Scan.id)).where(Scan.flagged == True)) recent_flagged = await session.scalar( select(func.count(Scan.id)).where( Scan.flagged == True, @@ -165,9 +160,7 @@ async def get_dashboard_stats(session: AsyncSession) -> dict: latest_scans = ( ( await session.execute( - select(Scan) - .order_by(Scan.started_at.desc()) - .limit(DASHBOARD_LATEST_SCANS_LIMIT) + select(Scan).order_by(Scan.started_at.desc()).limit(DASHBOARD_LATEST_SCANS_LIMIT) ) ) .scalars() diff --git a/guarddog_nexus/i18n.py b/guarddog_nexus/i18n.py index 98a1306..0d25845 100644 --- a/guarddog_nexus/i18n.py +++ b/guarddog_nexus/i18n.py @@ -76,14 +76,21 @@ _STRINGS = { "llm_not_found": {"en": "Finding not found", "ru": "Находка не найдена"}, "llm_disclaimer": { "en": "⚠ AI-generated analysis — may contain inaccuracies. " - "Always verify findings before taking action.", + "Always verify findings before taking action.", "ru": "⚠ Анализ сгенерирован AI — может содержать неточности. 
" - "Всегда проверяйте находки перед принятием мер.", + "Всегда проверяйте находки перед принятием мер.", }, "llm_analyzing": {"en": "Analyzing...", "ru": "Анализирую..."}, "llm_retry": {"en": "Retry", "ru": "Повторить"}, "llm_analyzed": {"en": "LLM analyzed", "ru": "LLM проанализ."}, "llm_pending": {"en": "Pending", "ru": "Ожидают"}, + "total_scans_label": {"en": "Scans", "ru": "Сканов"}, + "flagged_scans_label": {"en": "Flagged", "ru": "Помечено"}, + "heading_top_rules": {"en": "Top Finding Rules", "ru": "Топ правил"}, + "status_scanning": {"en": "scanning", "ru": "сканирование"}, + "status_pending": {"en": "pending", "ru": "ожидание"}, + "status_completed": {"en": "completed", "ru": "завершено"}, + "status_failed": {"en": "failed", "ru": "ошибка"}, "not_found": {"en": "Not found", "ru": "Не найдено"}, "breadcrumb_home": {"en": "Home", "ru": "Главная"}, "breadcrumb_dashboard": {"en": "Dashboard", "ru": "Панель"}, diff --git a/guarddog_nexus/routes/api_findings.py b/guarddog_nexus/routes/api_findings.py index d3cf7ce..2f7b140 100644 --- a/guarddog_nexus/routes/api_findings.py +++ b/guarddog_nexus/routes/api_findings.py @@ -52,5 +52,3 @@ async def list_findings( for f in findings ], } - - diff --git a/guarddog_nexus/routes/api_packages.py b/guarddog_nexus/routes/api_packages.py index ec9d4d0..9f0efd8 100644 --- a/guarddog_nexus/routes/api_packages.py +++ b/guarddog_nexus/routes/api_packages.py @@ -7,6 +7,7 @@ from urllib.parse import unquote from fastapi import APIRouter, Depends, Query, Response from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload from ..constants import ( CSV_MEDIA_TYPE, @@ -17,7 +18,7 @@ from ..constants import ( MAX_PAGE_SIZE, ) from ..db.engine import get_session -from ..db.models import Finding, Scan +from ..db.models import Scan from ..db.queries import build_package_list_query router = APIRouter(prefix="/api/v1/packages", tags=["packages"]) @@ -88,14 +89,22 @@ async def 
export_packages_csv( writer = csv.writer(output) writer.writerow( [ - "name", "version", "ecosystem", "repository", - "last_scanned_at", "flagged", "total_findings", + "name", + "version", + "ecosystem", + "repository", + "last_scanned_at", + "flagged", + "total_findings", ] ) for r in rows: writer.writerow( [ - r.pkg_name, r.pkg_ver, r.ecosystem, r.repository, + r.pkg_name, + r.pkg_ver, + r.ecosystem, + r.repository, r.last_scan.isoformat() if r.last_scan else "", bool(r.is_flagged), r.findings_sum, @@ -123,6 +132,7 @@ async def get_package( await session.execute( select(Scan) .where(Scan.package_name == pkg_name, Scan.package_version == pkg_version) + .options(selectinload(Scan.findings)) .order_by(Scan.started_at.desc()) ) ) @@ -135,12 +145,7 @@ async def get_package( all_findings: list[dict] = [] for s in scans: - findings = ( - (await session.execute(select(Finding).where(Finding.scan_id == s.id))) - .scalars() - .all() - ) - for f in findings: + for f in s.findings: all_findings.append({"id": f.id, **f.data, "report": f.report}) return { diff --git a/guarddog_nexus/routes/api_scans.py b/guarddog_nexus/routes/api_scans.py index 8935c24..3d307d1 100644 --- a/guarddog_nexus/routes/api_scans.py +++ b/guarddog_nexus/routes/api_scans.py @@ -93,16 +93,31 @@ async def export_scans_csv( writer = csv.writer(output) writer.writerow( [ - "id", "package_name", "package_version", "ecosystem", "repository", - "status", "total_findings", "flagged", "started_at", "finished_at", - "error_message", "sha256", + "id", + "package_name", + "package_version", + "ecosystem", + "repository", + "status", + "total_findings", + "flagged", + "started_at", + "finished_at", + "error_message", + "sha256", ] ) for s in scans: writer.writerow( [ - s.id, s.package_name, s.package_version, s.ecosystem, s.repository, - s.status, s.total_findings, s.flagged, + s.id, + s.package_name, + s.package_version, + s.ecosystem, + s.repository, + s.status, + s.total_findings, + s.flagged, 
s.started_at.isoformat() if s.started_at else "", s.finished_at.isoformat() if s.finished_at else "", s.error_message or "", diff --git a/guarddog_nexus/routes/metrics.py b/guarddog_nexus/routes/metrics.py index 7747b64..20d24e1 100644 --- a/guarddog_nexus/routes/metrics.py +++ b/guarddog_nexus/routes/metrics.py @@ -15,31 +15,23 @@ router = APIRouter(tags=["metrics"]) @router.get("/metrics") async def metrics(session: AsyncSession = Depends(get_session)): total = await session.scalar(select(func.count(Scan.id))) or 0 - flagged = await session.scalar( - select(func.count(Scan.id)).where(Scan.flagged == True) - ) or 0 + flagged = await session.scalar(select(func.count(Scan.id)).where(Scan.flagged == True)) or 0 findings_total = await session.scalar(select(func.count(Finding.id))) or 0 # By status status_rows = ( - await session.execute( - select(Scan.status, func.count(Scan.id)).group_by(Scan.status) - ) + await session.execute(select(Scan.status, func.count(Scan.id)).group_by(Scan.status)) ).all() by_status = {row[0]: row[1] for row in status_rows} # By ecosystem eco_rows = ( - await session.execute( - select(Scan.ecosystem, func.count(Scan.id)).group_by(Scan.ecosystem) - ) + await session.execute(select(Scan.ecosystem, func.count(Scan.id)).group_by(Scan.ecosystem)) ).all() by_eco = {row[0]: row[1] for row in eco_rows} # Latest scan timestamp - latest = await session.scalar( - select(func.max(Scan.started_at)) - ) + latest = await session.scalar(select(func.max(Scan.started_at))) lines = [ "# HELP guarddog_scans_total Total number of package scans.", diff --git a/guarddog_nexus/routes/web.py b/guarddog_nexus/routes/web.py index a9c39ec..5002c5f 100644 --- a/guarddog_nexus/routes/web.py +++ b/guarddog_nexus/routes/web.py @@ -41,7 +41,8 @@ _jinja_env.globals["config"] = config def _render(name: str, **context) -> HTMLResponse: template = _jinja_env.get_template(name) - return HTMLResponse(template.render(**context)) + status_code = context.pop("_status_code", 200) + 
return HTMLResponse(template.render(**context), status_code=status_code) @router.get("/", response_class=HTMLResponse) @@ -104,18 +105,14 @@ async def scans_list( @router.get("/scans/{scan_id}", response_class=HTMLResponse) -async def scan_detail( - scan_id: int, request: Request, session: AsyncSession = Depends(get_session) -): +async def scan_detail(scan_id: int, request: Request, session: AsyncSession = Depends(get_session)): from sqlalchemy.orm import selectinload scan = await session.scalar( - select(Scan) - .where(Scan.id == scan_id) - .options(selectinload(Scan.findings)) + select(Scan).where(Scan.id == scan_id).options(selectinload(Scan.findings)) ) if not scan: - return HTMLResponse(f"

{_t('not_found', request.state.lang)}

", status_code=404) + return _render("404.html", request=request, _status_code=404) return _render("scan_detail.html", scan=scan, request=request) @@ -192,7 +189,7 @@ async def package_detail( ) if not scans: - return HTMLResponse(f"

{_t('not_found', request.state.lang)}

", status_code=404) + return _render("404.html", request=request, _status_code=404) all_findings = [] for s in scans: @@ -223,9 +220,7 @@ async def analyze_finding_htmx( if not config.llm_enabled: msg = _t("llm_disabled", lang) - return HTMLResponse( - f'
{msg}
' - ) + return HTMLResponse(f'
{msg}
') finding = await session.scalar(select(Finding).where(Finding.id == finding_id)) if not finding: @@ -252,6 +247,8 @@ async def analyze_finding_htmx( lock = _llm_locks[finding_id] if lock.locked(): + async with _llm_lock: + _llm_locks.pop(finding_id, None) return _render("_llm_spinner.html", request=request) async with lock: @@ -267,9 +264,7 @@ async def analyze_finding_htmx( finding.report = None await session.commit() msg = _t("llm_failed", lang) - return HTMLResponse( - f'
{msg}
' - ) + return HTMLResponse(f'
{msg}
') finding.report = report await session.commit() diff --git a/guarddog_nexus/routes/webhooks.py b/guarddog_nexus/routes/webhooks.py index d3664f3..be28665 100644 --- a/guarddog_nexus/routes/webhooks.py +++ b/guarddog_nexus/routes/webhooks.py @@ -4,6 +4,7 @@ import hashlib import hmac import json import re +from urllib.parse import urlencode from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status @@ -58,7 +59,7 @@ def _detect_ecosystem(source: dict) -> str: return "go" if fmt in ("npm", "node"): return "npm" - return fmt or DEFAULT_ECOSYSTEM + return DEFAULT_ECOSYSTEM @router.post("/nexus") @@ -75,22 +76,16 @@ async def nexus_webhook( raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature" ) - expected = hmac.new( - config.webhook_secret.encode(), payload, hashlib.sha256 - ).hexdigest() + expected = hmac.new(config.webhook_secret.encode(), payload, hashlib.sha256).hexdigest() if not hmac.compare_digest(x_nexus_webhook_signature, expected): log.warning("Webhook rejected: invalid signature") - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature" - ) + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature") try: data = json.loads(payload.decode("utf-8")) except (json.JSONDecodeError, UnicodeDecodeError): log.warning("Webhook received invalid body") - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request body" - ) + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request body") action = data.get("action", "").upper() if action not in RELEVANT_WEBHOOK_ACTIONS: @@ -108,8 +103,7 @@ async def nexus_webhook( initiator = raw_initiator source_ip = request.client.host if request.client else None - log.info("Webhook: action=%s initiator=%s source_ip=%s", - action, initiator, source_ip) + log.info("Webhook: action=%s initiator=%s source_ip=%s", action, initiator, source_ip) repository = 
data.get("repositoryName", "") if not repository: @@ -125,16 +119,19 @@ async def nexus_webhook( if not asset_path or not _is_package_asset(asset_path): return {"status": WEBHOOK_STATUS_IGNORED, "reason": WEBHOOK_IGNORE_NON_PACKAGE} - download_url = asset.get("downloadUrl") or _build_download_url( - repository, asset_path - ) + download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path) ecosystem = _detect_ecosystem(asset) log.info("Webhook: %s asset %s (%s) in %s", action, asset_path, ecosystem, repository) background_tasks.add_task( - _scan_in_background, download_url, repository, ecosystem, asset_path, - initiator=initiator, source_ip=source_ip, + _scan_in_background, + download_url, + repository, + ecosystem, + asset_path, + initiator=initiator, + source_ip=source_ip, ) return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action} @@ -164,10 +161,15 @@ async def nexus_webhook( async def _scan_component(repository: str, name: str, version: str, ecosystem: str): from ..core.nexus import nexus_get - api_path = ( - f"/service/rest/v1/search" - f"?repository={repository}&name={name}&version={version}&format={ecosystem}" + params = urlencode( + { + "repository": repository, + "name": name, + "version": version, + "format": ecosystem, + } ) + api_path = f"/service/rest/v1/search?{params}" try: resp = await nexus_get(api_path) resp.raise_for_status() @@ -186,14 +188,10 @@ async def _scan_component(repository: str, name: str, version: str, ecosystem: s asset_path = _extract_asset_path(asset) if not asset_path or not _is_package_asset(asset_path): continue - download_url = asset.get("downloadUrl") or _build_download_url( - repository, asset_path - ) + download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path) log.info("Scanning component asset: %s", asset_path) async for session in get_session(): - await harvest( - download_url, repository, ecosystem, asset_path, session - ) + await 
harvest(download_url, repository, ecosystem, asset_path, session) break @@ -208,8 +206,13 @@ async def _scan_in_background( try: async for session in get_session(): await harvest( - download_url, repository, format_, asset_path, session, - initiator=initiator, source_ip=source_ip, + download_url, + repository, + format_, + asset_path, + session, + initiator=initiator, + source_ip=source_ip, ) break except Exception as e: diff --git a/guarddog_nexus/web/static/style.css b/guarddog_nexus/web/static/style.css index c1c3039..df4e25c 100644 --- a/guarddog_nexus/web/static/style.css +++ b/guarddog_nexus/web/static/style.css @@ -35,7 +35,7 @@ /* ------------------------------------------------------------------ */ /* Tables */ /* ------------------------------------------------------------------ */ -table { font-size: 0.9rem; } +table { font-size: 0.9rem; display: block; overflow-x: auto; } table.compact { font-size: 0.82rem; } table.compact th, table.compact td { padding: 0.35rem 0.5rem; } diff --git a/guarddog_nexus/web/templates/404.html b/guarddog_nexus/web/templates/404.html new file mode 100644 index 0000000..4d715d5 --- /dev/null +++ b/guarddog_nexus/web/templates/404.html @@ -0,0 +1,6 @@ +{% extends "base.html" %} +{% block title %}{{ t('not_found', request.state.lang) }}{% endblock %} +{% block content %} +

{{ t('not_found', request.state.lang) }}

+

{{ t('nav_dashboard', request.state.lang) }}

+{% endblock %} diff --git a/guarddog_nexus/web/templates/_pagination.html b/guarddog_nexus/web/templates/_pagination.html index ac611cd..1574fd2 100644 --- a/guarddog_nexus/web/templates/_pagination.html +++ b/guarddog_nexus/web/templates/_pagination.html @@ -2,9 +2,15 @@ {% if total_pages > 1 %} {% endif %} diff --git a/guarddog_nexus/web/templates/_scans_table.html b/guarddog_nexus/web/templates/_scans_table.html index b9c3411..99f80f0 100644 --- a/guarddog_nexus/web/templates/_scans_table.html +++ b/guarddog_nexus/web/templates/_scans_table.html @@ -24,7 +24,7 @@ {% for s in scans %} #{{ s.id }} - {{ s.package_name }} + {{ s.package_name }} {{ s.package_version }} {{ s.repository }} diff --git a/guarddog_nexus/web/templates/_status_badge.html b/guarddog_nexus/web/templates/_status_badge.html index ee8a38e..1756905 100644 --- a/guarddog_nexus/web/templates/_status_badge.html +++ b/guarddog_nexus/web/templates/_status_badge.html @@ -1 +1,2 @@ -{% if status == 'scanning' %}scanning{% else %}{{ status }}{% endif %} +{% set label = t('status_' + status, request.state.lang) %} +{% if status == 'scanning' %}{{ label }}{% else %}{{ label }}{% endif %} diff --git a/guarddog_nexus/web/templates/dashboard_stats.html b/guarddog_nexus/web/templates/dashboard_stats.html index a6565b3..41759fe 100644 --- a/guarddog_nexus/web/templates/dashboard_stats.html +++ b/guarddog_nexus/web/templates/dashboard_stats.html @@ -1,10 +1,24 @@ {% if total_findings %} -
+
+ {{ t('total_scans_label', request.state.lang) }}: {{ total_scans }} + {{ t('flagged_scans_label', request.state.lang) }}: {{ flagged_scans }} {{ t('col_findings', request.state.lang) }}: {{ total_findings }} {{ t('llm_analyzed', request.state.lang) }}: {{ llm_analyzed }} {{ t('llm_pending', request.state.lang) }}: {{ llm_pending }}
{% endif %} +{% if top_rules %} +
+

{{ t('heading_top_rules', request.state.lang) }}

+ + + {% for r in top_rules %} + + {% endfor %} + +
{{ r.rule }}{{ r.count }}
+
+{% endif %} {% if latest_flagged %}

{{ t('heading_latest_flagged', request.state.lang) }}

diff --git a/guarddog_nexus/web/templates/packages_list.html b/guarddog_nexus/web/templates/packages_list.html index e01cf34..4635642 100644 --- a/guarddog_nexus/web/templates/packages_list.html +++ b/guarddog_nexus/web/templates/packages_list.html @@ -11,7 +11,8 @@

{{ t('heading_packages', request.state.lang) }}

- + + {% if flagged_filter == '1' %}{{ t('btn_show_all', request.state.lang) }}{% else %}{{ t('btn_flagged_only', request.state.lang) }}{% endif %} diff --git a/guarddog_nexus/web/templates/scans_list.html b/guarddog_nexus/web/templates/scans_list.html index f1bb38d..3d21c72 100644 --- a/guarddog_nexus/web/templates/scans_list.html +++ b/guarddog_nexus/web/templates/scans_list.html @@ -11,8 +11,9 @@

{{ t('heading_scans', request.state.lang) }}

- - + +