refactor: вынос хардкода + LLM-анализ finding'ов
## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
(104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
build_package_list_query, get_dashboard_stats)
Устранено дублирование кода между api/*.py и web/routes.py (~90%)
- config.py: добавлены LLM_ENABLED, nexus_download_timeout, nexus_api_timeout,
guarddog_binary, log_syslog_facility и остальные LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants
## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
(htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious
## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
@@ -4,6 +4,14 @@ from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import (
|
||||
DEFAULT_OFFSET,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
JSON_PATH_RULE,
|
||||
JSON_PATH_SEVERITY,
|
||||
MAX_PAGE_SIZE,
|
||||
)
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.models import Finding
|
||||
|
||||
@@ -12,8 +20,8 @@ router = APIRouter(prefix="/api/v1/findings", tags=["findings"])
|
||||
|
||||
@router.get("")
|
||||
async def list_findings(
|
||||
limit: int = Query(50, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
|
||||
offset: int = Query(DEFAULT_OFFSET, ge=0),
|
||||
rule: str | None = Query(None),
|
||||
severity: str | None = Query(None),
|
||||
scan_id: int | None = Query(None),
|
||||
@@ -21,9 +29,9 @@ async def list_findings(
|
||||
):
|
||||
q = select(Finding)
|
||||
if rule:
|
||||
q = q.where(func.json_extract(Finding.data, "$.rule") == rule)
|
||||
q = q.where(func.json_extract(Finding.data, JSON_PATH_RULE) == rule)
|
||||
if severity:
|
||||
q = q.where(func.json_extract(Finding.data, "$.severity") == severity)
|
||||
q = q.where(func.json_extract(Finding.data, JSON_PATH_SEVERITY) == severity)
|
||||
if scan_id:
|
||||
q = q.where(Finding.scan_id == scan_id)
|
||||
|
||||
@@ -39,8 +47,40 @@ async def list_findings(
|
||||
"id": f.id,
|
||||
"scan_id": f.scan_id,
|
||||
**f.data,
|
||||
"report": f.report,
|
||||
"created_at": f.created_at.isoformat() if f.created_at else None,
|
||||
}
|
||||
for f in findings
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@router.post("/{finding_id}/analyze")
async def analyze_finding_endpoint(
    finding_id: int,
    session: AsyncSession = Depends(get_session),
):
    """Manually trigger LLM analysis for a single finding.

    On success the generated report is stored on the finding and returned
    together with the finding's own data.

    Raises:
        HTTPException: 503 when LLM analysis is disabled in the configuration,
            404 when no finding has the given id, 502 when the LLM call
            produced no report.
    """
    # Function-scope import so this block does not depend on the module-level
    # ``from fastapi import ...`` line being extended.
    from fastapi import HTTPException

    # Failures are reported through HTTP status codes rather than a 200
    # response; the ``detail`` texts are the same as before, so clients that
    # read the JSON body keep working.
    if not config.llm_enabled:
        raise HTTPException(status_code=503, detail="LLM analysis is disabled")

    finding = await session.scalar(
        select(Finding).where(Finding.id == finding_id)
    )
    if finding is None:
        raise HTTPException(status_code=404, detail="Not found")

    from guarddog_nexus.llm import analyze_finding

    report = await analyze_finding(finding.data)
    if report is None:
        raise HTTPException(status_code=502, detail="LLM analysis failed")

    finding.report = report
    # Build the payload before committing: a commit may expire ORM attributes
    # (depending on the session's expire_on_commit setting), and reading them
    # afterwards would need another database round trip.
    payload = {
        "id": finding.id,
        **finding.data,
        "report": report,
    }
    await session.commit()

    return payload
|
||||
|
||||
@@ -4,71 +4,48 @@ import csv
|
||||
import io
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, Response
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from guarddog_nexus.constants import (
|
||||
CSV_MEDIA_TYPE,
|
||||
DEFAULT_OFFSET,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
DEFAULT_SORT_BY_PACKAGES,
|
||||
DEFAULT_SORT_DIR,
|
||||
MAX_PAGE_SIZE,
|
||||
)
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.models import Finding, Scan
|
||||
from guarddog_nexus.queries import build_package_list_query
|
||||
|
||||
router = APIRouter(prefix="/api/v1/packages", tags=["packages"])
|
||||
|
||||
VALID_SORT_FIELDS = {
|
||||
"name": Scan.package_name,
|
||||
"version": Scan.package_version,
|
||||
"last_scanned_at": Scan.started_at,
|
||||
"total_findings": Scan.total_findings,
|
||||
"flagged": Scan.flagged,
|
||||
}
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def list_packages(
|
||||
limit: int = Query(50, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
|
||||
offset: int = Query(DEFAULT_OFFSET, ge=0),
|
||||
ecosystem: str | None = Query(None),
|
||||
flagged: bool | None = Query(None),
|
||||
search: str | None = Query(None),
|
||||
repository: str | None = Query(None),
|
||||
sort_by: str = Query("last_scanned_at"),
|
||||
sort_dir: str = Query("desc"),
|
||||
sort_by: str = Query(DEFAULT_SORT_BY_PACKAGES),
|
||||
sort_dir: str = Query(DEFAULT_SORT_DIR),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
subq = select(
|
||||
Scan.package_name,
|
||||
Scan.package_version,
|
||||
Scan.ecosystem,
|
||||
Scan.repository,
|
||||
func.max(Scan.started_at).label("last_scanned_at"),
|
||||
func.max(Scan.flagged).label("is_flagged"),
|
||||
func.sum(Scan.total_findings).label("total_findings"),
|
||||
func.max(Scan.id).label("latest_scan_id"),
|
||||
).group_by(Scan.package_name, Scan.package_version)
|
||||
|
||||
if ecosystem:
|
||||
subq = subq.where(Scan.ecosystem == ecosystem)
|
||||
if flagged is not None:
|
||||
subq = subq.having(func.max(Scan.flagged) == flagged)
|
||||
if repository:
|
||||
subq = subq.where(Scan.repository == repository)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
subq = subq.where(
|
||||
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
)
|
||||
|
||||
sort_field = VALID_SORT_FIELDS.get(sort_by, Scan.started_at)
|
||||
sort_dir = "asc" if sort_dir.lower() == "asc" else "desc"
|
||||
sort_col = func.max(sort_field)
|
||||
subq = subq.order_by(
|
||||
sort_col.desc() if sort_dir == "desc" else sort_col.asc()
|
||||
rows_q, total_q = build_package_list_query(
|
||||
flagged=flagged,
|
||||
ecosystem=ecosystem,
|
||||
repository=repository,
|
||||
search=search,
|
||||
sort_by=sort_by,
|
||||
sort_dir=sort_dir,
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
)
|
||||
|
||||
total_q = select(func.count()).select_from(subq.subquery())
|
||||
total = await session.scalar(total_q)
|
||||
|
||||
rows = (
|
||||
await session.execute(subq.offset(offset).limit(limit))
|
||||
).all()
|
||||
rows = (await session.execute(rows_q)).all()
|
||||
|
||||
return {
|
||||
"total": total,
|
||||
@@ -76,14 +53,14 @@ async def list_packages(
|
||||
"offset": offset,
|
||||
"packages": [
|
||||
{
|
||||
"name": r.package_name,
|
||||
"version": r.package_version,
|
||||
"name": r.pkg_name,
|
||||
"version": r.pkg_ver,
|
||||
"ecosystem": r.ecosystem,
|
||||
"repository": r.repository,
|
||||
"last_scanned_at": r.last_scanned_at.isoformat() if r.last_scanned_at else None,
|
||||
"last_scanned_at": r.last_scan.isoformat() if r.last_scan else None,
|
||||
"flagged": bool(r.is_flagged),
|
||||
"total_findings": r.total_findings,
|
||||
"latest_scan_id": r.latest_scan_id,
|
||||
"total_findings": r.findings_sum,
|
||||
"latest_scan_id": r.sid,
|
||||
}
|
||||
for r in rows
|
||||
],
|
||||
@@ -96,44 +73,37 @@ async def export_packages_csv(
|
||||
search: str | None = Query(None),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
subq = select(
|
||||
Scan.package_name,
|
||||
Scan.package_version,
|
||||
Scan.ecosystem,
|
||||
Scan.repository,
|
||||
func.max(Scan.started_at).label("last_scanned_at"),
|
||||
func.max(Scan.flagged).label("is_flagged"),
|
||||
func.sum(Scan.total_findings).label("total_findings"),
|
||||
).group_by(Scan.package_name, Scan.package_version)
|
||||
|
||||
if flagged is not None:
|
||||
subq = subq.having(func.max(Scan.flagged) == flagged)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
subq = subq.where(
|
||||
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
)
|
||||
|
||||
subq = subq.order_by(func.max(Scan.started_at).desc())
|
||||
rows = (await session.execute(subq)).all()
|
||||
rows_q, _total_q = build_package_list_query(
|
||||
flagged=flagged,
|
||||
search=search,
|
||||
sort_by=DEFAULT_SORT_BY_PACKAGES,
|
||||
sort_dir=DEFAULT_SORT_DIR,
|
||||
limit=MAX_PAGE_SIZE,
|
||||
offset=0,
|
||||
)
|
||||
rows = (await session.execute(rows_q)).all()
|
||||
|
||||
output = io.StringIO()
|
||||
writer = csv.writer(output)
|
||||
writer.writerow([
|
||||
"name", "version", "ecosystem", "repository",
|
||||
"last_scanned_at", "flagged", "total_findings"
|
||||
])
|
||||
writer.writerow(
|
||||
[
|
||||
"name", "version", "ecosystem", "repository",
|
||||
"last_scanned_at", "flagged", "total_findings",
|
||||
]
|
||||
)
|
||||
for r in rows:
|
||||
writer.writerow([
|
||||
r.package_name, r.package_version, r.ecosystem, r.repository,
|
||||
r.last_scanned_at.isoformat() if r.last_scanned_at else "",
|
||||
bool(r.is_flagged),
|
||||
r.total_findings,
|
||||
])
|
||||
writer.writerow(
|
||||
[
|
||||
r.pkg_name, r.pkg_ver, r.ecosystem, r.repository,
|
||||
r.last_scan.isoformat() if r.last_scan else "",
|
||||
bool(r.is_flagged),
|
||||
r.findings_sum,
|
||||
]
|
||||
)
|
||||
|
||||
return Response(
|
||||
content=output.getvalue(),
|
||||
media_type="text/csv",
|
||||
media_type=CSV_MEDIA_TYPE,
|
||||
headers={"Content-Disposition": "attachment; filename=packages_export.csv"},
|
||||
)
|
||||
|
||||
@@ -162,10 +132,12 @@ async def get_package(
|
||||
all_findings: list[dict] = []
|
||||
for s in scans:
|
||||
findings = (
|
||||
(await session.execute(select(Finding).where(Finding.scan_id == s.id))).scalars().all()
|
||||
(await session.execute(select(Finding).where(Finding.scan_id == s.id)))
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
for f in findings:
|
||||
all_findings.append({"id": f.id, **f.data})
|
||||
all_findings.append({"id": f.id, **f.data, "report": f.report})
|
||||
|
||||
return {
|
||||
"name": scans[0].package_name,
|
||||
|
||||
@@ -4,64 +4,50 @@ import csv
|
||||
import io
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, Response
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from guarddog_nexus.constants import (
|
||||
CSV_MEDIA_TYPE,
|
||||
DEFAULT_OFFSET,
|
||||
DEFAULT_PAGE_SIZE,
|
||||
DEFAULT_SORT_BY_SCANS,
|
||||
DEFAULT_SORT_DIR,
|
||||
MAX_PAGE_SIZE,
|
||||
)
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.models import Finding, Scan
|
||||
from guarddog_nexus.models import Scan
|
||||
from guarddog_nexus.queries import build_scan_list_query, get_dashboard_stats
|
||||
|
||||
router = APIRouter(prefix="/api/v1/scans", tags=["scans"])
|
||||
|
||||
VALID_SORT_FIELDS = {
|
||||
"id": Scan.id,
|
||||
"package_name": Scan.package_name,
|
||||
"started_at": Scan.started_at,
|
||||
"status": Scan.status,
|
||||
"total_findings": Scan.total_findings,
|
||||
"flagged": Scan.flagged,
|
||||
}
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def list_scans(
|
||||
limit: int = Query(50, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
limit: int = Query(DEFAULT_PAGE_SIZE, le=MAX_PAGE_SIZE),
|
||||
offset: int = Query(DEFAULT_OFFSET, ge=0),
|
||||
flagged: bool | None = Query(None),
|
||||
search: str | None = Query(None),
|
||||
status: str | None = Query(None),
|
||||
repository: str | None = Query(None),
|
||||
sort_by: str = Query("started_at"),
|
||||
sort_dir: str = Query("desc"),
|
||||
sort_by: str = Query(DEFAULT_SORT_BY_SCANS),
|
||||
sort_dir: str = Query(DEFAULT_SORT_DIR),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
q = select(Scan)
|
||||
count_q = select(func.count(Scan.id))
|
||||
|
||||
if flagged is not None:
|
||||
q = q.where(Scan.flagged == flagged)
|
||||
count_q = count_q.where(Scan.flagged == flagged)
|
||||
if status:
|
||||
q = q.where(Scan.status == status)
|
||||
count_q = count_q.where(Scan.status == status)
|
||||
if repository:
|
||||
q = q.where(Scan.repository == repository)
|
||||
count_q = count_q.where(Scan.repository == repository)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
condition = Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
q = q.where(condition)
|
||||
count_q = count_q.where(condition)
|
||||
|
||||
sort_field = VALID_SORT_FIELDS.get(sort_by, Scan.started_at)
|
||||
sort_dir = "asc" if sort_dir.lower() == "asc" else "desc"
|
||||
q = q.order_by(sort_field.desc() if sort_dir == "desc" else sort_field.asc())
|
||||
|
||||
q = q.offset(offset).limit(limit)
|
||||
|
||||
q, count_q = build_scan_list_query(
|
||||
flagged=flagged,
|
||||
status=status,
|
||||
repository=repository,
|
||||
search=search,
|
||||
sort_by=sort_by,
|
||||
sort_dir=sort_dir,
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
)
|
||||
scans = (await session.execute(q)).scalars().all()
|
||||
total = await session.scalar(count_q)
|
||||
|
||||
scans = (await session.execute(q)).scalars().all()
|
||||
return {
|
||||
"total": total,
|
||||
"limit": limit,
|
||||
@@ -92,78 +78,57 @@ async def export_scans_csv(
|
||||
status: str | None = Query(None),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
q = select(Scan)
|
||||
|
||||
if flagged is not None:
|
||||
q = q.where(Scan.flagged == flagged)
|
||||
if status:
|
||||
q = q.where(Scan.status == status)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
q = q.where(
|
||||
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
)
|
||||
|
||||
q = q.order_by(Scan.started_at.desc())
|
||||
q, _count_q = build_scan_list_query(
|
||||
flagged=flagged,
|
||||
status=status,
|
||||
search=search,
|
||||
sort_by=DEFAULT_SORT_BY_SCANS,
|
||||
sort_dir=DEFAULT_SORT_DIR,
|
||||
limit=MAX_PAGE_SIZE,
|
||||
offset=0,
|
||||
)
|
||||
scans = (await session.execute(q)).scalars().all()
|
||||
|
||||
output = io.StringIO()
|
||||
writer = csv.writer(output)
|
||||
writer.writerow([
|
||||
"id", "package_name", "package_version", "ecosystem", "repository",
|
||||
"status", "total_findings", "flagged", "started_at", "finished_at",
|
||||
"error_message", "sha256"
|
||||
])
|
||||
writer.writerow(
|
||||
[
|
||||
"id", "package_name", "package_version", "ecosystem", "repository",
|
||||
"status", "total_findings", "flagged", "started_at", "finished_at",
|
||||
"error_message", "sha256",
|
||||
]
|
||||
)
|
||||
for s in scans:
|
||||
writer.writerow([
|
||||
s.id, s.package_name, s.package_version, s.ecosystem, s.repository,
|
||||
s.status, s.total_findings, s.flagged,
|
||||
s.started_at.isoformat() if s.started_at else "",
|
||||
s.finished_at.isoformat() if s.finished_at else "",
|
||||
s.error_message or "",
|
||||
s.sha256 or "",
|
||||
])
|
||||
writer.writerow(
|
||||
[
|
||||
s.id, s.package_name, s.package_version, s.ecosystem, s.repository,
|
||||
s.status, s.total_findings, s.flagged,
|
||||
s.started_at.isoformat() if s.started_at else "",
|
||||
s.finished_at.isoformat() if s.finished_at else "",
|
||||
s.error_message or "",
|
||||
s.sha256 or "",
|
||||
]
|
||||
)
|
||||
|
||||
return Response(
|
||||
content=output.getvalue(),
|
||||
media_type="text/csv",
|
||||
media_type=CSV_MEDIA_TYPE,
|
||||
headers={"Content-Disposition": "attachment; filename=scans_export.csv"},
|
||||
)
|
||||
|
||||
|
||||
@router.get("/stats")
async def scan_stats(session: AsyncSession = Depends(get_session)):
    """Return aggregate scan statistics.

    The counters and the top-rules list come from the shared
    ``get_dashboard_stats`` query helper. ``latest_scan_at`` is the start time
    of the most recent scan of any kind, or ``None`` when there are no scans.
    """
    dashboard = await get_dashboard_stats(session)

    # Query the newest scan directly. Deriving this value from
    # dashboard["latest_flagged"] would report the newest *flagged* scan only,
    # and None whenever nothing has been flagged yet.
    latest_scan = await session.scalar(
        select(Scan).order_by(Scan.started_at.desc()).limit(1)
    )
    latest_scan_at = (
        latest_scan.started_at.isoformat()
        if latest_scan is not None and latest_scan.started_at
        else None
    )

    return {
        "total_scans": dashboard["total_scans"],
        "flagged_scans": dashboard["flagged_scans"],
        "recent_flagged": dashboard["recent_flagged"],
        "total_findings": dashboard["total_findings"],
        "top_rules": dashboard["top_rules"],
        "latest_scan_at": latest_scan_at,
    }
|
||||
|
||||
|
||||
@@ -188,5 +153,5 @@ async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)):
|
||||
"started_at": scan.started_at.isoformat() if scan.started_at else None,
|
||||
"finished_at": scan.finished_at.isoformat() if scan.finished_at else None,
|
||||
"error_message": scan.error_message,
|
||||
"findings": [{"id": f.id, **f.data} for f in scan.findings],
|
||||
"findings": [{"id": f.id, **f.data, "report": f.report} for f in scan.findings],
|
||||
}
|
||||
|
||||
@@ -3,26 +3,56 @@
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
from guarddog_nexus.constants import (
|
||||
GUARDDOG_BINARY_FALLBACK,
|
||||
HTTP_TIMEOUT_API,
|
||||
HTTP_TIMEOUT_DOWNLOAD,
|
||||
LLM_DEFAULT_API_BASE,
|
||||
LLM_DEFAULT_MODEL,
|
||||
LLM_DEFAULT_TIMEOUT,
|
||||
)
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment.

    Returns ``default`` when the variable is unset or blank. Raises
    ``ValueError`` naming the variable when its value is not an integer, so a
    misconfiguration is easy to trace at start-up.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


@dataclass
class Config:
    """Application settings, read from environment variables.

    The defaults are evaluated once, when this class body is executed, so
    environment changes made afterwards are not picked up.
    """

    # Nexus connection
    nexus_url: str = os.getenv("NEXUS_URL", "http://localhost:8081")
    nexus_username: str = os.getenv("NEXUS_USERNAME", "admin")
    # NOTE(review): the fallback password is a well-known default value;
    # confirm that deployments always set NEXUS_PASSWORD explicitly.
    nexus_password: str = os.getenv("NEXUS_PASSWORD", "admin123")
    nexus_download_timeout: int = _env_int(
        "NEXUS_DOWNLOAD_TIMEOUT_SECONDS", HTTP_TIMEOUT_DOWNLOAD
    )
    nexus_api_timeout: int = _env_int("NEXUS_API_TIMEOUT_SECONDS", HTTP_TIMEOUT_API)

    # Database
    database_path: str = os.getenv("DATABASE_PATH", "data/guarddog.db")

    # Server
    host: str = os.getenv("HOST", "0.0.0.0")
    port: int = _env_int("PORT", 8080)

    # Logging
    log_level: str = os.getenv("LOG_LEVEL", "INFO")
    log_syslog_host: str = os.getenv("LOG_SYSLOG_HOST", "")
    log_syslog_port: int = _env_int("LOG_SYSLOG_PORT", 514)
    log_syslog_facility: str = os.getenv("LOG_SYSLOG_FACILITY", "")

    # Webhooks
    webhook_secret: str = os.getenv("WEBHOOK_SECRET", "")

    # Scanner
    scan_timeout_seconds: int = _env_int("SCAN_TIMEOUT_SECONDS", 300)
    temp_dir: str = os.getenv("TEMP_DIR", "/tmp/guarddog-nexus")
    guarddog_binary: str = os.getenv("GUARDDOG_BINARY", GUARDDOG_BINARY_FALLBACK)

    # LLM analysis
    # Enabled only for the explicit truthy spellings "1", "true" and "yes"
    # (case-insensitive); anything else, including unset, disables it.
    llm_enabled: bool = os.getenv("LLM_ENABLED", "").lower() in ("1", "true", "yes")
    llm_api_base: str = os.getenv("LLM_API_BASE", LLM_DEFAULT_API_BASE)
    llm_api_key: str = os.getenv("LLM_API_KEY", "")
    llm_model: str = os.getenv("LLM_MODEL", LLM_DEFAULT_MODEL)
    llm_timeout: int = _env_int("LLM_TIMEOUT_SECONDS", LLM_DEFAULT_TIMEOUT)
|
||||
|
||||
|
||||
config = Config()
|
||||
|
||||
181
guarddog_nexus/constants.py
Normal file
181
guarddog_nexus/constants.py
Normal file
@@ -0,0 +1,181 @@
|
||||
"""Centralized constants for GuardDog Nexus.
|
||||
|
||||
All magic strings, limits, enumerations, and shared data structures
|
||||
used across the codebase live here to avoid duplication and drift.
|
||||
"""
|
||||
|
||||
# ---------------------------------------------------------------------------
# Package handling
# ---------------------------------------------------------------------------

# Unified list of recognised package file extensions.
# NOTE: webhooks uses this to decide whether to accept an asset;
# harvester uses it to decide whether to download and scan.
# Kept as a tuple so it can be passed straight to ``str.endswith``.
PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")

# Prefix used in PyPI-style asset paths ("/packages/name/ver/file")
PYPI_PATH_PREFIX = "packages"

# Metadata file patterns that should never be scanned
METADATA_PATTERNS = (
    r"^/?simple/",
    r"\.html$",
    r"\.json$",
    r"\.xml$",
    r"/?index\.",
    r"\.rss$",
    r"\.atom$",
)

# ---------------------------------------------------------------------------
# Ecosystem
# ---------------------------------------------------------------------------

DEFAULT_ECOSYSTEM = "pypi"

# ---------------------------------------------------------------------------
# Severity
# ---------------------------------------------------------------------------

SEVERITY_WARNING = "WARNING"
SEVERITY_ERROR = "ERROR"

# ---------------------------------------------------------------------------
# Sorting
# ---------------------------------------------------------------------------

# Public sort key -> attribute name, for the scan listing.
SCAN_SORT_FIELDS = {
    "id": "id",
    "package_name": "package_name",
    "started_at": "started_at",
    "status": "status",
    "total_findings": "total_findings",
    "flagged": "flagged",
}

# Public sort key -> attribute name, for the package listing.
PACKAGE_SORT_FIELDS = {
    "name": "package_name",
    "version": "package_version",
    "last_scanned_at": "started_at",
    "total_findings": "total_findings",
    "flagged": "flagged",
}

DEFAULT_SORT_BY_SCANS = "started_at"
DEFAULT_SORT_BY_PACKAGES = "last_scanned_at"
DEFAULT_SORT_DIR = "desc"

# ---------------------------------------------------------------------------
# Pagination
# ---------------------------------------------------------------------------

DEFAULT_PAGE_SIZE = 50
MAX_PAGE_SIZE = 200
DEFAULT_OFFSET = 0
WEB_PER_PAGE = 50

# ---------------------------------------------------------------------------
# Dashboard limits
# ---------------------------------------------------------------------------

DASHBOARD_LATEST_FLAGGED_LIMIT = 8
DASHBOARD_LATEST_SCANS_LIMIT = 10
DASHBOARD_MOST_FLAGGED_LIMIT = 8
TOP_RULES_LIMIT = 10

RECENT_FLAGGED_DAYS = 7
HEATMAP_DAYS = 14

# ---------------------------------------------------------------------------
# Database fields
# ---------------------------------------------------------------------------

MAX_PACKAGE_NAME_LENGTH = 255
MAX_PACKAGE_VERSION_LENGTH = 255
MAX_ECOSYSTEM_LENGTH = 50
SHA256_HEX_LENGTH = 64

# ---------------------------------------------------------------------------
# Scanner
# ---------------------------------------------------------------------------

GUARDDOG_BINARY_FALLBACK = "guarddog"
GUARDDOG_OUTPUT_KEY = "--output-format"
GUARDDOG_OUTPUT_FORMAT = "json"
GUARDDOG_RESULTS_KEY = "results"
GUARDDOG_ERRORS_KEY = "errors"

SCAN_ERROR_TIMEOUT = "timeout"
SCAN_ERROR_BINARY_NOT_FOUND = "guarddog_not_found"
SCAN_ERROR_JSON_PARSE = "json_parse_error"
SCAN_ERROR_DOWNLOAD_FAILED = "Download failed"

ERROR_MESSAGE_MAX_LENGTH = 1000
SHA256_CHUNK_SIZE = 8192

# Finding data dict keys
FINDING_KEYS = ("rule", "severity", "message", "location", "code")
DEFAULT_FINDING_SEVERITY = SEVERITY_WARNING

# ---------------------------------------------------------------------------
# JSON paths (used in SQL json_extract queries)
# ---------------------------------------------------------------------------

JSON_PATH_RULE = "$.rule"
JSON_PATH_SEVERITY = "$.severity"

# ---------------------------------------------------------------------------
# Webhook
# ---------------------------------------------------------------------------

# frozenset rather than set: this is a shared module-level constant, so it
# must not be mutable by accident. Membership tests and equality comparisons
# against a plain set behave the same.
RELEVANT_WEBHOOK_ACTIONS = frozenset({"CREATED", "UPDATED"})

WEBHOOK_IGNORE_NON_PACKAGE = "non_package_asset"
WEBHOOK_IGNORE_NO_NAME_OR_VERSION = "no_name_or_version"
WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT = "no_asset_or_component"
WEBHOOK_STATUS_ACCEPTED = "accepted"
WEBHOOK_STATUS_IGNORED = "ignored"

# ---------------------------------------------------------------------------
# API
# ---------------------------------------------------------------------------

API_PREFIX_V1 = "/api/v1"
HEALTH_PATH = "/health"
STATIC_MOUNT_PATH = "/static"

CSV_MEDIA_TYPE = "text/csv"

# ---------------------------------------------------------------------------
# LLM
# ---------------------------------------------------------------------------

LLM_DEFAULT_MODEL = "gpt-4o-mini"
LLM_DEFAULT_API_BASE = "https://api.openai.com/v1"
LLM_DEFAULT_TIMEOUT = 30
# NOTE(review): the prompt says "Python package" although PACKAGE_EXTENSIONS
# also accepts ".gem" files — confirm whether the wording should be generic.
LLM_ANALYSIS_SYSTEM_PROMPT = (
    "You are a security analyst reviewing GuardDog findings for a Python package. "
    "Given a finding (rule name, severity, message, code snippet, location), "
    "provide a concise security analysis in 2-3 paragraphs. "
    "Assess whether this is likely a real threat or a false positive. "
    "Explain the risk, potential impact, and recommend an action. "
    "Be specific about the code pattern found and its implications. "
    "Respond in JSON with keys: verdict (safe|suspicious|malicious), "
    "summary (1-line verdict), analysis (2-3 paragraphs), "
    "and severity_rating (low|medium|high|critical)."
)

# ---------------------------------------------------------------------------
# Application metadata
# ---------------------------------------------------------------------------

APP_NAME = "GuardDog Nexus"
APP_DESCRIPTION = "Scan PyPI packages from Sonatype Nexus webhooks using GuardDog"
APP_PACKAGE = "guarddog_nexus"

# ---------------------------------------------------------------------------
# HTTP
# ---------------------------------------------------------------------------

HTTP_TIMEOUT_DOWNLOAD = 120
HTTP_TIMEOUT_API = 30
|
||||
@@ -2,20 +2,22 @@
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import (
|
||||
DEFAULT_ECOSYSTEM,
|
||||
ERROR_MESSAGE_MAX_LENGTH,
|
||||
PACKAGE_EXTENSIONS,
|
||||
SCAN_ERROR_DOWNLOAD_FAILED,
|
||||
)
|
||||
from guarddog_nexus.logging_setup import log
|
||||
from guarddog_nexus.models import Finding, Scan, ScanStatus
|
||||
from guarddog_nexus.nexus_client import (
|
||||
SUPPORTED_EXTENSIONS,
|
||||
compute_sha256,
|
||||
download_asset,
|
||||
extract_pypi_info,
|
||||
)
|
||||
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info
|
||||
from guarddog_nexus.scanner import scan_package
|
||||
|
||||
|
||||
@@ -26,11 +28,10 @@ async def harvest(
|
||||
asset_path: str,
|
||||
session: AsyncSession,
|
||||
) -> Scan | None:
|
||||
"""Download, scan, and store results for a single package asset."""
|
||||
ecosystem = "pypi" if format_ in ("pypi",) else format_
|
||||
ecosystem = DEFAULT_ECOSYSTEM if format_ in (DEFAULT_ECOSYSTEM,) else format_
|
||||
|
||||
filename = os.path.basename(download_url.split("?")[0])
|
||||
if not filename.endswith(SUPPORTED_EXTENSIONS):
|
||||
if not filename.endswith(PACKAGE_EXTENSIONS):
|
||||
log.info("Skipping non-package asset: %s", filename)
|
||||
return None
|
||||
|
||||
@@ -73,7 +74,7 @@ async def harvest(
|
||||
downloaded = await download_asset(download_url, tmpdir)
|
||||
if not downloaded:
|
||||
scan.status = ScanStatus.FAILED.value
|
||||
scan.error_message = "Download failed"
|
||||
scan.error_message = SCAN_ERROR_DOWNLOAD_FAILED
|
||||
scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
|
||||
await session.commit()
|
||||
return scan
|
||||
@@ -103,9 +104,12 @@ async def harvest(
|
||||
result = await scan_package(downloaded, ecosystem)
|
||||
|
||||
findings_list = result.get("findings", [])
|
||||
created_findings: list[Finding] = []
|
||||
|
||||
for fdata in findings_list:
|
||||
session.add(Finding(scan_id=scan.id, data=fdata))
|
||||
f = Finding(scan_id=scan.id, data=fdata)
|
||||
session.add(f)
|
||||
created_findings.append(f)
|
||||
|
||||
scan.total_findings = len(findings_list)
|
||||
scan.flagged = len(findings_list) > 0
|
||||
@@ -113,7 +117,24 @@ async def harvest(
|
||||
scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
|
||||
await session.commit()
|
||||
|
||||
# Refresh to get IDs
|
||||
for f in created_findings:
|
||||
await session.refresh(f)
|
||||
|
||||
# Auto-trigger LLM analysis for flagged packages
|
||||
llm_reports = []
|
||||
if scan.flagged and config.llm_enabled:
|
||||
llm_reports = await _run_llm_analysis(created_findings, session)
|
||||
|
||||
if scan.flagged:
|
||||
extra = {
|
||||
"scan_id": scan.id,
|
||||
"package": f"{package_name}=={package_version}",
|
||||
"findings_count": scan.total_findings,
|
||||
"repository": repository,
|
||||
}
|
||||
if llm_reports:
|
||||
extra["llm_analysis"] = llm_reports
|
||||
log.warning(
|
||||
"FLAGGED %s==%s: %d findings in repo %s",
|
||||
package_name,
|
||||
@@ -121,6 +142,13 @@ async def harvest(
|
||||
scan.total_findings,
|
||||
repository,
|
||||
)
|
||||
if llm_reports:
|
||||
log.info(
|
||||
"LLM analysis complete for %s==%s: %d reports",
|
||||
package_name,
|
||||
package_version,
|
||||
len(llm_reports),
|
||||
)
|
||||
|
||||
log.info(
|
||||
"Scan complete: %s==%s (%d findings)",
|
||||
@@ -133,7 +161,24 @@ async def harvest(
|
||||
except Exception as e:
|
||||
log.error("Scan failed for %s==%s: %s", package_name, package_version, e)
|
||||
scan.status = ScanStatus.FAILED.value
|
||||
scan.error_message = str(e)[:1000]
|
||||
scan.error_message = str(e)[:ERROR_MESSAGE_MAX_LENGTH]
|
||||
scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
|
||||
await session.commit()
|
||||
return scan
|
||||
|
||||
finally:
|
||||
shutil.rmtree(tmpdir, ignore_errors=True)
|
||||
|
||||
|
||||
async def _run_llm_analysis(findings: list[Finding], session: AsyncSession) -> list[dict]:
    """Ask the LLM about each finding and store every non-empty report.

    Findings are analysed one after another. When the analysis yields a
    truthy report it is attached to the finding's ``report`` attribute and
    collected for the caller; falsy results are skipped. The session is
    committed once after all findings have been processed.

    Args:
        findings: Finding rows whose ``data`` payload is sent for analysis.
        session: Database session used to persist the attached reports.

    Returns:
        The reports that were produced, in the order of ``findings``.
    """
    # Imported lazily, as in the rest of this module's LLM integration.
    from guarddog_nexus.llm import analyze_finding

    collected: list[dict] = []
    for item in findings:
        outcome = await analyze_finding(item.data)
        if not outcome:
            # No usable report for this finding; leave it untouched.
            continue
        item.report = outcome
        collected.append(outcome)

    await session.commit()
    return collected
|
||||
|
||||
87
guarddog_nexus/llm.py
Normal file
87
guarddog_nexus/llm.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""LLM analysis client for GuardDog findings.
|
||||
|
||||
Supports any OpenAI-compatible API endpoint with configurable model.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import LLM_ANALYSIS_SYSTEM_PROMPT
|
||||
from guarddog_nexus.logging_setup import log
|
||||
|
||||
|
||||
def _build_user_message(finding: dict) -> str:
|
||||
"""Build a concise prompt from a finding's data."""
|
||||
rule = finding.get("rule", "unknown")
|
||||
severity = finding.get("severity", "unknown")
|
||||
message = finding.get("message", "")
|
||||
location = finding.get("location", "")
|
||||
code = finding.get("code", "")
|
||||
|
||||
prompt = (
|
||||
f"Rule: {rule}\n"
|
||||
f"Severity: {severity}\n"
|
||||
f"Message: {message}\n"
|
||||
)
|
||||
if location:
|
||||
prompt += f"Location: {location}\n"
|
||||
if code:
|
||||
prompt += f"Code snippet:\n```\n{code}\n```\n"
|
||||
|
||||
prompt += (
|
||||
"\nAnalyse this finding and return JSON with keys: "
|
||||
"verdict, summary, analysis, severity_rating."
|
||||
)
|
||||
return prompt
|
||||
|
||||
|
||||
async def analyze_finding(finding_data: dict) -> dict | None:
    """Send a finding to the LLM for security analysis.

    Posts a chat-completions request to ``config.llm_api_base`` using the
    configured model, with the system prompt plus a user message built from
    ``finding_data``, and asks for a JSON-object response.

    Args:
        finding_data: The finding payload; its ``rule`` value is used in log
            messages and the whole dict is rendered into the prompt.

    Returns:
        The parsed JSON object on success, or ``None`` when the API key is
        missing, the request fails or times out, or the response cannot be
        interpreted as a JSON object.
    """
    if not config.llm_api_key:
        log.warning("LLM_API_KEY not set — skipping LLM analysis")
        return None

    rule = finding_data.get("rule")
    url = f"{config.llm_api_base.rstrip('/')}/chat/completions"
    headers = {
        "Authorization": f"Bearer {config.llm_api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": config.llm_model,
        "messages": [
            {"role": "system", "content": LLM_ANALYSIS_SYSTEM_PROMPT},
            {"role": "user", "content": _build_user_message(finding_data)},
        ],
        "temperature": 0.3,
        "response_format": {"type": "json_object"},
    }

    try:
        async with httpx.AsyncClient(
            timeout=config.llm_timeout, headers=headers
        ) as client:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            body = resp.json()
    except httpx.TimeoutException:
        log.error(
            "LLM analysis timed out after %ds for rule=%s",
            config.llm_timeout,
            rule,
        )
        return None
    except Exception as e:
        # Best-effort feature: any transport/HTTP/decoding failure is logged
        # and reported to the caller as "no report".
        log.warning("LLM analysis failed for rule=%s: %s", rule, e)
        return None

    try:
        content = body["choices"][0]["message"]["content"]
        report = json.loads(content)
    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
        # TypeError covers a null ``content`` and bodies that are not
        # subscriptable the way we expect; without it the error would escape
        # to the caller instead of producing the documented ``None``.
        log.warning("LLM response parse error for rule=%s: %s", rule, e)
        return None

    if not isinstance(report, dict):
        # Valid JSON that is not an object (list, string, number, ...) does
        # not satisfy the ``dict | None`` contract callers rely on.
        log.warning(
            "LLM response parse error for rule=%s: expected JSON object, got %s",
            rule,
            type(report).__name__,
        )
        return None

    return report
|
||||
@@ -6,6 +6,7 @@ import sys
|
||||
from logging.handlers import SysLogHandler
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import APP_PACKAGE
|
||||
|
||||
|
||||
class JsonFormatter(logging.Formatter):
|
||||
@@ -21,8 +22,23 @@ class JsonFormatter(logging.Formatter):
|
||||
return json.dumps(payload, ensure_ascii=False)
|
||||
|
||||
|
||||
def _resolve_facility(value: str) -> int:
|
||||
"""Resolve a facility name string to a SysLogHandler constant."""
|
||||
mapping = {
|
||||
"local0": SysLogHandler.LOG_LOCAL0,
|
||||
"local1": SysLogHandler.LOG_LOCAL1,
|
||||
"local2": SysLogHandler.LOG_LOCAL2,
|
||||
"local3": SysLogHandler.LOG_LOCAL3,
|
||||
"local4": SysLogHandler.LOG_LOCAL4,
|
||||
"local5": SysLogHandler.LOG_LOCAL5,
|
||||
"local6": SysLogHandler.LOG_LOCAL6,
|
||||
"local7": SysLogHandler.LOG_LOCAL7,
|
||||
}
|
||||
return mapping.get(value.lower(), SysLogHandler.LOG_LOCAL0)
|
||||
|
||||
|
||||
def setup_logging() -> logging.Logger:
|
||||
logger = logging.getLogger("guarddog_nexus")
|
||||
logger = logging.getLogger(APP_PACKAGE)
|
||||
logger.setLevel(config.log_level.upper())
|
||||
|
||||
stdout_handler = logging.StreamHandler(sys.stdout)
|
||||
@@ -30,9 +46,10 @@ def setup_logging() -> logging.Logger:
|
||||
logger.addHandler(stdout_handler)
|
||||
|
||||
if config.log_syslog_host:
|
||||
facility = _resolve_facility(config.log_syslog_facility)
|
||||
syslog_handler = SysLogHandler(
|
||||
address=(config.log_syslog_host, config.log_syslog_port),
|
||||
facility=SysLogHandler.LOG_LOCAL0,
|
||||
facility=facility,
|
||||
)
|
||||
syslog_handler.setFormatter(JsonFormatter())
|
||||
logger.addHandler(syslog_handler)
|
||||
|
||||
@@ -9,6 +9,7 @@ from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from guarddog_nexus.api import findings, packages, scans
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import APP_DESCRIPTION, APP_NAME, APP_PACKAGE, STATIC_MOUNT_PATH
|
||||
from guarddog_nexus.database import init_db
|
||||
from guarddog_nexus.logging_setup import log
|
||||
from guarddog_nexus.web.routes import router as web_router
|
||||
@@ -20,15 +21,15 @@ STATIC_DIR = os.path.join(os.path.dirname(__file__), "web", "static")
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
await init_db()
|
||||
log.info("GuardDog Nexus started on %s:%s", config.host, config.port)
|
||||
log.info("%s started on %s:%s", APP_NAME, config.host, config.port)
|
||||
yield
|
||||
log.info("GuardDog Nexus shutting down")
|
||||
log.info("%s shutting down", APP_NAME)
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="GuardDog Nexus",
|
||||
title=APP_NAME,
|
||||
version="0.1.0",
|
||||
description="Scan PyPI packages from Sonatype Nexus webhooks using GuardDog",
|
||||
description=APP_DESCRIPTION,
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
@@ -39,7 +40,7 @@ app.include_router(findings.router)
|
||||
app.include_router(web_router)
|
||||
|
||||
if os.path.isdir(STATIC_DIR):
|
||||
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
||||
app.mount(STATIC_MOUNT_PATH, StaticFiles(directory=STATIC_DIR), name="static")
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
@@ -49,7 +50,7 @@ async def health():
|
||||
|
||||
def main():
|
||||
uvicorn.run(
|
||||
"guarddog_nexus.main:app",
|
||||
f"{APP_PACKAGE}.main:app",
|
||||
host=config.host,
|
||||
port=config.port,
|
||||
log_level=config.log_level.lower(),
|
||||
|
||||
@@ -48,6 +48,7 @@ class Finding(Base):
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
scan_id: Mapped[int] = mapped_column(Integer, ForeignKey("scans.id"), nullable=False)
|
||||
data: Mapped[dict] = mapped_column(JSON, nullable=False)
|
||||
report: Mapped[dict | None] = mapped_column(JSON, nullable=True)
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime, nullable=False, default=func.now()
|
||||
)
|
||||
|
||||
@@ -6,10 +6,12 @@ import os
|
||||
import httpx
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import (
|
||||
PYPI_PATH_PREFIX,
|
||||
SHA256_CHUNK_SIZE,
|
||||
)
|
||||
from guarddog_nexus.logging_setup import log
|
||||
|
||||
SUPPORTED_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip")
|
||||
|
||||
|
||||
def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
|
||||
"""Extract package name and version from a PyPI asset path.
|
||||
@@ -17,7 +19,7 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
|
||||
Path format: packages/requests/2.31.0/requests-2.31.0.tar.gz
|
||||
"""
|
||||
parts = asset_path.strip("/").split("/")
|
||||
if len(parts) >= 3 and parts[0] == "packages":
|
||||
if len(parts) >= 3 and parts[0] == PYPI_PATH_PREFIX:
|
||||
return parts[1], parts[2]
|
||||
return None
|
||||
|
||||
@@ -27,7 +29,9 @@ async def download_asset(download_url: str, dest_dir: str) -> str | None:
|
||||
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
|
||||
|
||||
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
|
||||
async with httpx.AsyncClient(auth=auth, timeout=120, follow_redirects=True) as client:
|
||||
async with httpx.AsyncClient(
|
||||
auth=auth, timeout=config.nexus_download_timeout, follow_redirects=True
|
||||
) as client:
|
||||
try:
|
||||
response = await client.get(download_url)
|
||||
response.raise_for_status()
|
||||
@@ -42,13 +46,15 @@ async def download_asset(download_url: str, dest_dir: str) -> str | None:
|
||||
async def nexus_get(path: str) -> httpx.Response:
|
||||
"""Make an authenticated GET request to Nexus REST API."""
|
||||
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
|
||||
async with httpx.AsyncClient(auth=auth, timeout=30) as client:
|
||||
async with httpx.AsyncClient(
|
||||
auth=auth, timeout=config.nexus_api_timeout
|
||||
) as client:
|
||||
return await client.get(f"{config.nexus_url.rstrip('/')}{path}")
|
||||
|
||||
|
||||
def compute_sha256(filepath: str) -> str:
|
||||
h = hashlib.sha256()
|
||||
with open(filepath, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(8192), b""):
|
||||
for chunk in iter(lambda: f.read(SHA256_CHUNK_SIZE), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
|
||||
239
guarddog_nexus/queries.py
Normal file
239
guarddog_nexus/queries.py
Normal file
@@ -0,0 +1,239 @@
|
||||
"""Shared SQL query builders for GuardDog Nexus.
|
||||
|
||||
Eliminates ~90% duplicated SQL between api/*.py and web/routes.py.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
|
||||
from sqlalchemy import Integer, cast, func, select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from guarddog_nexus.constants import (
|
||||
DASHBOARD_LATEST_FLAGGED_LIMIT,
|
||||
DASHBOARD_LATEST_SCANS_LIMIT,
|
||||
DASHBOARD_MOST_FLAGGED_LIMIT,
|
||||
HEATMAP_DAYS,
|
||||
JSON_PATH_RULE,
|
||||
JSON_PATH_SEVERITY,
|
||||
PACKAGE_SORT_FIELDS,
|
||||
RECENT_FLAGGED_DAYS,
|
||||
SCAN_SORT_FIELDS,
|
||||
TOP_RULES_LIMIT,
|
||||
)
|
||||
from guarddog_nexus.models import Finding, Scan
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scan list query builder
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_scan_list_query(
    flagged: bool | None = None,
    status: str | None = None,
    repository: str | None = None,
    search: str | None = None,
    sort_by: str = "started_at",
    sort_dir: str = "desc",
    limit: int = 50,
    offset: int = 0,
    return_total: bool = True,
):
    """Assemble the scan listing statement and its matching count statement.

    Every supplied filter is applied to both statements so the count always
    reflects the same rows as the listing. Only the listing statement is
    ordered and paginated.

    Args:
        flagged: When not ``None``, keep scans whose ``flagged`` equals it.
        status: When truthy, keep scans with exactly this status.
        repository: When truthy, keep scans from exactly this repository.
        search: When truthy, case-insensitive substring match against the
            package name or the package version.
        sort_by: Key looked up in ``SCAN_SORT_FIELDS``; unknown keys fall
            back to ``started_at``.
        sort_dir: ``"desc"`` sorts descending; any other value ascending.
        limit: Page size for the listing statement.
        offset: Number of rows skipped by the listing statement.
        return_total: Accepted for callers; not consulted by this function.

    Returns:
        ``(rows_statement, count_statement)`` — neither has been executed.
    """
    filters = []
    if flagged is not None:
        filters.append(Scan.flagged == flagged)
    if status:
        filters.append(Scan.status == status)
    if repository:
        filters.append(Scan.repository == repository)
    if search:
        like = f"%{search}%"
        filters.append(
            Scan.package_name.ilike(like) | Scan.package_version.ilike(like)
        )

    rows_stmt = select(Scan)
    count_stmt = select(func.count(Scan.id))
    for clause in filters:
        rows_stmt = rows_stmt.where(clause)
        count_stmt = count_stmt.where(clause)

    # SCAN_SORT_FIELDS yields an attribute name, which is then resolved on Scan.
    column_name = SCAN_SORT_FIELDS.get(sort_by, "started_at")
    column = getattr(Scan, column_name, Scan.started_at)
    ordering = column.desc() if sort_dir == "desc" else column.asc()

    rows_stmt = rows_stmt.order_by(ordering).offset(offset).limit(limit)
    return rows_stmt, count_stmt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Package list query builder
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_package_list_query(
    flagged: bool | None = None,
    ecosystem: str | None = None,
    repository: str | None = None,
    search: str | None = None,
    sort_by: str = "last_scanned_at",
    sort_dir: str = "desc",
    limit: int = 50,
    offset: int = 0,
):
    """Assemble the per-package listing statement and its count statement.

    Scans are grouped by package name and version. Each group exposes the
    labelled columns ``pkg_name``, ``pkg_ver``, ``last_scan``,
    ``is_flagged``, ``findings_sum`` and ``sid`` alongside ``ecosystem`` and
    ``repository``. The grouped statement is wrapped as a subquery; the
    listing paginates over it and the count statement counts its rows.

    Args:
        flagged: When not ``None``, keep groups whose maximum ``flagged``
            value equals it.
        ecosystem: When truthy, only scans of this ecosystem are grouped.
        repository: When truthy, only scans of this repository are grouped.
        search: When truthy, case-insensitive substring match against the
            package name or the package version.
        sort_by: Key looked up in ``PACKAGE_SORT_FIELDS``; unknown keys fall
            back to ``started_at``. The group is ordered by the maximum of
            the resolved column.
        sort_dir: ``"desc"`` sorts descending; any other value ascending.
        limit: Page size for the listing statement.
        offset: Number of rows skipped by the listing statement.

    Returns:
        ``(rows_statement, count_statement)`` — neither has been executed.
    """
    name_col = Scan.package_name
    version_col = Scan.package_version

    per_package = select(
        name_col.label("pkg_name"),
        version_col.label("pkg_ver"),
        Scan.ecosystem,
        Scan.repository,
        func.max(Scan.started_at).label("last_scan"),
        func.max(Scan.flagged).label("is_flagged"),
        func.sum(Scan.total_findings).label("findings_sum"),
        func.max(Scan.id).label("sid"),
    ).group_by(name_col, version_col)

    # Row-level filters are applied before grouping.
    if ecosystem:
        per_package = per_package.where(Scan.ecosystem == ecosystem)
    if repository:
        per_package = per_package.where(Scan.repository == repository)
    if search:
        like = f"%{search}%"
        per_package = per_package.where(
            name_col.ilike(like) | version_col.ilike(like)
        )

    # Group-level filter on the aggregated flagged value.
    if flagged is not None:
        per_package = per_package.having(func.max(Scan.flagged) == flagged)

    # PACKAGE_SORT_FIELDS yields an attribute name, resolved on Scan.
    source_attr = getattr(
        Scan, PACKAGE_SORT_FIELDS.get(sort_by, "started_at"), Scan.started_at
    )
    aggregate = func.max(source_attr)
    if sort_dir == "desc":
        per_package = per_package.order_by(aggregate.desc())
    else:
        per_package = per_package.order_by(aggregate.asc())

    grouped = per_package.subquery()
    count_stmt = select(func.count()).select_from(grouped)
    page_stmt = select(grouped).offset(offset).limit(limit)
    return page_stmt, count_stmt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dashboard stats (shared between API /stats and web dashboard)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def get_dashboard_stats(session: AsyncSession) -> dict:
    """Collect every figure shown on the dashboard in one dict.

    Runs the queries sequentially on ``session``: overall scan and finding
    counts, counts of findings by severity, the latest flagged scans, the
    latest scans, the most frequent rules, the packages with the most
    findings, and a per-day scan/flagged histogram.

    Args:
        session: Async database session used for all queries.

    Returns:
        A dict with the keys ``total_scans``, ``flagged_scans``,
        ``recent_flagged``, ``total_findings``, ``warnings_count``,
        ``errors_count``, ``latest_flagged``, ``latest_scans``,
        ``top_rules`` (list of ``{"rule", "count"}`` dicts),
        ``most_flagged``, ``max_findings``, ``days`` (list of
        ``(day, count, flagged_count)`` tuples) and ``now`` (current UTC
        time). Counts that come back empty are reported as ``0``.
    """
    # SQL comparison expression, not a Python truth test.
    is_flagged = Scan.flagged == True  # noqa: E712
    severity = func.json_extract(Finding.data, JSON_PATH_SEVERITY)
    scan_count = select(func.count(Scan.id))
    finding_count = select(func.count(Finding.id))

    # --- scalar counters -------------------------------------------------
    total_scans = await session.scalar(scan_count)
    flagged_scans = await session.scalar(scan_count.where(is_flagged))
    recent_cutoff = func.datetime("now", f"-{RECENT_FLAGGED_DAYS} days")
    recent_flagged = await session.scalar(
        scan_count.where(is_flagged, Scan.started_at >= recent_cutoff)
    )
    total_findings = await session.scalar(finding_count)
    warnings_count = await session.scalar(finding_count.where(severity == "WARNING"))
    errors_count = await session.scalar(finding_count.where(severity == "ERROR"))

    # --- most recent scans -----------------------------------------------
    newest_first = Scan.started_at.desc()
    flagged_stmt = (
        select(Scan)
        .where(is_flagged)
        .order_by(newest_first)
        .limit(DASHBOARD_LATEST_FLAGGED_LIMIT)
    )
    latest_flagged = (await session.execute(flagged_stmt)).scalars().all()

    scans_stmt = select(Scan).order_by(newest_first).limit(DASHBOARD_LATEST_SCANS_LIMIT)
    latest_scans = (await session.execute(scans_stmt)).scalars().all()

    # --- rule frequency --------------------------------------------------
    rules_stmt = (
        select(
            func.json_extract(Finding.data, JSON_PATH_RULE).label("rule"),
            func.count(Finding.id).label("cnt"),
        )
        .group_by(text("rule"))
        .order_by(text("cnt DESC"))
        .limit(TOP_RULES_LIMIT)
    )
    top_rules = (await session.execute(rules_stmt)).all()

    # --- packages with the most findings ---------------------------------
    findings_total = func.sum(Scan.total_findings)
    flagged_packages_stmt = (
        select(
            Scan.package_name,
            Scan.package_version,
            findings_total.label("total"),
            func.max(Scan.started_at).label("last_scan"),
        )
        .where(is_flagged)
        .group_by(Scan.package_name, Scan.package_version)
        .order_by(findings_total.desc())
        .limit(DASHBOARD_MOST_FLAGGED_LIMIT)
    )
    most_flagged = (await session.execute(flagged_packages_stmt)).all()

    # Falls back to 1 when there are no flagged packages at all.
    max_findings = max((row.total for row in most_flagged), default=1)

    # --- per-day histogram -----------------------------------------------
    heatmap_cutoff = func.datetime("now", f"-{HEATMAP_DAYS} days")
    histogram_stmt = (
        select(
            func.date(Scan.started_at).label("day"),
            func.count(Scan.id).label("cnt"),
            func.sum(cast(Scan.flagged, Integer)).label("flagged_cnt"),
        )
        .where(Scan.started_at >= heatmap_cutoff)
        .group_by("day")
        .order_by("day")
    )
    days_raw = (await session.execute(histogram_stmt)).all()

    stats = {
        "total_scans": total_scans or 0,
        "flagged_scans": flagged_scans or 0,
        "recent_flagged": recent_flagged or 0,
        "total_findings": total_findings or 0,
        "warnings_count": warnings_count or 0,
        "errors_count": errors_count or 0,
        "latest_flagged": latest_flagged,
        "latest_scans": latest_scans,
        "top_rules": [{"rule": row.rule, "count": row.cnt} for row in top_rules],
        "most_flagged": most_flagged,
        "max_findings": max_findings,
        "days": [(row.day, row.cnt, row.flagged_cnt) for row in days_raw],
        "now": datetime.datetime.now(datetime.timezone.utc),
    }
    return stats
|
||||
@@ -2,17 +2,25 @@
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import shutil
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import (
|
||||
DEFAULT_ECOSYSTEM,
|
||||
DEFAULT_FINDING_SEVERITY,
|
||||
GUARDDOG_OUTPUT_FORMAT,
|
||||
GUARDDOG_OUTPUT_KEY,
|
||||
GUARDDOG_RESULTS_KEY,
|
||||
SCAN_ERROR_BINARY_NOT_FOUND,
|
||||
SCAN_ERROR_JSON_PARSE,
|
||||
SCAN_ERROR_TIMEOUT,
|
||||
)
|
||||
from guarddog_nexus.logging_setup import log
|
||||
|
||||
GUARDDOG_BIN = shutil.which("guarddog") or "guarddog"
|
||||
|
||||
|
||||
async def scan_package(filepath: str, ecosystem: str = "pypi") -> dict:
|
||||
async def scan_package(filepath: str, ecosystem: str = DEFAULT_ECOSYSTEM) -> dict:
|
||||
"""Run guarddog scan on a downloaded package file. Returns normalized dict."""
|
||||
cmd = [GUARDDOG_BIN, ecosystem, "scan", filepath, "--output-format", "json"]
|
||||
guarddog_bin = config.guarddog_binary
|
||||
cmd = [guarddog_bin, ecosystem, "scan", filepath, GUARDDOG_OUTPUT_KEY, GUARDDOG_OUTPUT_FORMAT]
|
||||
log.info("Running: %s", " ".join(cmd))
|
||||
|
||||
try:
|
||||
@@ -26,10 +34,10 @@ async def scan_package(filepath: str, ecosystem: str = "pypi") -> dict:
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
log.error("GuardDog scan timed out for %s", filepath)
|
||||
return {"findings": [], "errors": ["timeout"]}
|
||||
return {"findings": [], "errors": [SCAN_ERROR_TIMEOUT]}
|
||||
except FileNotFoundError:
|
||||
log.error("GuardDog binary not found at %s", GUARDDOG_BIN)
|
||||
return {"findings": [], "errors": ["guarddog_not_found"]}
|
||||
log.error("GuardDog binary not found at %s", guarddog_bin)
|
||||
return {"findings": [], "errors": [SCAN_ERROR_BINARY_NOT_FOUND]}
|
||||
|
||||
if proc.returncode not in (0, 1):
|
||||
log.error("GuardDog exited %d: %s", proc.returncode, stderr.decode())
|
||||
@@ -39,7 +47,7 @@ async def scan_package(filepath: str, ecosystem: str = "pypi") -> dict:
|
||||
data = json.loads(stdout.decode())
|
||||
except json.JSONDecodeError:
|
||||
log.error("GuardDog returned invalid JSON for %s", filepath)
|
||||
return {"findings": [], "errors": ["json_parse_error"]}
|
||||
return {"findings": [], "errors": [SCAN_ERROR_JSON_PARSE]}
|
||||
|
||||
return _normalize_output(data)
|
||||
|
||||
@@ -56,7 +64,7 @@ def _normalize_output(data: dict) -> dict:
|
||||
- list → semgrep findings [{message, location, code}]
|
||||
"""
|
||||
findings = []
|
||||
results = data.get("results", {})
|
||||
results = data.get(GUARDDOG_RESULTS_KEY, {})
|
||||
|
||||
if isinstance(results, list):
|
||||
results = {}
|
||||
@@ -68,7 +76,7 @@ def _normalize_output(data: dict) -> dict:
|
||||
findings.append(
|
||||
{
|
||||
"rule": rule_name,
|
||||
"severity": "WARNING",
|
||||
"severity": DEFAULT_FINDING_SEVERITY,
|
||||
"message": value,
|
||||
"location": "",
|
||||
"code": "",
|
||||
@@ -80,7 +88,7 @@ def _normalize_output(data: dict) -> dict:
|
||||
findings.append(
|
||||
{
|
||||
"rule": rule_name,
|
||||
"severity": item.get("severity", "WARNING"),
|
||||
"severity": item.get("severity", DEFAULT_FINDING_SEVERITY),
|
||||
"message": item.get("message", ""),
|
||||
"location": item.get("location", ""),
|
||||
"code": item.get("code", ""),
|
||||
|
||||
@@ -1,38 +1,33 @@
|
||||
"""Web UI routes — Jinja2 + htmx pages."""
|
||||
|
||||
import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
from fastapi.responses import HTMLResponse
|
||||
from jinja2 import Environment, PackageLoader, select_autoescape
|
||||
from sqlalchemy import Integer, cast, func, select, text
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from guarddog_nexus.constants import (
|
||||
APP_PACKAGE,
|
||||
DEFAULT_SORT_BY_PACKAGES,
|
||||
DEFAULT_SORT_BY_SCANS,
|
||||
DEFAULT_SORT_DIR,
|
||||
WEB_PER_PAGE,
|
||||
)
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.models import Finding, Scan
|
||||
from guarddog_nexus.queries import (
|
||||
build_package_list_query,
|
||||
build_scan_list_query,
|
||||
get_dashboard_stats,
|
||||
)
|
||||
|
||||
router = APIRouter(tags=["web"])
|
||||
|
||||
_jinja_env = Environment(
|
||||
loader=PackageLoader("guarddog_nexus", "web/templates"),
|
||||
loader=PackageLoader(APP_PACKAGE, "web/templates"),
|
||||
autoescape=select_autoescape(),
|
||||
)
|
||||
|
||||
SCAN_SORT_FIELDS = {
|
||||
"id": Scan.id,
|
||||
"package_name": Scan.package_name,
|
||||
"started_at": Scan.started_at,
|
||||
"status": Scan.status,
|
||||
"total_findings": Scan.total_findings,
|
||||
}
|
||||
|
||||
PACKAGE_SORT_FIELDS = {
|
||||
"name": Scan.package_name,
|
||||
"last_scanned_at": Scan.started_at,
|
||||
"total_findings": Scan.total_findings,
|
||||
"flagged": Scan.flagged,
|
||||
}
|
||||
|
||||
|
||||
def _render(name: str, **context) -> HTMLResponse:
|
||||
template = _jinja_env.get_template(name)
|
||||
@@ -41,113 +36,16 @@ def _render(name: str, **context) -> HTMLResponse:
|
||||
|
||||
@router.get("/", response_class=HTMLResponse)
|
||||
async def dashboard(request: Request, session: AsyncSession = Depends(get_session)):
|
||||
ctx = await _dashboard_data(session)
|
||||
ctx = await get_dashboard_stats(session)
|
||||
return _render("dashboard.html", **ctx, request=request)
|
||||
|
||||
|
||||
@router.get("/dashboard/stats", response_class=HTMLResponse)
|
||||
async def dashboard_stats_fragment(session: AsyncSession = Depends(get_session)):
|
||||
ctx = await _dashboard_data(session)
|
||||
ctx = await get_dashboard_stats(session)
|
||||
return _render("dashboard_stats.html", **ctx)
|
||||
|
||||
|
||||
async def _dashboard_data(session: AsyncSession) -> dict:
|
||||
total_scans = await session.scalar(select(func.count(Scan.id)))
|
||||
flagged_scans = await session.scalar(select(func.count(Scan.id)).where(Scan.flagged == True))
|
||||
recent_flagged = await session.scalar(
|
||||
select(func.count(Scan.id)).where(
|
||||
Scan.flagged == True,
|
||||
Scan.started_at >= func.datetime("now", "-7 days"),
|
||||
)
|
||||
)
|
||||
total_findings = await session.scalar(select(func.count(Finding.id)))
|
||||
|
||||
warnings_count = await session.scalar(
|
||||
select(func.count(Finding.id)).where(
|
||||
func.json_extract(Finding.data, "$.severity") == "WARNING"
|
||||
)
|
||||
)
|
||||
errors_count = await session.scalar(
|
||||
select(func.count(Finding.id)).where(
|
||||
func.json_extract(Finding.data, "$.severity") == "ERROR"
|
||||
)
|
||||
)
|
||||
|
||||
latest_flagged = (
|
||||
(
|
||||
await session.execute(
|
||||
select(Scan).where(Scan.flagged == True).order_by(Scan.started_at.desc()).limit(8)
|
||||
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
latest_scans = (
|
||||
(await session.execute(select(Scan).order_by(Scan.started_at.desc()).limit(10)))
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
top_rules = (
|
||||
await session.execute(
|
||||
select(
|
||||
func.json_extract(Finding.data, "$.rule").label("rule"),
|
||||
func.count(Finding.id).label("cnt"),
|
||||
)
|
||||
.group_by(text("rule"))
|
||||
.order_by(text("cnt DESC"))
|
||||
.limit(10)
|
||||
)
|
||||
).all()
|
||||
|
||||
most_flagged = (
|
||||
await session.execute(
|
||||
select(
|
||||
Scan.package_name,
|
||||
Scan.package_version,
|
||||
func.sum(Scan.total_findings).label("total"),
|
||||
func.max(Scan.started_at).label("last_scan"),
|
||||
)
|
||||
.where(Scan.flagged == True)
|
||||
.group_by(Scan.package_name, Scan.package_version)
|
||||
.order_by(func.sum(Scan.total_findings).desc())
|
||||
.limit(8)
|
||||
)
|
||||
).all()
|
||||
|
||||
max_findings = max((r.total for r in most_flagged), default=1)
|
||||
|
||||
days_raw = (
|
||||
await session.execute(
|
||||
select(
|
||||
func.date(Scan.started_at).label("day"),
|
||||
func.count(Scan.id).label("cnt"),
|
||||
func.sum(cast(Scan.flagged, Integer)).label("flagged_cnt"),
|
||||
)
|
||||
.where(Scan.started_at >= func.datetime("now", "-14 days"))
|
||||
.group_by("day")
|
||||
.order_by("day")
|
||||
)
|
||||
).all()
|
||||
|
||||
return {
|
||||
"total_scans": total_scans or 0,
|
||||
"flagged_scans": flagged_scans or 0,
|
||||
"recent_flagged": recent_flagged or 0,
|
||||
"total_findings": total_findings or 0,
|
||||
"warnings_count": warnings_count or 0,
|
||||
"errors_count": errors_count or 0,
|
||||
"latest_flagged": latest_flagged,
|
||||
"latest_scans": latest_scans,
|
||||
"top_rules": [(r.rule, r.cnt) for r in top_rules],
|
||||
"most_flagged": most_flagged,
|
||||
"max_findings": max_findings,
|
||||
"days": [(d.day, d.cnt, d.flagged_cnt) for d in days_raw],
|
||||
"now": datetime.datetime.now(datetime.timezone.utc),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/scans", response_class=HTMLResponse)
|
||||
async def scans_list(
|
||||
request: Request,
|
||||
@@ -155,32 +53,26 @@ async def scans_list(
|
||||
flagged: str = "",
|
||||
search: str = "",
|
||||
status: str = "",
|
||||
sort_by: str = "started_at",
|
||||
sort_dir: str = "desc",
|
||||
sort_by: str = DEFAULT_SORT_BY_SCANS,
|
||||
sort_dir: str = DEFAULT_SORT_DIR,
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
per_page = 50
|
||||
per_page = WEB_PER_PAGE
|
||||
offset = (page - 1) * per_page
|
||||
|
||||
count_q = select(func.count(Scan.id))
|
||||
q = select(Scan)
|
||||
|
||||
flagged_bool = None
|
||||
if flagged == "1":
|
||||
q = q.where(Scan.flagged == True)
|
||||
count_q = count_q.where(Scan.flagged == True)
|
||||
if status:
|
||||
q = q.where(Scan.status == status)
|
||||
count_q = count_q.where(Scan.status == status)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
condition = Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
q = q.where(condition)
|
||||
count_q = count_q.where(condition)
|
||||
|
||||
sort_field = SCAN_SORT_FIELDS.get(sort_by, Scan.started_at)
|
||||
q = q.order_by(sort_field.desc() if sort_dir == "desc" else sort_field.asc())
|
||||
q = q.offset(offset).limit(per_page)
|
||||
flagged_bool = True
|
||||
|
||||
q, count_q = build_scan_list_query(
|
||||
flagged=flagged_bool,
|
||||
status=status or None,
|
||||
search=search or None,
|
||||
sort_by=sort_by,
|
||||
sort_dir=sort_dir,
|
||||
limit=per_page,
|
||||
offset=offset,
|
||||
)
|
||||
scans = (await session.execute(q)).scalars().all()
|
||||
total = await session.scalar(count_q)
|
||||
|
||||
@@ -200,11 +92,15 @@ async def scans_list(
|
||||
|
||||
|
||||
@router.get("/scans/{scan_id}", response_class=HTMLResponse)
|
||||
async def scan_detail(scan_id: int, request: Request, session: AsyncSession = Depends(get_session)):
|
||||
async def scan_detail(
|
||||
scan_id: int, request: Request, session: AsyncSession = Depends(get_session)
|
||||
):
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
scan = await session.scalar(
|
||||
select(Scan).where(Scan.id == scan_id).options(selectinload(Scan.findings))
|
||||
select(Scan)
|
||||
.where(Scan.id == scan_id)
|
||||
.options(selectinload(Scan.findings))
|
||||
)
|
||||
if not scan:
|
||||
return HTMLResponse("<h1>Not found</h1>", status_code=404)
|
||||
@@ -218,45 +114,27 @@ async def packages_list(
|
||||
page: int = 1,
|
||||
flagged: str = "",
|
||||
search: str = "",
|
||||
sort_by: str = "last_scanned_at",
|
||||
sort_dir: str = "desc",
|
||||
sort_by: str = DEFAULT_SORT_BY_PACKAGES,
|
||||
sort_dir: str = DEFAULT_SORT_DIR,
|
||||
session: AsyncSession = Depends(get_session),
|
||||
):
|
||||
per_page = 50
|
||||
per_page = WEB_PER_PAGE
|
||||
offset = (page - 1) * per_page
|
||||
|
||||
subq = select(
|
||||
Scan.package_name.label("pkg_name"),
|
||||
Scan.package_version.label("pkg_ver"),
|
||||
Scan.ecosystem,
|
||||
Scan.repository,
|
||||
func.max(Scan.started_at).label("last_scan"),
|
||||
func.max(Scan.flagged).label("is_flagged"),
|
||||
func.sum(Scan.total_findings).label("findings_sum"),
|
||||
func.max(Scan.id).label("sid"),
|
||||
).group_by(Scan.package_name, Scan.package_version)
|
||||
|
||||
flagged_bool = None
|
||||
if flagged == "1":
|
||||
subq = subq.having(func.max(Scan.flagged) == True)
|
||||
if search:
|
||||
pattern = f"%{search}%"
|
||||
subq = subq.where(
|
||||
Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
|
||||
)
|
||||
flagged_bool = True
|
||||
|
||||
sort_field = PACKAGE_SORT_FIELDS.get(sort_by, Scan.started_at)
|
||||
sort_col = func.max(sort_field)
|
||||
subq = subq.order_by(
|
||||
sort_col.desc() if sort_dir == "desc" else sort_col.asc()
|
||||
rows_q, total_q = build_package_list_query(
|
||||
flagged=flagged_bool,
|
||||
search=search or None,
|
||||
sort_by=sort_by,
|
||||
sort_dir=sort_dir,
|
||||
limit=per_page,
|
||||
offset=offset,
|
||||
)
|
||||
|
||||
sq = subq.subquery()
|
||||
total = await session.scalar(select(func.count()).select_from(sq))
|
||||
rows = (
|
||||
await session.execute(
|
||||
select(sq).offset(offset).limit(per_page)
|
||||
)
|
||||
).all()
|
||||
total = await session.scalar(total_q)
|
||||
rows = (await session.execute(rows_q)).all()
|
||||
|
||||
return _render(
|
||||
"packages_list.html",
|
||||
@@ -309,3 +187,36 @@ async def package_detail(
|
||||
findings=all_findings,
|
||||
request=request,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/api/v1/findings/{finding_id}/analyze", response_class=HTMLResponse)
async def analyze_finding_htmx(
    finding_id: int,
    session: AsyncSession = Depends(get_session),
):
    """Run LLM analysis for one finding and answer with an HTML fragment.

    On success the report is stored on the finding, committed, and rendered
    through ``_llm_report_fragment.html``. When analysis is disabled, the
    finding does not exist (HTTP 404), or the analysis returns ``None``, a
    short notice fragment is returned instead.

    Args:
        finding_id: Primary key of the finding to analyse.
        session: Database session provided by the ``get_session`` dependency.

    Returns:
        An ``HTMLResponse`` containing either the rendered report or a notice.
    """
    # NOTE(review): the JSON API reportedly exposes a POST ".../analyze" route
    # too — confirm the two paths do not collide once both routers are mounted.
    from guarddog_nexus.config import config
    from guarddog_nexus.llm import analyze_finding

    def _notice(text: str, status_code: int = 200) -> HTMLResponse:
        # All notices share the same wrapper markup.
        return HTMLResponse(
            f'<div class="llm-actions"><small class="flagged">{text}</small></div>',
            status_code=status_code,
        )

    if not config.llm_enabled:
        return _notice("LLM analysis is disabled")

    record = await session.scalar(select(Finding).where(Finding.id == finding_id))
    if not record:
        return _notice("Finding not found", status_code=404)

    result = await analyze_finding(record.data)
    if result is None:
        return _notice("LLM analysis failed")

    record.report = result
    await session.commit()
    return _render("_llm_report_fragment.html", report=result)
|
||||
|
||||
@@ -373,6 +373,49 @@ th.sortable.active .sort-icon {
|
||||
justify-content: flex-end;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
|
||||
/* LLM report */
|
||||
.llm-report {
|
||||
margin-top: 0.75rem;
|
||||
padding: 0.6rem 0.8rem;
|
||||
background: var(--pico-color-gray-700);
|
||||
border-radius: 6px;
|
||||
font-size: 0.85rem;
|
||||
line-height: 1.5;
|
||||
border-left: 3px solid var(--pico-color-blue-400);
|
||||
}
|
||||
|
||||
.llm-report strong {
|
||||
color: var(--pico-color-blue-300);
|
||||
}
|
||||
|
||||
.verdict-safe {
|
||||
color: var(--pico-color-green-400);
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.verdict-suspicious {
|
||||
color: var(--pico-color-yellow-400);
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.verdict-malicious {
|
||||
color: var(--pico-color-red-400);
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.llm-actions {
|
||||
margin-top: 0.5rem;
|
||||
}
|
||||
|
||||
.llm-actions button {
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
|
||||
/* htmx indicator */
|
||||
.htmx-indicator {
|
||||
display: inline;
|
||||
}
|
||||
.toggle-all-btn {
|
||||
font-size: 0.8rem;
|
||||
margin-bottom: 0.5rem;
|
||||
|
||||
9
guarddog_nexus/web/templates/_llm_report_fragment.html
Normal file
9
guarddog_nexus/web/templates/_llm_report_fragment.html
Normal file
@@ -0,0 +1,9 @@
|
||||
<div class="llm-report">
|
||||
<strong>LLM Analysis</strong>
|
||||
<span class="verdict-{{ report.verdict }}">[{{ report.verdict }}]</span>
|
||||
{% if report.severity_rating %}
|
||||
<span class="severity-{{ report.severity_rating }}">({{ report.severity_rating }})</span>
|
||||
{% endif %}
|
||||
<p><em>{{ report.summary }}</em></p>
|
||||
<p>{{ report.analysis }}</p>
|
||||
</div>
|
||||
@@ -135,13 +135,13 @@
|
||||
{% if top_rules %}
|
||||
<div class="top-rules-chart">
|
||||
<h3>Top Rules Triggered</h3>
|
||||
{% for rule, cnt in top_rules %}
|
||||
{% for r in top_rules %}
|
||||
<div class="rule-bar-row">
|
||||
<span class="rule-name" title="{{ rule }}"><code>{{ rule }}</code></span>
|
||||
<span class="rule-name" title="{{ r.rule }}"><code>{{ r.rule }}</code></span>
|
||||
<div class="rule-bar-container">
|
||||
<div class="rule-bar" style="width: {{ (cnt / top_rules[0][1] * 100) | int if top_rules[0][1] > 0 else 0 }}%;"></div>
|
||||
<div class="rule-bar" style="width: {{ (r.count / top_rules[0].count * 100) | int if top_rules[0].count > 0 else 0 }}%;"></div>
|
||||
</div>
|
||||
<span class="rule-count">{{ cnt }}</span>
|
||||
<span class="rule-count">{{ r.count }}</span>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
@@ -57,6 +57,29 @@
|
||||
</div>
|
||||
<pre><code id="code-{{ f.id }}">{{ f.data.code }}</code></pre>
|
||||
{% endif %}
|
||||
|
||||
{# LLM section of a finding: show the stored report, or offer the analyze button. #}
{% if f.report %}
<div class="llm-report">
    <strong>LLM Analysis</strong>
    <span class="verdict-{{ f.report.verdict }}">[{{ f.report.verdict }}]</span>
    {# Only render the severity badge when a rating is present, matching
       _llm_report_fragment.html; otherwise an empty "()" would be shown. #}
    {% if f.report.severity_rating %}
    <span class="severity-{{ f.report.severity_rating }}">({{ f.report.severity_rating }})</span>
    {% endif %}
    <p><em>{{ f.report.summary }}</em></p>
    <p>{{ f.report.analysis }}</p>
</div>
{% else %}
{# The whole div is the hx-target and is swapped out (outerHTML) by the response. #}
<div class="llm-actions" id="llm-{{ f.id }}">
    <button class="outline"
            hx-post="/api/v1/findings/{{ f.id }}/analyze"
            hx-target="#llm-{{ f.id }}"
            hx-swap="outerHTML"
            hx-indicator="#llm-spinner-{{ f.id }}">
        {# NOTE(review): the inline display:none is not changed by htmx's
           class-based indicator styling; confirm the spinner becomes visible
           during the request. #}
        <span id="llm-spinner-{{ f.id }}" class="htmx-indicator" style="display:none;">
            <span class="spinner"></span>
        </span>
        Analyze with LLM
    </button>
</div>
{% endif %}
|
||||
</div>
|
||||
</details>
|
||||
{% endfor %}
|
||||
|
||||
@@ -51,6 +51,29 @@
|
||||
</div>
|
||||
<pre><code id="code-{{ f.id }}">{{ f.data.code }}</code></pre>
|
||||
{% endif %}
|
||||
|
||||
{# LLM section of a finding: show the stored report, or offer the analyze button. #}
{% if f.report %}
<div class="llm-report">
    <strong>LLM Analysis</strong>
    <span class="verdict-{{ f.report.verdict }}">[{{ f.report.verdict }}]</span>
    {# Only render the severity badge when a rating is present, matching
       _llm_report_fragment.html; otherwise an empty "()" would be shown. #}
    {% if f.report.severity_rating %}
    <span class="severity-{{ f.report.severity_rating }}">({{ f.report.severity_rating }})</span>
    {% endif %}
    <p><em>{{ f.report.summary }}</em></p>
    <p>{{ f.report.analysis }}</p>
</div>
{% else %}
{# The whole div is the hx-target and is swapped out (outerHTML) by the response. #}
<div class="llm-actions" id="llm-{{ f.id }}">
    <button class="outline"
            hx-post="/api/v1/findings/{{ f.id }}/analyze"
            hx-target="#llm-{{ f.id }}"
            hx-swap="outerHTML"
            hx-indicator="#llm-spinner-{{ f.id }}">
        {# NOTE(review): the inline display:none is not changed by htmx's
           class-based indicator styling; confirm the spinner becomes visible
           during the request. #}
        <span id="llm-spinner-{{ f.id }}" class="htmx-indicator" style="display:none;">
            <span class="spinner"></span>
        </span>
        Analyze with LLM
    </button>
</div>
{% endif %}
|
||||
</div>
|
||||
</details>
|
||||
{% endfor %}
|
||||
|
||||
@@ -8,32 +8,28 @@ import re
|
||||
from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import (
|
||||
DEFAULT_ECOSYSTEM,
|
||||
METADATA_PATTERNS,
|
||||
PACKAGE_EXTENSIONS,
|
||||
RELEVANT_WEBHOOK_ACTIONS,
|
||||
WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT,
|
||||
WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
|
||||
WEBHOOK_IGNORE_NON_PACKAGE,
|
||||
WEBHOOK_STATUS_ACCEPTED,
|
||||
WEBHOOK_STATUS_IGNORED,
|
||||
)
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.harvester import harvest
|
||||
from guarddog_nexus.logging_setup import log
|
||||
|
||||
router = APIRouter(prefix="/webhooks", tags=["webhooks"])
|
||||
|
||||
RELEVANT_ACTIONS = {"CREATED", "UPDATED"}
|
||||
|
||||
METADATA_PATTERNS = [
|
||||
re.compile(p)
|
||||
for p in [
|
||||
r"^/?simple/",
|
||||
r"\.html$",
|
||||
r"\.json$",
|
||||
r"\.xml$",
|
||||
r"/?index\.",
|
||||
r"\.rss$",
|
||||
r"\.atom$",
|
||||
]
|
||||
]
|
||||
|
||||
PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")
|
||||
_METADATA_RE = [re.compile(p) for p in METADATA_PATTERNS]
|
||||
|
||||
|
||||
def _is_package_asset(name: str) -> bool:
|
||||
for pat in METADATA_PATTERNS:
|
||||
for pat in _METADATA_RE:
|
||||
if pat.search(name):
|
||||
return False
|
||||
return name.endswith(PACKAGE_EXTENSIONS)
|
||||
@@ -41,7 +37,7 @@ def _is_package_asset(name: str) -> bool:
|
||||
|
||||
def _build_download_url(repo: str, asset_path: str) -> str:
|
||||
base = config.nexus_url.rstrip("/")
|
||||
asset_path = asset_path.lstrip("/")
|
||||
asset_path = asset_path.strip("/")
|
||||
return f"{base}/repository/{repo}/{asset_path}"
|
||||
|
||||
|
||||
@@ -60,7 +56,6 @@ async def nexus_webhook(
|
||||
x_nexus_webhook_signature: str | None = Header(None, alias="X-Nexus-Webhook-Signature"),
|
||||
):
|
||||
payload = await request.body()
|
||||
payload_str = payload.decode("utf-8")
|
||||
|
||||
if config.webhook_secret:
|
||||
if not x_nexus_webhook_signature:
|
||||
@@ -68,58 +63,75 @@ async def nexus_webhook(
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature"
|
||||
)
|
||||
expected = hmac.new(config.webhook_secret.encode(), payload, hashlib.sha256).hexdigest()
|
||||
expected = hmac.new(
|
||||
config.webhook_secret.encode(), payload, hashlib.sha256
|
||||
).hexdigest()
|
||||
if not hmac.compare_digest(x_nexus_webhook_signature, expected):
|
||||
log.warning("Webhook rejected: invalid signature")
|
||||
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature"
|
||||
)
|
||||
|
||||
try:
|
||||
data = json.loads(payload_str)
|
||||
data = json.loads(payload.decode("utf-8"))
|
||||
except json.JSONDecodeError:
|
||||
log.warning("Webhook received invalid JSON")
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON"
|
||||
)
|
||||
|
||||
action = data.get("action", "").upper()
|
||||
if action not in RELEVANT_ACTIONS:
|
||||
return {"status": "ignored", "action": action}
|
||||
if action not in RELEVANT_WEBHOOK_ACTIONS:
|
||||
return {"status": WEBHOOK_STATUS_IGNORED, "action": action}
|
||||
|
||||
repository = data.get("repositoryName", "")
|
||||
|
||||
asset = data.get("asset")
|
||||
component = data.get("component")
|
||||
|
||||
if asset:
|
||||
asset_path = _extract_asset_path(asset)
|
||||
if not asset_path or not _is_package_asset(asset_path):
|
||||
return {"status": "ignored", "reason": "non_package_asset"}
|
||||
return {"status": WEBHOOK_STATUS_IGNORED, "reason": WEBHOOK_IGNORE_NON_PACKAGE}
|
||||
|
||||
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
|
||||
download_url = asset.get("downloadUrl") or _build_download_url(
|
||||
repository, asset_path
|
||||
)
|
||||
|
||||
log.info("Webhook: %s asset %s in %s", action, asset_path, repository)
|
||||
|
||||
background_tasks.add_task(_scan_in_background, download_url, repository, "pypi", asset_path)
|
||||
return {"status": "accepted", "asset": asset_path, "action": action}
|
||||
background_tasks.add_task(
|
||||
_scan_in_background, download_url, repository, DEFAULT_ECOSYSTEM, asset_path
|
||||
)
|
||||
return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}
|
||||
|
||||
if component:
|
||||
name = component.get("name", "")
|
||||
version = component.get("version", "")
|
||||
if not name or not version:
|
||||
return {"status": "ignored", "reason": "no_name_or_version"}
|
||||
return {
|
||||
"status": WEBHOOK_STATUS_IGNORED,
|
||||
"reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
|
||||
}
|
||||
|
||||
# For component events, look up assets via Nexus REST API
|
||||
background_tasks.add_task(_scan_component, repository, name, version)
|
||||
return {"status": "accepted", "component": f"{name}=={version}", "action": action}
|
||||
return {
|
||||
"status": WEBHOOK_STATUS_ACCEPTED,
|
||||
"component": f"{name}=={version}",
|
||||
"action": action,
|
||||
}
|
||||
|
||||
return {"status": "ignored", "reason": "no_asset_or_component"}
|
||||
return {
|
||||
"status": WEBHOOK_STATUS_IGNORED,
|
||||
"reason": WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT,
|
||||
}
|
||||
|
||||
|
||||
async def _scan_component(repository: str, name: str, version: str):
|
||||
"""Look up component assets via Nexus API, then scan each package file."""
|
||||
|
||||
from guarddog_nexus.nexus_client import nexus_get
|
||||
|
||||
api_path = (
|
||||
f"/service/rest/v1/search?repository={repository}&name={name}&version={version}&format=pypi"
|
||||
f"/service/rest/v1/search"
|
||||
f"?repository={repository}&name={name}&version={version}&format={DEFAULT_ECOSYSTEM}"
|
||||
)
|
||||
try:
|
||||
resp = await nexus_get(api_path)
|
||||
@@ -139,10 +151,14 @@ async def _scan_component(repository: str, name: str, version: str):
|
||||
asset_path = _extract_asset_path(asset)
|
||||
if not asset_path or not _is_package_asset(asset_path):
|
||||
continue
|
||||
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
|
||||
download_url = asset.get("downloadUrl") or _build_download_url(
|
||||
repository, asset_path
|
||||
)
|
||||
log.info("Scanning component asset: %s", asset_path)
|
||||
async for session in get_session():
|
||||
await harvest(download_url, repository, "pypi", asset_path, session)
|
||||
await harvest(
|
||||
download_url, repository, DEFAULT_ECOSYSTEM, asset_path, session
|
||||
)
|
||||
break
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user