refactor: реструктуризация — core/, db/, routes/, web/

guarddog_nexus/
├── core/          scanner, harvester, nexus, llm
├── db/            engine, models, queries
├── routes/        webhooks, api_*, web
└── web/           templates + static

- 11 файлов перемещено (git mv — сохранена история)
- Все импорты обновлены (~15 файлов)
- main.py, tests — исправлены пути
- 50/50 тестов, ruff clean
This commit is contained in:
Marker689
2026-05-10 07:17:41 +03:00
parent 22dc87851a
commit 8726b65808
21 changed files with 80 additions and 80 deletions

View File

View File

@@ -0,0 +1,86 @@
"""REST API for findings (across all scans)."""
from fastapi import APIRouter, Depends, Query
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from ..config import config
from ..constants import (
DEFAULT_OFFSET,
DEFAULT_PAGE_SIZE,
JSON_PATH_RULE,
JSON_PATH_SEVERITY,
MAX_PAGE_SIZE,
)
from ..db.engine import get_session
from ..db.models import Finding
# Router for the findings endpoints; every route declared on it is served
# under the /api/v1/findings prefix.
router = APIRouter(prefix="/api/v1/findings", tags=["findings"])
@router.get("")
async def list_findings(
    limit: int = Query(DEFAULT_PAGE_SIZE, ge=0, le=MAX_PAGE_SIZE),
    offset: int = Query(DEFAULT_OFFSET, ge=0),
    rule: str | None = Query(None),
    severity: str | None = Query(None),
    scan_id: int | None = Query(None),
    session: AsyncSession = Depends(get_session),
):
    """Return one page of findings, optionally filtered.

    Args:
        limit: Page size, between 0 and MAX_PAGE_SIZE. Negative values are
            rejected by validation so they cannot reach the SQL LIMIT clause.
        offset: Number of rows to skip (>= 0).
        rule: When non-empty, keep findings whose JSON value at
            JSON_PATH_RULE in ``Finding.data`` equals this string.
        severity: Same as ``rule`` but compared at JSON_PATH_SEVERITY.
        scan_id: When given (including 0), keep findings of that scan only.
        session: Database session provided by ``get_session``.

    Returns:
        A dict with ``total`` (row count matching the filters, ignoring
        pagination), the echoed ``limit``/``offset`` and the ``findings`` page.
    """
    q = select(Finding)
    if rule:
        q = q.where(func.json_extract(Finding.data, JSON_PATH_RULE) == rule)
    if severity:
        q = q.where(func.json_extract(Finding.data, JSON_PATH_SEVERITY) == severity)
    # Compare against None explicitly: a plain truthiness test would silently
    # drop the filter for scan_id=0.
    if scan_id is not None:
        q = q.where(Finding.scan_id == scan_id)

    total = await session.scalar(select(func.count()).select_from(q.subquery()))

    # OFFSET/LIMIT without ORDER BY gives no ordering guarantee, so pages could
    # overlap or skip rows. The ordering is applied to the page query only; the
    # count above does not need it.
    page_q = q.order_by(Finding.id).offset(offset).limit(limit)
    findings = (await session.execute(page_q)).scalars().all()

    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "findings": [
            {
                "id": f.id,
                "scan_id": f.scan_id,
                # Keys of f.data are merged after id/scan_id, so a same-named
                # key inside the stored payload takes precedence over them.
                **f.data,
                "report": f.report,
                "created_at": f.created_at.isoformat() if f.created_at else None,
            }
            for f in findings
        ],
    }
@router.post("/{finding_id}/analyze")
async def analyze_finding_endpoint(
    finding_id: int,
    session: AsyncSession = Depends(get_session),
):
    """Manually trigger LLM analysis for a single finding.

    Args:
        finding_id: Primary key of the finding to analyse.
        session: Database session provided by ``get_session``.

    Returns:
        ``{"detail": ...}`` when LLM analysis is disabled or the analysis
        returned ``None``; otherwise the finding id merged with its stored
        data and the freshly generated report (which is also persisted).

    Raises:
        HTTPException: 404 when no finding has the given id. The response
            body is still ``{"detail": "Not found"}``; only the status code
            changes from 200 to 404.
    """
    # Function-scope import: this module's top-level fastapi import does not
    # include HTTPException.
    from fastapi import HTTPException

    if not config.llm_enabled:
        return {"detail": "LLM analysis is disabled"}

    finding = await session.scalar(
        select(Finding).where(Finding.id == finding_id)
    )
    if not finding:
        raise HTTPException(status_code=404, detail="Not found")

    # Kept lazy and after the early exits, as before, so the LLM module is
    # only imported when an analysis is actually going to run.
    from ..core.llm import analyze_finding

    report = await analyze_finding(finding.data)
    if report is None:
        return {"detail": "LLM analysis failed"}

    finding.report = report
    await session.commit()
    return {
        "id": finding.id,
        **finding.data,
        "report": report,
    }

View File

@@ -0,0 +1,163 @@
"""REST API for packages (distinct packages across scans)."""
import csv
import io
from urllib.parse import unquote
from fastapi import APIRouter, Depends, Query, Response
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from ..constants import (
CSV_MEDIA_TYPE,
DEFAULT_OFFSET,
DEFAULT_PAGE_SIZE,
DEFAULT_SORT_BY_PACKAGES,
DEFAULT_SORT_DIR,
MAX_PAGE_SIZE,
)
from ..db.engine import get_session
from ..db.models import Finding, Scan
from ..db.queries import build_package_list_query
# Router for the package endpoints; every route declared on it is served
# under the /api/v1/packages prefix.
router = APIRouter(prefix="/api/v1/packages", tags=["packages"])
@router.get("")
async def list_packages(
    limit: int = Query(DEFAULT_PAGE_SIZE, ge=0, le=MAX_PAGE_SIZE),
    offset: int = Query(DEFAULT_OFFSET, ge=0),
    ecosystem: str | None = Query(None),
    flagged: bool | None = Query(None),
    search: str | None = Query(None),
    repository: str | None = Query(None),
    sort_by: str = Query(DEFAULT_SORT_BY_PACKAGES),
    sort_dir: str = Query(DEFAULT_SORT_DIR),
    session: AsyncSession = Depends(get_session),
):
    """Return one page of packages.

    All filter, sort and paging arguments are forwarded unchanged to
    ``build_package_list_query``, which returns the row query and the
    matching count query.

    Args:
        limit: Page size, between 0 and MAX_PAGE_SIZE. Negative values are
            rejected by validation instead of being passed to the query
            builder.
        offset: Number of rows to skip (>= 0).
        ecosystem, flagged, search, repository: Optional filters.
        sort_by, sort_dir: Sort column and direction.
        session: Database session provided by ``get_session``.

    Returns:
        A dict with ``total``, the echoed ``limit``/``offset`` and the
        ``packages`` page.
    """
    rows_q, total_q = build_package_list_query(
        flagged=flagged,
        ecosystem=ecosystem,
        repository=repository,
        search=search,
        sort_by=sort_by,
        sort_dir=sort_dir,
        limit=limit,
        offset=offset,
    )
    total = await session.scalar(total_q)
    rows = (await session.execute(rows_q)).all()
    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "packages": [
            {
                "name": r.pkg_name,
                "version": r.pkg_ver,
                "ecosystem": r.ecosystem,
                "repository": r.repository,
                "last_scanned_at": r.last_scan.isoformat() if r.last_scan else None,
                "flagged": bool(r.is_flagged),
                "total_findings": r.findings_sum,
                "latest_scan_id": r.sid,
            }
            for r in rows
        ],
    }
@router.get("/export")
async def export_packages_csv(
    flagged: bool | None = Query(None),
    search: str | None = Query(None),
    session: AsyncSession = Depends(get_session),
):
    """Download the package list as a CSV attachment.

    The query is built with the default sort settings, ``offset=0`` and
    ``limit=MAX_PAGE_SIZE``; the count query returned by the builder is not
    used. Missing ``last_scan`` values are written as empty cells.
    """
    rows_q, _unused_total_q = build_package_list_query(
        flagged=flagged,
        search=search,
        sort_by=DEFAULT_SORT_BY_PACKAGES,
        sort_dir=DEFAULT_SORT_DIR,
        limit=MAX_PAGE_SIZE,
        offset=0,
    )
    result = await session.execute(rows_q)

    buffer = io.StringIO()
    sheet = csv.writer(buffer)
    # Header row first, then one line per package.
    sheet.writerow(
        [
            "name", "version", "ecosystem", "repository",
            "last_scanned_at", "flagged", "total_findings",
        ]
    )
    sheet.writerows(
        [
            pkg.pkg_name,
            pkg.pkg_ver,
            pkg.ecosystem,
            pkg.repository,
            pkg.last_scan.isoformat() if pkg.last_scan else "",
            bool(pkg.is_flagged),
            pkg.findings_sum,
        ]
        for pkg in result.all()
    )

    download_headers = {
        "Content-Disposition": "attachment; filename=packages_export.csv"
    }
    return Response(
        content=buffer.getvalue(),
        media_type=CSV_MEDIA_TYPE,
        headers=download_headers,
    )
@router.get("/{name:path}")
async def get_package(
    name: str,
    session: AsyncSession = Depends(get_session),
):
    """Return every scan and finding recorded for one package version.

    The captured path is split on its last ``/``: the left part is the
    package name (which may itself contain slashes) and the right part the
    version. Without a slash the version is the empty string.

    Args:
        name: Everything after the router prefix, ``<package>/<version>``.
        session: Database session provided by ``get_session``.

    Returns:
        Package metadata taken from the most recently started scan, the list
        of scans (newest first) and the findings of all those scans.

    Raises:
        HTTPException: 404 when no scan matches. The body is still
            ``{"detail": "Not found"}``; only the status changes from 200.
    """
    # Function-scope import: this module's top-level fastapi import does not
    # include HTTPException.
    from fastapi import HTTPException

    parts = name.rsplit("/", 1)
    # NOTE(review): path parameters normally arrive already percent-decoded;
    # the extra unquote() is kept unchanged - confirm it is intended.
    pkg_name = unquote(parts[0])
    pkg_version = unquote(parts[1]) if len(parts) == 2 else ""

    scans = (
        (
            await session.execute(
                select(Scan)
                .where(Scan.package_name == pkg_name, Scan.package_version == pkg_version)
                .order_by(Scan.started_at.desc())
            )
        )
        .scalars()
        .all()
    )
    if not scans:
        raise HTTPException(status_code=404, detail="Not found")

    # Fetch the findings of all scans in one query instead of one query per
    # scan, then regroup them so the output keeps the scan order above.
    finding_rows = (
        (
            await session.execute(
                select(Finding)
                .where(Finding.scan_id.in_([s.id for s in scans]))
                .order_by(Finding.id)
            )
        )
        .scalars()
        .all()
    )
    findings_by_scan: dict = {}
    for f in finding_rows:
        findings_by_scan.setdefault(f.scan_id, []).append(f)

    all_findings: list[dict] = [
        {"id": f.id, **f.data, "report": f.report}
        for s in scans
        for f in findings_by_scan.get(s.id, ())
    ]

    return {
        "name": scans[0].package_name,
        "version": scans[0].package_version,
        "ecosystem": scans[0].ecosystem,
        "repository": scans[0].repository,
        "flagged": any(s.flagged for s in scans),
        "scans": [
            {
                "id": s.id,
                "status": s.status,
                "total_findings": s.total_findings,
                "flagged": s.flagged,
                "started_at": s.started_at.isoformat() if s.started_at else None,
            }
            for s in scans
        ],
        "findings": all_findings,
    }

View File

@@ -0,0 +1,157 @@
"""REST API for scans."""
import csv
import io
from fastapi import APIRouter, Depends, Query, Response
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from ..constants import (
CSV_MEDIA_TYPE,
DEFAULT_OFFSET,
DEFAULT_PAGE_SIZE,
DEFAULT_SORT_BY_SCANS,
DEFAULT_SORT_DIR,
MAX_PAGE_SIZE,
)
from ..db.engine import get_session
from ..db.models import Scan
from ..db.queries import build_scan_list_query, get_dashboard_stats
# Router for the scan endpoints; every route declared on it is served
# under the /api/v1/scans prefix.
router = APIRouter(prefix="/api/v1/scans", tags=["scans"])
@router.get("")
async def list_scans(
    limit: int = Query(DEFAULT_PAGE_SIZE, ge=0, le=MAX_PAGE_SIZE),
    offset: int = Query(DEFAULT_OFFSET, ge=0),
    flagged: bool | None = Query(None),
    search: str | None = Query(None),
    status: str | None = Query(None),
    repository: str | None = Query(None),
    sort_by: str = Query(DEFAULT_SORT_BY_SCANS),
    sort_dir: str = Query(DEFAULT_SORT_DIR),
    session: AsyncSession = Depends(get_session),
):
    """Return one page of scans.

    All filter, sort and paging arguments are forwarded unchanged to
    ``build_scan_list_query``, which returns the row query and the matching
    count query.

    Args:
        limit: Page size, between 0 and MAX_PAGE_SIZE. Negative values are
            rejected by validation instead of being passed to the query
            builder.
        offset: Number of rows to skip (>= 0).
        flagged, search, status, repository: Optional filters.
        sort_by, sort_dir: Sort column and direction.
        session: Database session provided by ``get_session``.

    Returns:
        A dict with ``total``, the echoed ``limit``/``offset`` and the
        ``scans`` page; timestamps are ISO-8601 strings or ``None``.
    """
    q, count_q = build_scan_list_query(
        flagged=flagged,
        status=status,
        repository=repository,
        search=search,
        sort_by=sort_by,
        sort_dir=sort_dir,
        limit=limit,
        offset=offset,
    )
    scans = (await session.execute(q)).scalars().all()
    total = await session.scalar(count_q)
    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "scans": [
            {
                "id": s.id,
                "package_name": s.package_name,
                "package_version": s.package_version,
                "ecosystem": s.ecosystem,
                "repository": s.repository,
                "status": s.status,
                "total_findings": s.total_findings,
                "flagged": s.flagged,
                "started_at": s.started_at.isoformat() if s.started_at else None,
                "finished_at": s.finished_at.isoformat() if s.finished_at else None,
                "error_message": s.error_message,
            }
            for s in scans
        ],
    }
@router.get("/export")
async def export_scans_csv(
    flagged: bool | None = Query(None),
    search: str | None = Query(None),
    status: str | None = Query(None),
    session: AsyncSession = Depends(get_session),
):
    """Download the scan list as a CSV attachment.

    The query is built with the default sort settings, ``offset=0`` and
    ``limit=MAX_PAGE_SIZE``; the count query returned by the builder is not
    used. Missing timestamps, error messages and hashes are written as
    empty cells.
    """
    q, _unused_count_q = build_scan_list_query(
        flagged=flagged,
        status=status,
        search=search,
        sort_by=DEFAULT_SORT_BY_SCANS,
        sort_dir=DEFAULT_SORT_DIR,
        limit=MAX_PAGE_SIZE,
        offset=0,
    )
    result = await session.execute(q)

    buffer = io.StringIO()
    sheet = csv.writer(buffer)
    # Header row first, then one line per scan.
    sheet.writerow(
        [
            "id", "package_name", "package_version", "ecosystem", "repository",
            "status", "total_findings", "flagged", "started_at", "finished_at",
            "error_message", "sha256",
        ]
    )
    sheet.writerows(
        [
            scan.id,
            scan.package_name,
            scan.package_version,
            scan.ecosystem,
            scan.repository,
            scan.status,
            scan.total_findings,
            scan.flagged,
            scan.started_at.isoformat() if scan.started_at else "",
            scan.finished_at.isoformat() if scan.finished_at else "",
            scan.error_message or "",
            scan.sha256 or "",
        ]
        for scan in result.scalars().all()
    )

    download_headers = {
        "Content-Disposition": "attachment; filename=scans_export.csv"
    }
    return Response(
        content=buffer.getvalue(),
        media_type=CSV_MEDIA_TYPE,
        headers=download_headers,
    )
@router.get("/stats")
async def scan_stats(session: AsyncSession = Depends(get_session)):
    """Return the dashboard counters as JSON.

    The values are read from the mapping returned by ``get_dashboard_stats``.
    ``latest_scan_at`` is the ISO-8601 ``started_at`` of the first entry of
    ``latest_flagged``, or ``None`` when that list is empty or the entry has
    no start time.
    """
    dashboard = await get_dashboard_stats(session)

    # NOTE(review): the field is named "latest_scan_at" but is derived from
    # the *flagged* list - confirm that is the intended source.
    latest_flagged = dashboard["latest_flagged"]
    newest = latest_flagged[0] if latest_flagged else None
    # started_at is treated as optional by the other endpoints of this module,
    # so guard it here too instead of calling isoformat() on None.
    latest_scan_at = (
        newest.started_at.isoformat()
        if newest is not None and newest.started_at
        else None
    )

    return {
        "total_scans": dashboard["total_scans"],
        "flagged_scans": dashboard["flagged_scans"],
        "recent_flagged": dashboard["recent_flagged"],
        "total_findings": dashboard["total_findings"],
        "top_rules": dashboard["top_rules"],
        "latest_scan_at": latest_scan_at,
    }
@router.get("/{scan_id}")
async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)):
    """Return a single scan together with its findings.

    Args:
        scan_id: Primary key of the scan.
        session: Database session provided by ``get_session``.

    Returns:
        The scan's columns plus a ``findings`` list; timestamps are ISO-8601
        strings or ``None``.

    Raises:
        HTTPException: 404 when no scan has the given id. The body is still
            ``{"detail": "Not found"}``; only the status changes from 200.
    """
    # Function-scope import: this module's top-level fastapi import does not
    # include HTTPException.
    from fastapi import HTTPException

    # Findings are loaded eagerly with the scan so they can be read below
    # without further lazy loading on the async session.
    scan = await session.scalar(
        select(Scan).where(Scan.id == scan_id).options(selectinload(Scan.findings))
    )
    if not scan:
        raise HTTPException(status_code=404, detail="Not found")
    return {
        "id": scan.id,
        "package_name": scan.package_name,
        "package_version": scan.package_version,
        "ecosystem": scan.ecosystem,
        "repository": scan.repository,
        "nexus_asset_url": scan.nexus_asset_url,
        "sha256": scan.sha256,
        "status": scan.status,
        "total_findings": scan.total_findings,
        "flagged": scan.flagged,
        "started_at": scan.started_at.isoformat() if scan.started_at else None,
        "finished_at": scan.finished_at.isoformat() if scan.finished_at else None,
        "error_message": scan.error_message,
        "findings": [{"id": f.id, **f.data, "report": f.report} for f in scan.findings],
    }

View File

@@ -0,0 +1,233 @@
"""Web UI routes — Jinja2 + htmx pages."""
from urllib.parse import unquote
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse
from jinja2 import Environment, PackageLoader, select_autoescape
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from ..constants import (
APP_PACKAGE,
DEFAULT_SORT_BY_PACKAGES,
DEFAULT_SORT_BY_SCANS,
DEFAULT_SORT_DIR,
WEB_PER_PAGE,
)
from ..db.engine import get_session
from ..db.models import Finding, Scan
from ..db.queries import (
build_package_list_query,
build_scan_list_query,
get_dashboard_stats,
)
# Router for the HTML pages; it has no prefix, so each route path is used as written.
router = APIRouter(tags=["web"])
# Shared Jinja2 environment: templates are loaded from "web/templates" inside
# the APP_PACKAGE package, with autoescaping chosen by select_autoescape().
_jinja_env = Environment(
    loader=PackageLoader(APP_PACKAGE, "web/templates"),
    autoescape=select_autoescape(),
)
def _render(name: str, **context) -> HTMLResponse:
    """Render template *name* with *context* and wrap the result in an HTMLResponse."""
    html = _jinja_env.get_template(name).render(**context)
    return HTMLResponse(html)
@router.get("/", response_class=HTMLResponse)
async def dashboard(request: Request, session: AsyncSession = Depends(get_session)):
    """Render the dashboard page from the values returned by get_dashboard_stats()."""
    stats = await get_dashboard_stats(session)
    return _render("dashboard.html", request=request, **stats)
@router.get("/dashboard/stats", response_class=HTMLResponse)
async def dashboard_stats_fragment(session: AsyncSession = Depends(get_session)):
    """Render only the dashboard statistics fragment (no request object is passed)."""
    stats = await get_dashboard_stats(session)
    return _render("dashboard_stats.html", **stats)
@router.get("/scans", response_class=HTMLResponse)
async def scans_list(
    request: Request,
    page: int = 1,
    flagged: str = "",
    search: str = "",
    status: str = "",
    sort_by: str = DEFAULT_SORT_BY_SCANS,
    sort_dir: str = DEFAULT_SORT_DIR,
    session: AsyncSession = Depends(get_session),
):
    """Render the scans page, or only its table for HTMX requests.

    Args:
        request: Incoming request; an ``HX-Request`` header selects the
            table-only template.
        page: 1-based page number. Values below 1 are treated as 1.
        flagged: ``"1"`` restricts the list to flagged scans; any other value
            applies no flagged filter.
        search, status: Optional filters; empty strings mean "no filter".
        sort_by, sort_dir: Sort column and direction.
        session: Database session provided by ``get_session``.
    """
    # Clamp the page so that page <= 0 cannot produce a negative OFFSET.
    page = max(page, 1)
    per_page = WEB_PER_PAGE
    offset = (page - 1) * per_page

    # The query builder takes True/None; there is no "unflagged only" mode here.
    flagged_bool = True if flagged == "1" else None

    q, count_q = build_scan_list_query(
        flagged=flagged_bool,
        status=status or None,
        search=search or None,
        sort_by=sort_by,
        sort_dir=sort_dir,
        limit=per_page,
        offset=offset,
    )
    scans = (await session.execute(q)).scalars().all()
    total = await session.scalar(count_q)

    template = "_scans_table.html" if request.headers.get("HX-Request") else "scans_list.html"
    return _render(
        template,
        scans=scans,
        page=page,
        per_page=per_page,
        total=total,
        flagged_filter=flagged,
        search=search,
        status_filter=status,
        sort_by=sort_by,
        sort_dir=sort_dir,
        request=request,
    )
@router.get("/scans/{scan_id}", response_class=HTMLResponse)
async def scan_detail(
    scan_id: int, request: Request, session: AsyncSession = Depends(get_session)
):
    """Render the detail page of one scan, or a 404 page when it does not exist."""
    from sqlalchemy.orm import selectinload

    # Load the findings together with the scan.
    stmt = (
        select(Scan)
        .options(selectinload(Scan.findings))
        .where(Scan.id == scan_id)
    )
    scan = await session.scalar(stmt)
    if scan:
        return _render("scan_detail.html", scan=scan, request=request)
    return HTMLResponse("<h1>Not found</h1>", status_code=404)
@router.get("/packages", response_class=HTMLResponse)
async def packages_list(
    request: Request,
    page: int = 1,
    flagged: str = "",
    search: str = "",
    sort_by: str = DEFAULT_SORT_BY_PACKAGES,
    sort_dir: str = DEFAULT_SORT_DIR,
    session: AsyncSession = Depends(get_session),
):
    """Render the packages page, or only its table for HTMX requests.

    Args:
        request: Incoming request; an ``HX-Request`` header selects the
            table-only template.
        page: 1-based page number. Values below 1 are treated as 1.
        flagged: ``"1"`` restricts the list to flagged packages; any other
            value applies no flagged filter.
        search: Optional filter; an empty string means "no filter".
        sort_by, sort_dir: Sort column and direction.
        session: Database session provided by ``get_session``.
    """
    # Clamp the page so that page <= 0 cannot produce a negative OFFSET.
    page = max(page, 1)
    per_page = WEB_PER_PAGE
    offset = (page - 1) * per_page

    # The query builder takes True/None; there is no "unflagged only" mode here.
    flagged_bool = True if flagged == "1" else None

    rows_q, total_q = build_package_list_query(
        flagged=flagged_bool,
        search=search or None,
        sort_by=sort_by,
        sort_dir=sort_dir,
        limit=per_page,
        offset=offset,
    )
    total = await session.scalar(total_q)
    rows = (await session.execute(rows_q)).all()

    template = "_packages_table.html" if request.headers.get("HX-Request") else "packages_list.html"
    return _render(
        template,
        packages=rows,
        page=page,
        per_page=per_page,
        total=total,
        flagged_filter=flagged,
        search=search,
        sort_by=sort_by,
        sort_dir=sort_dir,
        request=request,
    )
@router.get("/packages/{name:path}", response_class=HTMLResponse)
async def package_detail(
    name: str,
    request: Request,
    session: AsyncSession = Depends(get_session),
):
    """Render the detail page for one package version.

    ``name`` is the whole path after ``/packages/``, for example
    "eviltest/0.1.0" or "github.com/attacker/evilmodule/v0.1.0". The text
    after the last slash is the version; everything before it is the package
    name. Without a slash the version is the empty string.
    """
    from sqlalchemy.orm import selectinload

    head, slash, tail = name.rpartition("/")
    if slash:
        pkg_name, pkg_version = unquote(head), unquote(tail)
    else:
        pkg_name, pkg_version = unquote(name), ""

    # Newest scans first, each with its findings loaded.
    stmt = (
        select(Scan)
        .where(Scan.package_name == pkg_name, Scan.package_version == pkg_version)
        .options(selectinload(Scan.findings))
        .order_by(Scan.started_at.desc())
    )
    result = await session.execute(stmt)
    scans = result.scalars().all()
    if not scans:
        return HTMLResponse("<h1>Not found</h1>", status_code=404)

    # Flatten the findings of all scans, keeping the scan order.
    all_findings = [finding for scan in scans for finding in scan.findings]
    return _render(
        "package_detail.html",
        pkg_name=pkg_name,
        pkg_version=pkg_version,
        scans=scans,
        findings=all_findings,
        request=request,
    )
@router.post("/api/v1/findings/{finding_id}/analyze", response_class=HTMLResponse)
async def analyze_finding_htmx(
    finding_id: int,
    session: AsyncSession = Depends(get_session),
):
    """HTMX fragment: run LLM analysis for a finding and return result HTML.

    A small notice fragment is returned when analysis is disabled, when the
    finding does not exist (status 404) or when the analysis returns None.
    On success the report is stored on the finding and rendered with the
    ``_llm_report_fragment.html`` template.

    NOTE(review): the findings API module declares a POST route
    ``/{finding_id}/analyze`` under the ``/api/v1/findings`` prefix, which is
    the same method and path as this one. Which handler actually serves the
    request presumably depends on router registration order - confirm.
    """
    from ..config import config
    from ..core.llm import analyze_finding

    if not config.llm_enabled:
        disabled_html = (
            '<div class="llm-actions"><small class="flagged">LLM analysis is disabled</small></div>'
        )
        return HTMLResponse(disabled_html)

    lookup = select(Finding).where(Finding.id == finding_id)
    finding = await session.scalar(lookup)
    if not finding:
        missing_html = (
            '<div class="llm-actions"><small class="flagged">Finding not found</small></div>'
        )
        return HTMLResponse(missing_html, status_code=404)

    report = await analyze_finding(finding.data)
    if report is None:
        failed_html = (
            '<div class="llm-actions"><small class="flagged">LLM analysis failed</small></div>'
        )
        return HTMLResponse(failed_html)

    # Persist the report before rendering it.
    finding.report = report
    await session.commit()
    return _render("_llm_report_fragment.html", report=report)

View File

@@ -0,0 +1,190 @@
"""Nexus webhook receiver — handles component/asset webhooks."""
import hashlib
import hmac
import json
import re
from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
from ..config import config
from ..constants import (
DEFAULT_ECOSYSTEM,
METADATA_PATTERNS,
PACKAGE_EXTENSIONS,
RELEVANT_WEBHOOK_ACTIONS,
WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT,
WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
WEBHOOK_IGNORE_NON_PACKAGE,
WEBHOOK_STATUS_ACCEPTED,
WEBHOOK_STATUS_IGNORED,
)
from ..core.harvester import harvest
from ..db.engine import get_session
from ..logging_setup import log
# Router for webhook receivers; every route declared on it is served under /webhooks.
router = APIRouter(prefix="/webhooks", tags=["webhooks"])
# METADATA_PATTERNS compiled once at import time; _is_package_asset() rejects
# any asset name that one of these patterns matches.
_METADATA_RE = [re.compile(p) for p in METADATA_PATTERNS]
def _is_package_asset(name: str) -> bool:
    """Return True when *name* ends with a package extension and matches no metadata pattern."""
    looks_like_metadata = any(pattern.search(name) for pattern in _METADATA_RE)
    return not looks_like_metadata and name.endswith(PACKAGE_EXTENSIONS)
def _build_download_url(repo: str, asset_path: str) -> str:
    """Build ``<nexus_url>/repository/<repo>/<asset_path>``.

    Trailing slashes of the configured Nexus URL and surrounding slashes of
    *asset_path* are removed so the parts join with exactly one slash.
    """
    return f"{config.nexus_url.rstrip('/')}/repository/{repo}/{asset_path.strip('/')}"
def _extract_asset_path(asset: dict) -> str | None:
for key in ("path", "name"):
val = asset.get(key)
if val:
return val
return None
def _detect_ecosystem(source: dict) -> str:
"""Detect ecosystem from asset or component format field."""
fmt = source.get("format", "").lower()
if fmt in ("pypi", "pip", "python"):
return "pypi"
if fmt in ("go", "golang"):
return "go"
if fmt in ("npm", "node"):
return "npm"
return fmt or DEFAULT_ECOSYSTEM
@router.post("/nexus")
async def nexus_webhook(
    request: Request,
    background_tasks: BackgroundTasks,
    x_nexus_webhook_signature: str | None = Header(None, alias="X-Nexus-Webhook-Signature"),
):
    """Receive a Nexus webhook, verify it and queue a background scan.

    Processing steps:
      1. When ``config.webhook_secret`` is set, the raw body must carry a
         matching HMAC-SHA256 hex digest in ``X-Nexus-Webhook-Signature``.
      2. The body is parsed as a UTF-8 JSON object.
      3. Events whose action is not in RELEVANT_WEBHOOK_ACTIONS are ignored.
      4. An ``asset`` payload is scanned directly; a ``component`` payload is
         resolved to its assets by ``_scan_component``.

    Returns:
        A status dict: accepted (with the asset or component identifier) or
        ignored (with the action or a reason).

    Raises:
        HTTPException: 401 when the signature header is missing, 403 when it
            does not match, 400 when the body is not a UTF-8 JSON object.
    """
    payload = await request.body()

    if config.webhook_secret:
        if not x_nexus_webhook_signature:
            log.warning("Webhook rejected: missing signature header")
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature"
            )
        # NOTE(review): verify against the Nexus webhook documentation which
        # digest the sender uses for this header; SHA-256 is kept unchanged.
        expected = hmac.new(
            config.webhook_secret.encode(), payload, hashlib.sha256
        ).hexdigest()
        # compare_digest() raises TypeError for str arguments containing
        # non-ASCII characters; comparing bytes turns a malformed header into
        # a plain mismatch instead of an unhandled error.
        if not hmac.compare_digest(
            x_nexus_webhook_signature.encode("utf-8"), expected.encode("utf-8")
        ):
            log.warning("Webhook rejected: invalid signature")
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature"
            )

    try:
        data = json.loads(payload.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        # Bytes that are not valid UTF-8 are as unusable as malformed JSON.
        log.warning("Webhook received invalid JSON")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON"
        ) from None

    # Valid JSON may still be a list or scalar; everything below needs a dict.
    if not isinstance(data, dict):
        log.warning("Webhook received invalid JSON")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Expected a JSON object"
        )

    # "or" guards against keys that are present but null.
    action = str(data.get("action") or "").upper()
    if action not in RELEVANT_WEBHOOK_ACTIONS:
        return {"status": WEBHOOK_STATUS_IGNORED, "action": action}

    repository = data.get("repositoryName") or ""
    asset = data.get("asset")
    component = data.get("component")

    if asset and isinstance(asset, dict):
        asset_path = _extract_asset_path(asset)
        if not asset_path or not _is_package_asset(asset_path):
            return {"status": WEBHOOK_STATUS_IGNORED, "reason": WEBHOOK_IGNORE_NON_PACKAGE}
        # Prefer the URL supplied by Nexus; otherwise derive it from the path.
        download_url = asset.get("downloadUrl") or _build_download_url(
            repository, asset_path
        )
        ecosystem = _detect_ecosystem(asset)
        log.info("Webhook: %s asset %s (%s) in %s", action, asset_path, ecosystem, repository)
        background_tasks.add_task(
            _scan_in_background, download_url, repository, ecosystem, asset_path
        )
        return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}

    if component and isinstance(component, dict):
        name = component.get("name", "")
        version = component.get("version", "")
        if not name or not version:
            return {
                "status": WEBHOOK_STATUS_IGNORED,
                "reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
            }
        ecosystem = _detect_ecosystem(component)
        background_tasks.add_task(_scan_component, repository, name, version, ecosystem)
        return {
            "status": WEBHOOK_STATUS_ACCEPTED,
            "component": f"{name}=={version}",
            "action": action,
        }

    return {
        "status": WEBHOOK_STATUS_IGNORED,
        "reason": WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT,
    }
async def _scan_component(repository: str, name: str, version: str, ecosystem: str):
    """Background task: look up a component's assets in Nexus and scan them.

    The Nexus search endpoint is queried for the component; every returned
    asset that ``_is_package_asset`` accepts is passed to ``harvest``.

    Lookup errors are logged and end the task. A failure while scanning one
    asset is logged and the remaining assets are still processed.
    """
    from urllib.parse import urlencode

    from ..core.nexus import nexus_get

    # The values originate from the webhook payload; encode them so characters
    # such as "&", "#", "/" or spaces cannot alter or break the query string.
    query = urlencode(
        {
            "repository": repository,
            "name": name,
            "version": version,
            "format": ecosystem,
        }
    )
    api_path = f"/service/rest/v1/search?{query}"

    try:
        resp = await nexus_get(api_path)
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:
        log.warning("Component lookup error for %s==%s: %s", name, version, e)
        return

    items = data.get("items", [])
    if not items:
        log.warning("No items found in search for %s==%s", name, version)
        return

    for item in items:
        for asset in item.get("assets", []):
            asset_path = _extract_asset_path(asset)
            if not asset_path or not _is_package_asset(asset_path):
                continue
            download_url = asset.get("downloadUrl") or _build_download_url(
                repository, asset_path
            )
            log.info("Scanning component asset: %s", asset_path)
            # Same containment as _scan_in_background: one failing asset must
            # not abort the rest or escape from the background task.
            try:
                async for session in get_session():
                    await harvest(
                        download_url, repository, ecosystem, asset_path, session
                    )
                    break
            except Exception as e:
                log.error("Component scan failed for %s: %s", asset_path, e)
async def _scan_in_background(
    download_url: str,
    repository: str,
    format_: str,
    asset_path: str,
) -> None:
    """Background task: harvest one asset using a session from get_session().

    Any exception raised along the way is logged and swallowed, so a failed
    scan never propagates out of the task.
    """
    try:
        # Only the first session yielded by get_session() is used; the loop is
        # left as soon as the harvest call returns.
        async for session in get_session():
            await harvest(download_url, repository, format_, asset_path, session)
            break
    except Exception as e:
        log.error("Background scan failed: %s", e)