feat: LLM-анализ — индикатор прогресса, кнопка рескана, статистика на дашборде

- Добавлен статус {"status": "analyzing"} в finding.report на время LLM-анализа
- Кнопка рескана (Retry) под LLM-отчётом в ручном режиме
- LLM-статистика на дашборде: analysed / pending
- Защита от двойного анализа через per-finding asyncio.Lock
- _llm_spinner.html — фрагмент спиннера для состояния analyzing
- Удалён мёртвый код: constants, i18n, CSS, queries
- Фиксы: _env_int, индексы БД, UnicodeDecodeError, time.mktime и др.
- Шаблоны: shared includes (_status_badge, _pagination)
- AGENTS.md: workflow (lint, test, commit, rebuild)
This commit is contained in:
Marker689
2026-05-10 09:54:04 +03:00
parent c99a7bf67c
commit 6984844161
26 changed files with 261 additions and 266 deletions

View File

@@ -63,16 +63,20 @@ async def harvest(
return None
async with lock:
# Re-check DB in case another task already created and finished a scan
active = await session.scalar(
select(Scan.id).where(
Scan.nexus_asset_url == download_url,
Scan.status.in_([ScanStatus.PENDING.value, ScanStatus.SCANNING.value]),
try:
# Re-check DB in case another task already started a scan for this URL (status PENDING or SCANNING)
active = await session.scalar(
select(Scan.id).where(
Scan.nexus_asset_url == download_url,
Scan.status.in_([ScanStatus.PENDING.value, ScanStatus.SCANNING.value]),
)
)
)
if active:
log.info("Already scanning this URL, skipping")
return None
if active:
log.info("Already scanning this URL, skipping")
return None
finally:
async with _url_lock:
_url_locks.pop(download_url, None)
scan = Scan(
package_name=package_name,
@@ -88,10 +92,9 @@ async def harvest(
await session.commit()
await session.refresh(scan)
os.makedirs(config.temp_dir, exist_ok=True)
tmpdir = tempfile.mkdtemp(dir=config.temp_dir)
try:
os.makedirs(config.temp_dir, exist_ok=True)
tmpdir = tempfile.mkdtemp(dir=config.temp_dir)
scan.status = ScanStatus.SCANNING.value
await session.commit()
@@ -103,7 +106,7 @@ async def harvest(
await session.commit()
return scan
scan.sha256 = compute_sha256(downloaded)
scan.sha256 = await compute_sha256(downloaded)
await session.commit()
existing = await session.scalar(
@@ -148,8 +151,12 @@ async def harvest(
# Auto-trigger LLM analysis for flagged packages
llm_reports = []
if scan.flagged and config.llm_enabled:
llm_reports = await _run_llm_analysis(created_findings, session)
if scan.flagged and config.llm_enabled and config.llm_auto_analyze:
try:
llm_reports = await _run_llm_analysis(created_findings, session)
except Exception as e:
log.error("LLM analysis failed for %s==%s: %s", package_name, package_version, e)
llm_reports = []
if scan.flagged:
extra = {
@@ -199,11 +206,18 @@ async def _run_llm_analysis(findings: list[Finding], session: AsyncSession) -> l
"""Run LLM analysis on findings and persist reports to the database."""
from .llm import analyze_finding
# Mark all as analyzing so the UI shows a spinner
for finding in findings:
finding.report = {"status": "analyzing"}
await session.commit()
reports = []
for finding in findings:
report = await analyze_finding(finding.data)
if report:
finding.report = report
reports.append(report)
else:
finding.report = None
await session.commit()
return reports

View File

@@ -1,5 +1,6 @@
"""Sonatype Nexus REST API client using httpx async."""
import asyncio
import hashlib
import os
@@ -7,8 +8,7 @@ import httpx
from ..config import config
from ..constants import (
NPM_PATH_PREFIX,
PYPI_PATH_PREFIX,
PKG_PATH_PREFIX,
SHA256_CHUNK_SIZE,
)
from ..logging_setup import log
@@ -20,7 +20,7 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
Path format: packages/requests/2.31.0/requests-2.31.0.tar.gz
"""
parts = asset_path.strip("/").split("/")
if len(parts) >= 3 and parts[0] == PYPI_PATH_PREFIX:
if len(parts) >= 3 and parts[0] == PKG_PATH_PREFIX:
return parts[1], parts[2]
return None
@@ -35,8 +35,8 @@ def extract_go_info(asset_path: str) -> tuple[str, str] | None:
idx = cleaned.find("/@v/")
if idx == -1:
return None
if cleaned.startswith(PYPI_PATH_PREFIX + "/"):
module = cleaned[len(PYPI_PATH_PREFIX) + 1 : idx]
if cleaned.startswith(PKG_PATH_PREFIX + "/"):
module = cleaned[len(PKG_PATH_PREFIX) + 1 : idx]
else:
module = cleaned[:idx]
if not module:
@@ -56,7 +56,7 @@ def extract_npm_info(asset_path: str) -> tuple[str, str] | None:
Path format: packages/react/-/react-18.2.0.tgz
"""
parts = asset_path.strip("/").split("/")
if len(parts) < 4 or parts[0] != NPM_PATH_PREFIX:
if len(parts) < 4 or parts[0] != PKG_PATH_PREFIX:
return None
name = parts[1]
# Last segment: <name>-<version>.tgz
@@ -100,14 +100,19 @@ async def download_asset(download_url: str, dest_dir: str) -> str | None:
try:
response = await client.get(download_url)
response.raise_for_status()
with open(dest_path, "wb") as f:
f.write(response.content)
content = response.content
await asyncio.to_thread(_write_file, dest_path, content)
return dest_path
except Exception as e:
log.warning("Failed to download %s: %s", download_url, e)
return None
def _write_file(path: str, content: bytes) -> None:
    """Write ``content`` to ``path`` in binary mode.

    Opening with ``"wb"`` creates the file if needed and truncates any
    existing content. The function is synchronous; ``download_asset``
    runs it through ``asyncio.to_thread``.
    """
    with open(path, "wb") as sink:
        sink.write(content)
async def nexus_get(path: str) -> httpx.Response:
"""Make an authenticated GET request to Nexus REST API."""
auth = httpx.BasicAuth(config.nexus_username, config.nexus_password)
@@ -117,7 +122,11 @@ async def nexus_get(path: str) -> httpx.Response:
return await client.get(f"{config.nexus_url.rstrip('/')}{path}")
def compute_sha256(filepath: str) -> str:
async def compute_sha256(filepath: str) -> str:
    """Return ``_compute_sha256_sync(filepath)``, evaluated in a worker thread.

    The synchronous helper is dispatched through ``asyncio.to_thread`` and
    its result is returned unchanged.
    """
    digest = await asyncio.to_thread(_compute_sha256_sync, filepath)
    return digest
def _compute_sha256_sync(filepath: str) -> str:
h = hashlib.sha256()
with open(filepath, "rb") as f:
for chunk in iter(lambda: f.read(SHA256_CHUNK_SIZE), b""):

View File

@@ -43,6 +43,9 @@ async def scan_package(filepath: str, ecosystem: str = DEFAULT_ECOSYSTEM) -> dic
log.error("GuardDog exited %d: %s", proc.returncode, stderr.decode())
return {"findings": [], "errors": [stderr.decode().strip()]}
if proc.returncode == 1 and stderr:
log.warning("GuardDog stderr (exit 1): %s", stderr.decode().strip())
try:
data = json.loads(stdout.decode())
except json.JSONDecodeError:
@@ -96,6 +99,17 @@ def _normalize_output(data: dict) -> dict:
)
elif isinstance(value, dict) and not value:
continue
elif isinstance(value, dict):
# Non-empty dict — treat as a single finding
findings.append(
{
"rule": rule_name,
"severity": value.get("severity", DEFAULT_FINDING_SEVERITY),
"message": value.get("message", ""),
"location": value.get("location", ""),
"code": value.get("code", ""),
}
)
errors = data.get("errors", {})
if isinstance(errors, dict):