refactor: вынос хардкода + LLM-анализ finding'ов
## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
(104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
build_package_list_query, get_dashboard_stats)
Убрано дублирование между api/*.py и web/routes.py (~90%)
- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants
## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
(htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious
## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
@@ -2,20 +2,22 @@
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import (
|
||||
DEFAULT_ECOSYSTEM,
|
||||
ERROR_MESSAGE_MAX_LENGTH,
|
||||
PACKAGE_EXTENSIONS,
|
||||
SCAN_ERROR_DOWNLOAD_FAILED,
|
||||
)
|
||||
from guarddog_nexus.logging_setup import log
|
||||
from guarddog_nexus.models import Finding, Scan, ScanStatus
|
||||
from guarddog_nexus.nexus_client import (
|
||||
SUPPORTED_EXTENSIONS,
|
||||
compute_sha256,
|
||||
download_asset,
|
||||
extract_pypi_info,
|
||||
)
|
||||
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info
|
||||
from guarddog_nexus.scanner import scan_package
|
||||
|
||||
|
||||
@@ -26,11 +28,10 @@ async def harvest(
|
||||
asset_path: str,
|
||||
session: AsyncSession,
|
||||
) -> Scan | None:
|
||||
"""Download, scan, and store results for a single package asset."""
|
||||
ecosystem = "pypi" if format_ in ("pypi",) else format_
|
||||
ecosystem = DEFAULT_ECOSYSTEM if format_ in (DEFAULT_ECOSYSTEM,) else format_
|
||||
|
||||
filename = os.path.basename(download_url.split("?")[0])
|
||||
if not filename.endswith(SUPPORTED_EXTENSIONS):
|
||||
if not filename.endswith(PACKAGE_EXTENSIONS):
|
||||
log.info("Skipping non-package asset: %s", filename)
|
||||
return None
|
||||
|
||||
@@ -73,7 +74,7 @@ async def harvest(
|
||||
downloaded = await download_asset(download_url, tmpdir)
|
||||
if not downloaded:
|
||||
scan.status = ScanStatus.FAILED.value
|
||||
scan.error_message = "Download failed"
|
||||
scan.error_message = SCAN_ERROR_DOWNLOAD_FAILED
|
||||
scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
|
||||
await session.commit()
|
||||
return scan
|
||||
@@ -103,9 +104,12 @@ async def harvest(
|
||||
result = await scan_package(downloaded, ecosystem)
|
||||
|
||||
findings_list = result.get("findings", [])
|
||||
created_findings: list[Finding] = []
|
||||
|
||||
for fdata in findings_list:
|
||||
session.add(Finding(scan_id=scan.id, data=fdata))
|
||||
f = Finding(scan_id=scan.id, data=fdata)
|
||||
session.add(f)
|
||||
created_findings.append(f)
|
||||
|
||||
scan.total_findings = len(findings_list)
|
||||
scan.flagged = len(findings_list) > 0
|
||||
@@ -113,7 +117,24 @@ async def harvest(
|
||||
scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
|
||||
await session.commit()
|
||||
|
||||
# Refresh to get IDs
|
||||
for f in created_findings:
|
||||
await session.refresh(f)
|
||||
|
||||
# Auto-trigger LLM analysis for flagged packages
|
||||
llm_reports = []
|
||||
if scan.flagged and config.llm_enabled:
|
||||
llm_reports = await _run_llm_analysis(created_findings, session)
|
||||
|
||||
if scan.flagged:
|
||||
extra = {
|
||||
"scan_id": scan.id,
|
||||
"package": f"{package_name}=={package_version}",
|
||||
"findings_count": scan.total_findings,
|
||||
"repository": repository,
|
||||
}
|
||||
if llm_reports:
|
||||
extra["llm_analysis"] = llm_reports
|
||||
log.warning(
|
||||
"FLAGGED %s==%s: %d findings in repo %s",
|
||||
package_name,
|
||||
@@ -121,6 +142,13 @@ async def harvest(
|
||||
scan.total_findings,
|
||||
repository,
|
||||
)
|
||||
if llm_reports:
|
||||
log.info(
|
||||
"LLM analysis complete for %s==%s: %d reports",
|
||||
package_name,
|
||||
package_version,
|
||||
len(llm_reports),
|
||||
)
|
||||
|
||||
log.info(
|
||||
"Scan complete: %s==%s (%d findings)",
|
||||
@@ -133,7 +161,24 @@ async def harvest(
|
||||
except Exception as e:
|
||||
log.error("Scan failed for %s==%s: %s", package_name, package_version, e)
|
||||
scan.status = ScanStatus.FAILED.value
|
||||
scan.error_message = str(e)[:1000]
|
||||
scan.error_message = str(e)[:ERROR_MESSAGE_MAX_LENGTH]
|
||||
scan.finished_at = datetime.datetime.now(datetime.timezone.utc)
|
||||
await session.commit()
|
||||
return scan
|
||||
|
||||
finally:
|
||||
shutil.rmtree(tmpdir, ignore_errors=True)
|
||||
|
||||
|
||||
async def _run_llm_analysis(findings: list[Finding], session: AsyncSession) -> list[dict]:
    """Run LLM analysis on findings and persist the reports to the database.

    Each finding's ``data`` is passed to ``analyze_finding``. A truthy result
    is stored on ``finding.report`` and collected for the caller. The analysis
    is treated as best-effort enrichment: if it raises for one finding, the
    error is logged and the remaining findings are still analysed, so reports
    that were already produced are not lost.

    Args:
        findings: Findings to analyse; each must expose ``data`` and accept a
            ``report`` attribute.
        session: Async session used to commit the updated findings.

    Returns:
        The reports that were produced, in the order of ``findings``. Empty
        when no finding yielded a report.

    Raises:
        Whatever ``session.commit()`` raises; persistence errors are not
        swallowed.
    """
    # Function-scope import kept from the original implementation.
    from guarddog_nexus.llm import analyze_finding

    reports: list[dict] = []
    total = len(findings)

    for position, finding in enumerate(findings, start=1):
        try:
            report = await analyze_finding(finding.data)
        except Exception:
            # One failing LLM call must not abort the batch nor bubble up into
            # the caller's scan error handling. asyncio.CancelledError is a
            # BaseException and therefore still propagates.
            log.exception("LLM analysis failed for finding %d of %d", position, total)
            continue

        if report:
            finding.report = report
            reports.append(report)

    await session.commit()
    return reports
|
||||
|
||||
Reference in New Issue
Block a user