refactor: вынос хардкода + LLM-анализ finding'ов
## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
(централизовано 104 захардкоженных значения)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
build_package_list_query, get_dashboard_stats)
Убрано дублирование кода между api/*.py и web/routes.py (~90%)
- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants
## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
(htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious
## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
181
guarddog_nexus/constants.py
Normal file
181
guarddog_nexus/constants.py
Normal file
@@ -0,0 +1,181 @@
"""Centralized constants for GuardDog Nexus.

All magic strings, limits, enumerations, and shared data structures
used across the codebase live here to avoid duplication and drift.
"""

# ---------------------------------------------------------------------------
# Package handling
# ---------------------------------------------------------------------------

# Unified list of recognised package file extensions.
# NOTE: webhooks uses this to decide whether to accept an asset;
# harvester uses it to decide whether to download and scan.
# It is a tuple, so it can be handed as-is to str.endswith().
PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")

# Prefix used in PyPI-style asset paths ("/packages/name/ver/file")
PYPI_PATH_PREFIX = "packages"

# Metadata file patterns that should never be scanned.
# These are regular-expression source strings (not compiled patterns).
# NOTE(review): r"/?index\." has no anchor and its leading "/" is optional,
# so under a search-style match it hits "index." anywhere in the path —
# confirm that is intended.
METADATA_PATTERNS = (
    r"^/?simple/",
    r"\.html$",
    r"\.json$",
    r"\.xml$",
    r"/?index\.",
    r"\.rss$",
    r"\.atom$",
)
|
||||
|
||||
# ---------------------------------------------------------------------------
# Ecosystem
# ---------------------------------------------------------------------------

# Ecosystem identifier used when none is given explicitly.
DEFAULT_ECOSYSTEM = "pypi"

# ---------------------------------------------------------------------------
# Severity
# ---------------------------------------------------------------------------

# Severity labels, spelled in upper case.
SEVERITY_WARNING = "WARNING"
SEVERITY_ERROR = "ERROR"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Sorting
# ---------------------------------------------------------------------------

# Sort-key name -> field name for scan listings. Every key maps to itself.
# NOTE(review): presumably a whitelist for caller-supplied sort parameters —
# verify against the query builders.
SCAN_SORT_FIELDS = {
    "id": "id",
    "package_name": "package_name",
    "started_at": "started_at",
    "status": "status",
    "total_findings": "total_findings",
    "flagged": "flagged",
}

# Sort-key name -> field name for package listings. Unlike the scan mapping,
# some keys are renamed ("name", "version", "last_scanned_at").
PACKAGE_SORT_FIELDS = {
    "name": "package_name",
    "version": "package_version",
    "last_scanned_at": "started_at",
    "total_findings": "total_findings",
    "flagged": "flagged",
}

# Default sort keys; each one is a key of the matching mapping above.
DEFAULT_SORT_BY_SCANS = "started_at"
DEFAULT_SORT_BY_PACKAGES = "last_scanned_at"
DEFAULT_SORT_DIR = "desc"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Pagination
# ---------------------------------------------------------------------------

DEFAULT_PAGE_SIZE = 50
MAX_PAGE_SIZE = 200
DEFAULT_OFFSET = 0
# Same value as DEFAULT_PAGE_SIZE, but kept as a separate name.
WEB_PER_PAGE = 50

# ---------------------------------------------------------------------------
# Dashboard limits
# ---------------------------------------------------------------------------

# Row-count limits.
DASHBOARD_LATEST_FLAGGED_LIMIT = 8
DASHBOARD_LATEST_SCANS_LIMIT = 10
DASHBOARD_MOST_FLAGGED_LIMIT = 8
TOP_RULES_LIMIT = 10

# Look-back windows, expressed in days.
RECENT_FLAGGED_DAYS = 7
HEATMAP_DAYS = 14
|
||||
|
||||
# ---------------------------------------------------------------------------
# Database fields
# ---------------------------------------------------------------------------

# Maximum lengths for string fields.
MAX_PACKAGE_NAME_LENGTH = 255
MAX_PACKAGE_VERSION_LENGTH = 255
MAX_ECOSYSTEM_LENGTH = 50
# A SHA-256 digest is 256 bits, i.e. 64 hexadecimal characters.
SHA256_HEX_LENGTH = 64
|
||||
|
||||
# ---------------------------------------------------------------------------
# Scanner
# ---------------------------------------------------------------------------

# GuardDog executable name and the option/value pair selecting its output
# format.
GUARDDOG_BINARY_FALLBACK = "guarddog"
GUARDDOG_OUTPUT_KEY = "--output-format"
GUARDDOG_OUTPUT_FORMAT = "json"
# NOTE(review): presumably top-level keys of GuardDog's JSON output —
# confirm against the scanner's parsing code.
GUARDDOG_RESULTS_KEY = "results"
GUARDDOG_ERRORS_KEY = "errors"

# Scan error identifiers.
# NOTE(review): SCAN_ERROR_DOWNLOAD_FAILED is a human-readable phrase while
# the other three are snake_case identifiers; left unchanged because callers
# or stored rows may rely on the exact text.
SCAN_ERROR_TIMEOUT = "timeout"
SCAN_ERROR_BINARY_NOT_FOUND = "guarddog_not_found"
SCAN_ERROR_JSON_PARSE = "json_parse_error"
SCAN_ERROR_DOWNLOAD_FAILED = "Download failed"

ERROR_MESSAGE_MAX_LENGTH = 1000
# NOTE(review): presumably a read size in bytes for SHA-256 hashing — confirm
# against nexus_client.
SHA256_CHUNK_SIZE = 8192
|
||||
|
||||
# Finding data dict keys
FINDING_KEYS = ("rule", "severity", "message", "location", "code")
# Same value as SEVERITY_WARNING (defined in the Severity section above).
DEFAULT_FINDING_SEVERITY = SEVERITY_WARNING
|
||||
|
||||
# ---------------------------------------------------------------------------
# JSON paths (used in SQL json_extract queries)
# ---------------------------------------------------------------------------

# "$" is the document root; ".name" selects that key of the root object.
JSON_PATH_RULE = "$.rule"
JSON_PATH_SEVERITY = "$.severity"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Webhook
# ---------------------------------------------------------------------------

# Webhook action names treated as relevant; stored as a set for membership
# tests.
RELEVANT_WEBHOOK_ACTIONS = {"CREATED", "UPDATED"}

# Reason identifiers for ignored webhooks.
WEBHOOK_IGNORE_NON_PACKAGE = "non_package_asset"
WEBHOOK_IGNORE_NO_NAME_OR_VERSION = "no_name_or_version"
WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT = "no_asset_or_component"

# Status values.
WEBHOOK_STATUS_ACCEPTED = "accepted"
WEBHOOK_STATUS_IGNORED = "ignored"
|
||||
|
||||
# ---------------------------------------------------------------------------
# API
# ---------------------------------------------------------------------------

# URL paths; each starts with "/" and has no trailing slash.
API_PREFIX_V1 = "/api/v1"
HEALTH_PATH = "/health"
STATIC_MOUNT_PATH = "/static"

# Media type string for CSV content.
CSV_MEDIA_TYPE = "text/csv"
|
||||
|
||||
# ---------------------------------------------------------------------------
# LLM
# ---------------------------------------------------------------------------

LLM_DEFAULT_MODEL = "gpt-4o-mini"
LLM_DEFAULT_API_BASE = "https://api.openai.com/v1"
# NOTE(review): presumably seconds — confirm against the LLM client.
LLM_DEFAULT_TIMEOUT = 30

# System prompt for finding analysis. Adjacent string literals are joined
# into a single string; each fragment ends with a space so sentences stay
# separated. The prompt asks for a JSON reply with the keys verdict, summary,
# analysis and severity_rating.
# NOTE(review): the prompt says "Python package" although PACKAGE_EXTENSIONS
# also accepts ".gem" — confirm whether the wording should be generalised.
LLM_ANALYSIS_SYSTEM_PROMPT = (
    "You are a security analyst reviewing GuardDog findings for a Python package. "
    "Given a finding (rule name, severity, message, code snippet, location), "
    "provide a concise security analysis in 2-3 paragraphs. "
    "Assess whether this is likely a real threat or a false positive. "
    "Explain the risk, potential impact, and recommend an action. "
    "Be specific about the code pattern found and its implications. "
    "Respond in JSON with keys: verdict (safe|suspicious|malicious), "
    "summary (1-line verdict), analysis (2-3 paragraphs), "
    "and severity_rating (low|medium|high|critical)."
)
|
||||
|
||||
# ---------------------------------------------------------------------------
# Application metadata
# ---------------------------------------------------------------------------

APP_NAME = "GuardDog Nexus"
APP_DESCRIPTION = "Scan PyPI packages from Sonatype Nexus webhooks using GuardDog"
# Name of the Python package.
APP_PACKAGE = "guarddog_nexus"

# ---------------------------------------------------------------------------
# HTTP
# ---------------------------------------------------------------------------

# NOTE(review): presumably seconds — confirm against the HTTP client code.
HTTP_TIMEOUT_DOWNLOAD = 120
HTTP_TIMEOUT_API = 30
|
||||
Reference in New Issue
Block a user