Files
guarddog-nexus/guarddog_nexus/constants.py
Marker689 834138368a refactor: вынос хардкода + LLM-анализ finding'ов
## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
  (104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
  build_package_list_query, get_dashboard_stats)
  Убрана дупликация между api/*.py и web/routes.py (~90%)

- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
  log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
  из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
  из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
  константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants

## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
  (htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious

## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
2026-05-10 04:37:07 +03:00

182 lines
5.9 KiB
Python

"""Centralized constants for GuardDog Nexus.
All magic strings, limits, enumerations, and shared data structures
used across the codebase live here to avoid duplication and drift.
"""
# ---------------------------------------------------------------------------
# Package handling
# ---------------------------------------------------------------------------
# Unified list of recognised package file extensions.
# NOTE: webhooks uses this to decide whether to accept an asset;
# harvester uses it to decide whether to download and scan.
PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")
# Prefix used in PyPI-style asset paths ("/packages/name/ver/file")
PYPI_PATH_PREFIX = "packages"
# Metadata file patterns that should never be scanned
METADATA_PATTERNS = (
r"^/?simple/",
r"\.html$",
r"\.json$",
r"\.xml$",
r"/?index\.",
r"\.rss$",
r"\.atom$",
)
# ---------------------------------------------------------------------------
# Ecosystem
# ---------------------------------------------------------------------------
DEFAULT_ECOSYSTEM = "pypi"
# ---------------------------------------------------------------------------
# Severity
# ---------------------------------------------------------------------------
SEVERITY_WARNING = "WARNING"
SEVERITY_ERROR = "ERROR"
# ---------------------------------------------------------------------------
# Sorting
# ---------------------------------------------------------------------------
SCAN_SORT_FIELDS = {
"id": "id",
"package_name": "package_name",
"started_at": "started_at",
"status": "status",
"total_findings": "total_findings",
"flagged": "flagged",
}
PACKAGE_SORT_FIELDS = {
"name": "package_name",
"version": "package_version",
"last_scanned_at": "started_at",
"total_findings": "total_findings",
"flagged": "flagged",
}
DEFAULT_SORT_BY_SCANS = "started_at"
DEFAULT_SORT_BY_PACKAGES = "last_scanned_at"
DEFAULT_SORT_DIR = "desc"
# ---------------------------------------------------------------------------
# Pagination
# ---------------------------------------------------------------------------
DEFAULT_PAGE_SIZE = 50
MAX_PAGE_SIZE = 200
DEFAULT_OFFSET = 0
WEB_PER_PAGE = 50
# ---------------------------------------------------------------------------
# Dashboard limits
# ---------------------------------------------------------------------------
DASHBOARD_LATEST_FLAGGED_LIMIT = 8
DASHBOARD_LATEST_SCANS_LIMIT = 10
DASHBOARD_MOST_FLAGGED_LIMIT = 8
TOP_RULES_LIMIT = 10
RECENT_FLAGGED_DAYS = 7
HEATMAP_DAYS = 14
# ---------------------------------------------------------------------------
# Database fields
# ---------------------------------------------------------------------------
MAX_PACKAGE_NAME_LENGTH = 255
MAX_PACKAGE_VERSION_LENGTH = 255
MAX_ECOSYSTEM_LENGTH = 50
SHA256_HEX_LENGTH = 64
# ---------------------------------------------------------------------------
# Scanner
# ---------------------------------------------------------------------------
GUARDDOG_BINARY_FALLBACK = "guarddog"
GUARDDOG_OUTPUT_KEY = "--output-format"
GUARDDOG_OUTPUT_FORMAT = "json"
GUARDDOG_RESULTS_KEY = "results"
GUARDDOG_ERRORS_KEY = "errors"
SCAN_ERROR_TIMEOUT = "timeout"
SCAN_ERROR_BINARY_NOT_FOUND = "guarddog_not_found"
SCAN_ERROR_JSON_PARSE = "json_parse_error"
SCAN_ERROR_DOWNLOAD_FAILED = "Download failed"
ERROR_MESSAGE_MAX_LENGTH = 1000
SHA256_CHUNK_SIZE = 8192
# Finding data dict keys
FINDING_KEYS = ("rule", "severity", "message", "location", "code")
DEFAULT_FINDING_SEVERITY = SEVERITY_WARNING
# ---------------------------------------------------------------------------
# JSON paths (used in SQL json_extract queries)
# ---------------------------------------------------------------------------
JSON_PATH_RULE = "$.rule"
JSON_PATH_SEVERITY = "$.severity"
# ---------------------------------------------------------------------------
# Webhook
# ---------------------------------------------------------------------------
RELEVANT_WEBHOOK_ACTIONS = {"CREATED", "UPDATED"}
WEBHOOK_IGNORE_NON_PACKAGE = "non_package_asset"
WEBHOOK_IGNORE_NO_NAME_OR_VERSION = "no_name_or_version"
WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT = "no_asset_or_component"
WEBHOOK_STATUS_ACCEPTED = "accepted"
WEBHOOK_STATUS_IGNORED = "ignored"
# ---------------------------------------------------------------------------
# API
# ---------------------------------------------------------------------------
API_PREFIX_V1 = "/api/v1"
HEALTH_PATH = "/health"
STATIC_MOUNT_PATH = "/static"
CSV_MEDIA_TYPE = "text/csv"
# ---------------------------------------------------------------------------
# LLM
# ---------------------------------------------------------------------------
LLM_DEFAULT_MODEL = "gpt-4o-mini"
LLM_DEFAULT_API_BASE = "https://api.openai.com/v1"
LLM_DEFAULT_TIMEOUT = 30
LLM_ANALYSIS_SYSTEM_PROMPT = (
"You are a security analyst reviewing GuardDog findings for a Python package. "
"Given a finding (rule name, severity, message, code snippet, location), "
"provide a concise security analysis in 2-3 paragraphs. "
"Assess whether this is likely a real threat or a false positive. "
"Explain the risk, potential impact, and recommend an action. "
"Be specific about the code pattern found and its implications. "
"Respond in JSON with keys: verdict (safe|suspicious|malicious), "
"summary (1-line verdict), analysis (2-3 paragraphs), "
"and severity_rating (low|medium|high|critical)."
)
# ---------------------------------------------------------------------------
# Application metadata
# ---------------------------------------------------------------------------
APP_NAME = "GuardDog Nexus"
APP_DESCRIPTION = "Scan PyPI packages from Sonatype Nexus webhooks using GuardDog"
APP_PACKAGE = "guarddog_nexus"
# ---------------------------------------------------------------------------
# HTTP
# ---------------------------------------------------------------------------
HTTP_TIMEOUT_DOWNLOAD = 120
HTTP_TIMEOUT_API = 30