Files
guarddog-nexus/guarddog_nexus/constants.py
Marker689 d23abe8b4b feat: лимит конкурентных сканов через asyncio.Semaphore
- config.py: MAX_CONCURRENT_SCANS (default=4)
- harvester.py: глобальный _scan_semaphore оборачивает scan_package()
  — при N одновременных сканах, (N+1)-й будет ждать освобождения слота
  — download и SHA256 не лимитируются, только guarddog subprocess
- docker-compose.yml, .env.example: переменная добавлена
2026-05-10 05:52:23 +03:00

183 lines
5.9 KiB
Python

"""Centralized constants for GuardDog Nexus.
All magic strings, limits, enumerations, and shared data structures
used across the codebase live here to avoid duplication and drift.
"""
# ---------------------------------------------------------------------------
# Package handling
# ---------------------------------------------------------------------------
# File-name suffixes recognised as package archives.  Being a tuple of
# strings, the value can be passed straight to ``str.endswith``.
# NOTE: two consumers share this list -- webhooks (accept an asset or not)
# and harvester (download and scan or not).
PACKAGE_EXTENSIONS = (
    ".tar.gz",
    ".tgz",
    ".whl",
    ".zip",
    ".gem",
)
# Leading segment of PyPI-style asset paths, e.g. "/packages/name/ver/file".
PYPI_PATH_PREFIX = "packages"
# Regular-expression patterns for metadata files that should never be scanned.
# Several patterns end in ``$`` and one starts with ``^``, i.e. they are
# written for unanchored (``re.search``-style) matching.
# NOTE(review): the matching code lives outside this file -- confirm it uses
# ``re.search``.
METADATA_PATTERNS = (
    r"^/?simple/",
    r"\.html$",
    r"\.json$",
    r"\.xml$",
    # Anchored to the start of a path segment.  With an optional slash
    # (``/?index\.``) an unanchored search hits "index." anywhere in the
    # path, including inside a package file name such as
    # "zope.index.foo-1.0.tar.gz".  Under ``re.match`` this form and the
    # optional-slash form accept exactly the same strings.
    # NOTE(review): a segment that merely starts with "index." still
    # matches; tighten further once the intended file names are known.
    r"(?:^|/)index\.",
    r"\.rss$",
    r"\.atom$",
)
# ---------------------------------------------------------------------------
# Ecosystem
# ---------------------------------------------------------------------------
# Default ecosystem identifier (its consumers live outside this file).
DEFAULT_ECOSYSTEM = "pypi"
# ---------------------------------------------------------------------------
# Severity
# ---------------------------------------------------------------------------
# Severity labels as upper-case strings; WARNING is the milder of the two.
SEVERITY_WARNING = "WARNING"
SEVERITY_ERROR = "ERROR"
# ---------------------------------------------------------------------------
# Sorting
# ---------------------------------------------------------------------------
# Sort-field mapping for scans.  Every name maps to itself, so the dict is
# built from a plain list of names.
# NOTE(review): presumably keys are the sort names accepted from callers and
# values the underlying field names -- confirm against the query code.
SCAN_SORT_FIELDS = {
    field: field
    for field in (
        "id",
        "package_name",
        "started_at",
        "status",
        "total_findings",
        "flagged",
    )
}
# Sort-field mapping for packages.  Unlike an identity mapping, three of the
# five names differ from the field they map to.
# NOTE(review): presumably keys are the sort names accepted from callers and
# values the underlying field names -- confirm against the query code.
PACKAGE_SORT_FIELDS = {
    # Names that are translated.
    "name": "package_name",
    "version": "package_version",
    "last_scanned_at": "started_at",
    # Names that map to themselves.
    "total_findings": "total_findings",
    "flagged": "flagged",
}
# Default sort direction.
DEFAULT_SORT_DIR = "desc"
# Default sort names: the first is a key of SCAN_SORT_FIELDS, the second a
# key of PACKAGE_SORT_FIELDS.
DEFAULT_SORT_BY_SCANS = "started_at"
DEFAULT_SORT_BY_PACKAGES = "last_scanned_at"
# ---------------------------------------------------------------------------
# Pagination
# ---------------------------------------------------------------------------
# Page size: the default and the largest allowed value.
DEFAULT_PAGE_SIZE = 50
MAX_PAGE_SIZE = 200
# Starting offset when none is supplied (reading based on the name --
# TODO confirm in callers).
DEFAULT_OFFSET = 0
# Per-page count, presumably for the web UI.  Same value as
# DEFAULT_PAGE_SIZE, but deliberately left as an independent literal.
WEB_PER_PAGE = 50
# ---------------------------------------------------------------------------
# Dashboard limits
# ---------------------------------------------------------------------------
# Item-count limits (the ``*_LIMIT`` names); presumably the number of rows
# each dashboard widget shows -- confirm in the dashboard queries.
DASHBOARD_LATEST_FLAGGED_LIMIT = 8
DASHBOARD_MOST_FLAGGED_LIMIT = 8
DASHBOARD_LATEST_SCANS_LIMIT = 10
TOP_RULES_LIMIT = 10
# Look-back windows; per their ``*_DAYS`` names the unit is days.
RECENT_FLAGGED_DAYS = 7
HEATMAP_DAYS = 14
# ---------------------------------------------------------------------------
# Database fields
# ---------------------------------------------------------------------------
# Maximum string lengths.
# NOTE(review): presumably the column sizes of the database models --
# confirm against the schema.
MAX_PACKAGE_NAME_LENGTH = 255
MAX_PACKAGE_VERSION_LENGTH = 255
MAX_ECOSYSTEM_LENGTH = 50
# A SHA-256 digest is 256 bits, i.e. 64 hexadecimal characters.
SHA256_HEX_LENGTH = 64
# ---------------------------------------------------------------------------
# Scanner
# ---------------------------------------------------------------------------
# -- guarddog invocation and output ------------------------------------------
# Bare executable name (not a path) to fall back on.
GUARDDOG_BINARY_FALLBACK = "guarddog"
# Command-line flag and the value passed with it to request JSON output.
GUARDDOG_OUTPUT_KEY = "--output-format"
GUARDDOG_OUTPUT_FORMAT = "json"
# Key names; presumably looked up in the parsed guarddog output -- confirm
# in the scanner code.
GUARDDOG_RESULTS_KEY = "results"
GUARDDOG_ERRORS_KEY = "errors"
# Default for the concurrent-scan limit.
DEFAULT_MAX_CONCURRENT_SCANS = 4
# -- scan error labels -------------------------------------------------------
SCAN_ERROR_TIMEOUT = "timeout"
SCAN_ERROR_BINARY_NOT_FOUND = "guarddog_not_found"
SCAN_ERROR_JSON_PARSE = "json_parse_error"
# NOTE(review): free text, unlike the snake_case codes above.  Left exactly
# as is because consumers or stored records may depend on this string.
SCAN_ERROR_DOWNLOAD_FAILED = "Download failed"
# Upper bound on error-message length (presumably characters -- confirm).
ERROR_MESSAGE_MAX_LENGTH = 1000
# 8192 == 8 * 1024; presumably the read size in bytes when hashing a file.
SHA256_CHUNK_SIZE = 8192
# Keys of a finding data dict, kept as an ordered tuple.
FINDING_KEYS = (
    "rule",
    "severity",
    "message",
    "location",
    "code",
)
# Default finding severity: an alias of SEVERITY_WARNING, so the two names
# always hold the same value.
DEFAULT_FINDING_SEVERITY = SEVERITY_WARNING
# ---------------------------------------------------------------------------
# JSON paths (used in SQL json_extract queries)
# ---------------------------------------------------------------------------
# JSON path expressions: ``$`` is the document root, so these select its
# top-level "rule" and "severity" members.
JSON_PATH_RULE = "$.rule"
JSON_PATH_SEVERITY = "$.severity"
# ---------------------------------------------------------------------------
# Webhook
# ---------------------------------------------------------------------------
# Webhook actions considered relevant.  A set, so membership tests are the
# natural way to consult it.
RELEVANT_WEBHOOK_ACTIONS = {"UPDATED"}
# Status labels (presumably reported back by the webhook handler -- confirm).
WEBHOOK_STATUS_ACCEPTED = "accepted"
WEBHOOK_STATUS_IGNORED = "ignored"
# Reason codes that go with the "ignored" outcome, per their names.
WEBHOOK_IGNORE_NON_PACKAGE = "non_package_asset"
WEBHOOK_IGNORE_NO_NAME_OR_VERSION = "no_name_or_version"
WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT = "no_asset_or_component"
# ---------------------------------------------------------------------------
# API
# ---------------------------------------------------------------------------
# URL paths; each starts with a slash and has no trailing slash.
API_PREFIX_V1 = "/api/v1"
HEALTH_PATH = "/health"
STATIC_MOUNT_PATH = "/static"
# MIME type for CSV content.
CSV_MEDIA_TYPE = "text/csv"
# ---------------------------------------------------------------------------
# LLM
# ---------------------------------------------------------------------------
# Defaults for the LLM client: model name and API base URL.
LLM_DEFAULT_MODEL = "gpt-4o-mini"
LLM_DEFAULT_API_BASE = "https://api.openai.com/v1"
# Default timeout (presumably seconds -- confirm where the client is built).
LLM_DEFAULT_TIMEOUT = 30
# System prompt for finding analysis.  The fragments are joined with single
# spaces into one line of text; the last sentence requests a JSON reply with
# the keys verdict, summary, analysis and severity_rating.
LLM_ANALYSIS_SYSTEM_PROMPT = " ".join(
    (
        "You are a security analyst reviewing GuardDog findings for a Python package.",
        "Given a finding (rule name, severity, message, code snippet, location),",
        "provide a concise security analysis in 2-3 paragraphs.",
        "Assess whether this is likely a real threat or a false positive.",
        "Explain the risk, potential impact, and recommend an action.",
        "Be specific about the code pattern found and its implications.",
        "Respond in JSON with keys: verdict (safe|suspicious|malicious),",
        "summary (1-line verdict), analysis (2-3 paragraphs),",
        "and severity_rating (low|medium|high|critical).",
    )
)
# ---------------------------------------------------------------------------
# Application metadata
# ---------------------------------------------------------------------------
# Human-readable application name and one-line description.
APP_NAME = "GuardDog Nexus"
APP_DESCRIPTION = "Scan PyPI packages from Sonatype Nexus webhooks using GuardDog"
# Package name (underscore form, matching the ``guarddog_nexus`` directory).
APP_PACKAGE = "guarddog_nexus"
# ---------------------------------------------------------------------------
# HTTP
# ---------------------------------------------------------------------------
# HTTP timeouts (presumably seconds -- confirm at the client call sites).
# The download timeout is four times the API one.
HTTP_TIMEOUT_DOWNLOAD = 120
HTTP_TIMEOUT_API = 30