"""Centralized constants for GuardDog Nexus. All magic strings, limits, enumerations, and shared data structures used across the codebase live here to avoid duplication and drift. """ # --------------------------------------------------------------------------- # Package handling # --------------------------------------------------------------------------- # Unified list of recognised package file extensions. # NOTE: webhooks uses this to decide whether to accept an asset; # harvester uses it to decide whether to download and scan. PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip") # Prefix used in PyPI-style asset paths ("/packages/name/ver/file") PYPI_PATH_PREFIX = "packages" NPM_PATH_PREFIX = "packages" # Metadata file patterns that should never be scanned METADATA_PATTERNS = ( r"^/?simple/", r"\.html$", r"\.json$", r"\.xml$", r"/?index\.", r"\.rss$", r"\.atom$", ) # --------------------------------------------------------------------------- # Ecosystem # --------------------------------------------------------------------------- DEFAULT_ECOSYSTEM = "pypi" # --------------------------------------------------------------------------- # Severity # --------------------------------------------------------------------------- SEVERITY_WARNING = "WARNING" SEVERITY_ERROR = "ERROR" # --------------------------------------------------------------------------- # Sorting # --------------------------------------------------------------------------- SCAN_SORT_FIELDS = { "id": "id", "package_name": "package_name", "started_at": "started_at", "status": "status", "total_findings": "total_findings", "flagged": "flagged", } PACKAGE_SORT_FIELDS = { "name": "package_name", "version": "package_version", "last_scanned_at": "started_at", "total_findings": "total_findings", "flagged": "flagged", } DEFAULT_SORT_BY_SCANS = "started_at" DEFAULT_SORT_BY_PACKAGES = "last_scanned_at" DEFAULT_SORT_DIR = "desc" # --------------------------------------------------------------------------- # Pagination # --------------------------------------------------------------------------- DEFAULT_PAGE_SIZE = 50 MAX_PAGE_SIZE = 200 DEFAULT_OFFSET = 0 WEB_PER_PAGE = 50 # --------------------------------------------------------------------------- # Dashboard limits # --------------------------------------------------------------------------- DASHBOARD_LATEST_FLAGGED_LIMIT = 8 DASHBOARD_LATEST_SCANS_LIMIT = 10 DASHBOARD_MOST_FLAGGED_LIMIT = 8 TOP_RULES_LIMIT = 10 RECENT_FLAGGED_DAYS = 7 HEATMAP_DAYS = 14 # --------------------------------------------------------------------------- # Database fields # --------------------------------------------------------------------------- MAX_PACKAGE_NAME_LENGTH = 255 MAX_PACKAGE_VERSION_LENGTH = 255 MAX_ECOSYSTEM_LENGTH = 50 SHA256_HEX_LENGTH = 64 # --------------------------------------------------------------------------- # Scanner # --------------------------------------------------------------------------- GUARDDOG_BINARY_FALLBACK = "guarddog" DEFAULT_MAX_CONCURRENT_SCANS = 4 GUARDDOG_OUTPUT_KEY = "--output-format" GUARDDOG_OUTPUT_FORMAT = "json" GUARDDOG_RESULTS_KEY = "results" SCAN_ERROR_TIMEOUT = "timeout" SCAN_ERROR_BINARY_NOT_FOUND = "guarddog_not_found" SCAN_ERROR_JSON_PARSE = "json_parse_error" SCAN_ERROR_DOWNLOAD_FAILED = "Download failed" ERROR_MESSAGE_MAX_LENGTH = 1000 SHA256_CHUNK_SIZE = 8192 # Finding data dict keys FINDING_KEYS = ("rule", "severity", "message", "location", "code") DEFAULT_FINDING_SEVERITY = SEVERITY_WARNING # --------------------------------------------------------------------------- # JSON paths (used in SQL json_extract queries) # --------------------------------------------------------------------------- JSON_PATH_RULE = "$.rule" JSON_PATH_SEVERITY = "$.severity" # --------------------------------------------------------------------------- # Webhook # --------------------------------------------------------------------------- RELEVANT_WEBHOOK_ACTIONS = {"UPDATED"} WEBHOOK_IGNORE_NON_PACKAGE = "non_package_asset" WEBHOOK_IGNORE_NO_NAME_OR_VERSION = "no_name_or_version" WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT = "no_asset_or_component" WEBHOOK_STATUS_ACCEPTED = "accepted" WEBHOOK_STATUS_IGNORED = "ignored" # --------------------------------------------------------------------------- # API # --------------------------------------------------------------------------- API_PREFIX_V1 = "/api/v1" HEALTH_PATH = "/health" STATIC_MOUNT_PATH = "/static" CSV_MEDIA_TYPE = "text/csv" # --------------------------------------------------------------------------- # LLM # --------------------------------------------------------------------------- LLM_DEFAULT_MODEL = "gpt-4o-mini" LLM_DEFAULT_API_BASE = "https://api.openai.com/v1" LLM_DEFAULT_TIMEOUT = 30 LLM_DEFAULT_TEMPERATURE = 0.3 LLM_RESPONSE_FORMAT = "json_object" LLM_ANALYSIS_SYSTEM_PROMPT = ( "You are a security analyst reviewing GuardDog findings for a software package. " "Given a finding (rule name, severity, message, code snippet, location), " "provide a concise security analysis in 2-3 paragraphs. " "Assess whether this is likely a real threat or a false positive. " "Explain the risk, potential impact, and recommend an action. " "Be specific about the code pattern found and its implications. " "Respond in JSON with keys: verdict (safe|suspicious|malicious), " "summary (1-line verdict), analysis (2-3 paragraphs), " "and severity_rating (low|medium|high|critical)." ) # --------------------------------------------------------------------------- # Application metadata # --------------------------------------------------------------------------- APP_NAME = "GuardDog Nexus" APP_DESCRIPTION = "Scan PyPI packages from Sonatype Nexus webhooks using GuardDog" APP_PACKAGE = "guarddog_nexus" APP_VERSION = "0.1.0" # --------------------------------------------------------------------------- # HTTP # --------------------------------------------------------------------------- HTTP_TIMEOUT_DOWNLOAD = 120 HTTP_TIMEOUT_API = 30