refactor: вынос хардкода + LLM-анализ finding'ов
## Часть A: Вынос хардкода
- Новый модуль constants.py — все magic strings, лимиты, severity, ключи
(104 хардкод-значения централизованы)
- Новый модуль queries.py — общие SQL-запросы (build_scan_list_query,
build_package_list_query, get_dashboard_stats)
Убрано дублирование кода между api/*.py и web/routes.py (~90%)
- config.py: добавлены NLP_ENABLED, nexus_timeout, guarddog_binary,
log_syslog_facility, LLM-переменные
- nexus_client.py: таймауты из конфига, SHA256_CHUNK_SIZE из constants
- scanner.py: error-ключи из constants, GUARDDOG_OUTPUT_FORMAT из constants
- webhooks.py: RELEVANT_WEBHOOK_ACTIONS, METADATA_PATTERNS, ignore-строки
из constants
- logging_setup.py: конфигурируемый syslog facility, APP_PACKAGE из constants
- main.py: APP_NAME, APP_DESCRIPTION, APP_PACKAGE из constants
- models.py: поле report: JSON | None в Finding для LLM-отчётов
- harvester.py: авто-очистка tmpdir через finally; ERROR_MESSAGE_MAX_LENGTH
из constants; PACKAGE_EXTENSIONS вместо SUPPORTED_EXTENSIONS (с .gem)
- api/*.py + web/routes.py: используют build_*_query из queries.py,
константы для лимитов и сортировок
- tests/conftest.py: SEVERITY_WARNING, DEFAULT_ECOSYSTEM из constants
## Часть B: LLM-анализ finding'ов
- llm.py: клиент для OpenAI-совместимых API с промптом security-аналитика
- harvester.py: авто-триггер после flagged scan, сохранение report в БД
- api/findings.py: POST /{id}/analyze — ручной триггер
- web/routes.py: POST /api/v1/findings/{id}/analyze — HTMX-фрагмент
- _llm_report_fragment.html: шаблон фрагмента с вердиктом
- scan_detail.html, package_detail.html: кнопка Analyze with LLM
(htmx-post, spinner, inline-замена на LLM-отчёт)
- style.css: стили для .llm-report .verdict-safe/suspicious/malicious
## Часть C: Тесты
- 50 тестов, все зелёные
- Линтер чистый
- Тесты используют constants где нужно
This commit is contained in:
@@ -8,32 +8,28 @@ import re
|
||||
from fastapi import APIRouter, BackgroundTasks, Header, HTTPException, Request, status
|
||||
|
||||
from guarddog_nexus.config import config
|
||||
from guarddog_nexus.constants import (
|
||||
DEFAULT_ECOSYSTEM,
|
||||
METADATA_PATTERNS,
|
||||
PACKAGE_EXTENSIONS,
|
||||
RELEVANT_WEBHOOK_ACTIONS,
|
||||
WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT,
|
||||
WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
|
||||
WEBHOOK_IGNORE_NON_PACKAGE,
|
||||
WEBHOOK_STATUS_ACCEPTED,
|
||||
WEBHOOK_STATUS_IGNORED,
|
||||
)
|
||||
from guarddog_nexus.database import get_session
|
||||
from guarddog_nexus.harvester import harvest
|
||||
from guarddog_nexus.logging_setup import log
|
||||
|
||||
router = APIRouter(prefix="/webhooks", tags=["webhooks"])
|
||||
|
||||
RELEVANT_ACTIONS = {"CREATED", "UPDATED"}
|
||||
|
||||
METADATA_PATTERNS = [
|
||||
re.compile(p)
|
||||
for p in [
|
||||
r"^/?simple/",
|
||||
r"\.html$",
|
||||
r"\.json$",
|
||||
r"\.xml$",
|
||||
r"/?index\.",
|
||||
r"\.rss$",
|
||||
r"\.atom$",
|
||||
]
|
||||
]
|
||||
|
||||
PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")
|
||||
_METADATA_RE = [re.compile(p) for p in METADATA_PATTERNS]
|
||||
|
||||
|
||||
def _is_package_asset(name: str) -> bool:
|
||||
for pat in METADATA_PATTERNS:
|
||||
for pat in _METADATA_RE:
|
||||
if pat.search(name):
|
||||
return False
|
||||
return name.endswith(PACKAGE_EXTENSIONS)
|
||||
@@ -41,7 +37,7 @@ def _is_package_asset(name: str) -> bool:
|
||||
|
||||
def _build_download_url(repo: str, asset_path: str) -> str:
|
||||
base = config.nexus_url.rstrip("/")
|
||||
asset_path = asset_path.lstrip("/")
|
||||
asset_path = asset_path.strip("/")
|
||||
return f"{base}/repository/{repo}/{asset_path}"
|
||||
|
||||
|
||||
@@ -60,7 +56,6 @@ async def nexus_webhook(
|
||||
x_nexus_webhook_signature: str | None = Header(None, alias="X-Nexus-Webhook-Signature"),
|
||||
):
|
||||
payload = await request.body()
|
||||
payload_str = payload.decode("utf-8")
|
||||
|
||||
if config.webhook_secret:
|
||||
if not x_nexus_webhook_signature:
|
||||
@@ -68,58 +63,75 @@ async def nexus_webhook(
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing signature"
|
||||
)
|
||||
expected = hmac.new(config.webhook_secret.encode(), payload, hashlib.sha256).hexdigest()
|
||||
expected = hmac.new(
|
||||
config.webhook_secret.encode(), payload, hashlib.sha256
|
||||
).hexdigest()
|
||||
if not hmac.compare_digest(x_nexus_webhook_signature, expected):
|
||||
log.warning("Webhook rejected: invalid signature")
|
||||
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN, detail="Invalid signature"
|
||||
)
|
||||
|
||||
try:
|
||||
data = json.loads(payload_str)
|
||||
data = json.loads(payload.decode("utf-8"))
|
||||
except json.JSONDecodeError:
|
||||
log.warning("Webhook received invalid JSON")
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON"
|
||||
)
|
||||
|
||||
action = data.get("action", "").upper()
|
||||
if action not in RELEVANT_ACTIONS:
|
||||
return {"status": "ignored", "action": action}
|
||||
if action not in RELEVANT_WEBHOOK_ACTIONS:
|
||||
return {"status": WEBHOOK_STATUS_IGNORED, "action": action}
|
||||
|
||||
repository = data.get("repositoryName", "")
|
||||
|
||||
asset = data.get("asset")
|
||||
component = data.get("component")
|
||||
|
||||
if asset:
|
||||
asset_path = _extract_asset_path(asset)
|
||||
if not asset_path or not _is_package_asset(asset_path):
|
||||
return {"status": "ignored", "reason": "non_package_asset"}
|
||||
return {"status": WEBHOOK_STATUS_IGNORED, "reason": WEBHOOK_IGNORE_NON_PACKAGE}
|
||||
|
||||
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
|
||||
download_url = asset.get("downloadUrl") or _build_download_url(
|
||||
repository, asset_path
|
||||
)
|
||||
|
||||
log.info("Webhook: %s asset %s in %s", action, asset_path, repository)
|
||||
|
||||
background_tasks.add_task(_scan_in_background, download_url, repository, "pypi", asset_path)
|
||||
return {"status": "accepted", "asset": asset_path, "action": action}
|
||||
background_tasks.add_task(
|
||||
_scan_in_background, download_url, repository, DEFAULT_ECOSYSTEM, asset_path
|
||||
)
|
||||
return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}
|
||||
|
||||
if component:
|
||||
name = component.get("name", "")
|
||||
version = component.get("version", "")
|
||||
if not name or not version:
|
||||
return {"status": "ignored", "reason": "no_name_or_version"}
|
||||
return {
|
||||
"status": WEBHOOK_STATUS_IGNORED,
|
||||
"reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
|
||||
}
|
||||
|
||||
# For component events, look up assets via Nexus REST API
|
||||
background_tasks.add_task(_scan_component, repository, name, version)
|
||||
return {"status": "accepted", "component": f"{name}=={version}", "action": action}
|
||||
return {
|
||||
"status": WEBHOOK_STATUS_ACCEPTED,
|
||||
"component": f"{name}=={version}",
|
||||
"action": action,
|
||||
}
|
||||
|
||||
return {"status": "ignored", "reason": "no_asset_or_component"}
|
||||
return {
|
||||
"status": WEBHOOK_STATUS_IGNORED,
|
||||
"reason": WEBHOOK_IGNORE_NO_ASSET_OR_COMPONENT,
|
||||
}
|
||||
|
||||
|
||||
async def _scan_component(repository: str, name: str, version: str):
|
||||
"""Look up component assets via Nexus API, then scan each package file."""
|
||||
|
||||
from guarddog_nexus.nexus_client import nexus_get
|
||||
|
||||
api_path = (
|
||||
f"/service/rest/v1/search?repository={repository}&name={name}&version={version}&format=pypi"
|
||||
f"/service/rest/v1/search"
|
||||
f"?repository={repository}&name={name}&version={version}&format={DEFAULT_ECOSYSTEM}"
|
||||
)
|
||||
try:
|
||||
resp = await nexus_get(api_path)
|
||||
@@ -139,10 +151,14 @@ async def _scan_component(repository: str, name: str, version: str):
|
||||
asset_path = _extract_asset_path(asset)
|
||||
if not asset_path or not _is_package_asset(asset_path):
|
||||
continue
|
||||
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
|
||||
download_url = asset.get("downloadUrl") or _build_download_url(
|
||||
repository, asset_path
|
||||
)
|
||||
log.info("Scanning component asset: %s", asset_path)
|
||||
async for session in get_session():
|
||||
await harvest(download_url, repository, "pypi", asset_path, session)
|
||||
await harvest(
|
||||
download_url, repository, DEFAULT_ECOSYSTEM, asset_path, session
|
||||
)
|
||||
break
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user