feat: поддержка Go и npm экосистем

- setup-nexus.sh: создание go-proxy (proxy.golang.org) и npm-proxy (registry.npmjs.org)
- nexus_client.py: extract_go_info() и extract_npm_info() для парсинга путей
  Go:  packages/github.com/gorilla/mux/@v/v1.8.0.zip → name=github.com/gorilla/mux ver=v1.8.0
  npm: packages/lodash/-/lodash-4.17.21.tgz → name=lodash ver=4.17.21
- nexus_client.py: EXTRACTORS dict + extract_package_info() универсальный extractor
- webhooks.py: _detect_ecosystem() — определяет экосистему из asset.format
- harvester.py: использует extract_package_info() вместо extract_pypi_info()
- Всё в Docker-контейнере, на хосте ничего не ставится
- GuardDog поддерживает go и npm из коробки
This commit is contained in:
Marker689
2026-05-10 06:29:34 +03:00
parent 646a50d01a
commit 6523f55dcd
5 changed files with 134 additions and 40 deletions

View File

@@ -15,6 +15,7 @@ PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")
# Prefix used in PyPI-style asset paths ("/packages/name/ver/file") # Prefix used in PyPI-style asset paths ("/packages/name/ver/file")
PYPI_PATH_PREFIX = "packages" PYPI_PATH_PREFIX = "packages"
NPM_PATH_PREFIX = "packages"
# Metadata file patterns that should never be scanned # Metadata file patterns that should never be scanned
METADATA_PATTERNS = ( METADATA_PATTERNS = (

View File

@@ -18,7 +18,7 @@ from guarddog_nexus.constants import (
) )
from guarddog_nexus.logging_setup import log from guarddog_nexus.logging_setup import log
from guarddog_nexus.models import Finding, Scan, ScanStatus from guarddog_nexus.models import Finding, Scan, ScanStatus
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_package_info
from guarddog_nexus.scanner import scan_package from guarddog_nexus.scanner import scan_package
# Per-URL locks to avoid parallel scans of the same asset # Per-URL locks to avoid parallel scans of the same asset
@@ -36,14 +36,14 @@ async def harvest(
asset_path: str, asset_path: str,
session: AsyncSession, session: AsyncSession,
) -> Scan | None: ) -> Scan | None:
ecosystem = DEFAULT_ECOSYSTEM if format_ in (DEFAULT_ECOSYSTEM,) else format_ ecosystem = format_ if format_ else DEFAULT_ECOSYSTEM
filename = os.path.basename(download_url.split("?")[0]) filename = os.path.basename(download_url.split("?")[0])
if not filename.endswith(PACKAGE_EXTENSIONS): if not filename.endswith(PACKAGE_EXTENSIONS):
log.info("Skipping non-package asset: %s", filename) log.info("Skipping non-package asset: %s", filename)
return None return None
info = extract_pypi_info(asset_path) info = extract_package_info(asset_path, ecosystem)
if info is None: if info is None:
log.warning("Could not parse package info from path: %s", asset_path) log.warning("Could not parse package info from path: %s", asset_path)
return None return None

View File

@@ -7,6 +7,7 @@ import httpx
from guarddog_nexus.config import config from guarddog_nexus.config import config
from guarddog_nexus.constants import ( from guarddog_nexus.constants import (
NPM_PATH_PREFIX,
PYPI_PATH_PREFIX, PYPI_PATH_PREFIX,
SHA256_CHUNK_SIZE, SHA256_CHUNK_SIZE,
) )
@@ -24,6 +25,70 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
return None return None
def extract_go_info(asset_path: str) -> tuple[str, str] | None:
"""Extract module and version from a Go proxy asset path.
Path format: packages/github.com/gin-gonic/gin/@v/v1.9.0.zip
"""
cleaned = asset_path.strip("/")
# Find @v/ marker
idx = cleaned.find("/@v/")
if idx == -1:
return None
if cleaned.startswith(PYPI_PATH_PREFIX + "/"):
module = cleaned[len(PYPI_PATH_PREFIX) + 1 : idx]
else:
module = cleaned[:idx]
if not module:
return None
# Version: after @v/ up to the next / or end
ver_start = idx + 4 # len("/@v/")
rest = cleaned[ver_start:]
version = rest.split("/")[0] if "/" in rest else rest
if version.endswith(".zip"):
version = version[:-4]
return module, version
def extract_npm_info(asset_path: str) -> tuple[str, str] | None:
"""Extract package name and version from an npm proxy asset path.
Path format: packages/react/-/react-18.2.0.tgz
"""
parts = asset_path.strip("/").split("/")
if len(parts) < 4 or parts[0] != NPM_PATH_PREFIX:
return None
name = parts[1]
# Last segment: <name>-<version>.tgz
last = parts[-1]
if last.startswith(name + "-"):
raw = last[len(name) + 1 :]
for ext in (".tgz", ".tar.gz"):
if raw.endswith(ext):
return name, raw[: -len(ext)]
return None
# Dispatch table: ecosystem name -> function that parses an asset path
EXTRACTORS = {
    "pypi": extract_pypi_info,
    "go": extract_go_info,
    "npm": extract_npm_info,
}


def extract_package_info(asset_path: str, ecosystem: str) -> tuple[str, str] | None:
    """Return ``(name, version)`` parsed from *asset_path* for *ecosystem*.

    Ecosystems listed in ``EXTRACTORS`` are delegated to their dedicated
    parser. For any other ecosystem the second and third path segments are
    returned as name and version; ``None`` if the path has fewer than three
    segments.
    """
    handler = EXTRACTORS.get(ecosystem)
    if not handler:
        # Unknown ecosystem: fall back to a plain positional split.
        segments = asset_path.strip("/").split("/")
        if len(segments) < 3:
            return None
        return segments[1], segments[2]
    return handler(asset_path)
async def download_asset(download_url: str, dest_dir: str) -> str | None: async def download_asset(download_url: str, dest_dir: str) -> str | None:
"""Download an asset from Nexus using async httpx.""" """Download an asset from Nexus using async httpx."""
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0])) dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))

View File

@@ -49,6 +49,18 @@ def _extract_asset_path(asset: dict) -> str | None:
return None return None
def _detect_ecosystem(source: dict) -> str:
"""Detect ecosystem from asset or component format field."""
fmt = source.get("format", "").lower()
if fmt in ("pypi", "pip", "python"):
return "pypi"
if fmt in ("go", "golang"):
return "go"
if fmt in ("npm", "node"):
return "npm"
return fmt or DEFAULT_ECOSYSTEM
@router.post("/nexus") @router.post("/nexus")
async def nexus_webhook( async def nexus_webhook(
request: Request, request: Request,
@@ -96,11 +108,12 @@ async def nexus_webhook(
download_url = asset.get("downloadUrl") or _build_download_url( download_url = asset.get("downloadUrl") or _build_download_url(
repository, asset_path repository, asset_path
) )
ecosystem = _detect_ecosystem(asset)
log.info("Webhook: %s asset %s in %s", action, asset_path, repository) log.info("Webhook: %s asset %s (%s) in %s", action, asset_path, ecosystem, repository)
background_tasks.add_task( background_tasks.add_task(
_scan_in_background, download_url, repository, DEFAULT_ECOSYSTEM, asset_path _scan_in_background, download_url, repository, ecosystem, asset_path
) )
return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action} return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}
@@ -113,7 +126,8 @@ async def nexus_webhook(
"reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION, "reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
} }
background_tasks.add_task(_scan_component, repository, name, version) ecosystem = _detect_ecosystem(component)
background_tasks.add_task(_scan_component, repository, name, version, ecosystem)
return { return {
"status": WEBHOOK_STATUS_ACCEPTED, "status": WEBHOOK_STATUS_ACCEPTED,
"component": f"{name}=={version}", "component": f"{name}=={version}",
@@ -126,12 +140,12 @@ async def nexus_webhook(
} }
async def _scan_component(repository: str, name: str, version: str): async def _scan_component(repository: str, name: str, version: str, ecosystem: str):
from guarddog_nexus.nexus_client import nexus_get from guarddog_nexus.nexus_client import nexus_get
api_path = ( api_path = (
f"/service/rest/v1/search" f"/service/rest/v1/search"
f"?repository={repository}&name={name}&version={version}&format={DEFAULT_ECOSYSTEM}" f"?repository={repository}&name={name}&version={version}&format={ecosystem}"
) )
try: try:
resp = await nexus_get(api_path) resp = await nexus_get(api_path)
@@ -157,7 +171,7 @@ async def _scan_component(repository: str, name: str, version: str):
log.info("Scanning component asset: %s", asset_path) log.info("Scanning component asset: %s", asset_path)
async for session in get_session(): async for session in get_session():
await harvest( await harvest(
download_url, repository, DEFAULT_ECOSYSTEM, asset_path, session download_url, repository, ecosystem, asset_path, session
) )
break break

View File

@@ -30,37 +30,51 @@ if [ -f /nexus-data/admin.password ]; then
echo "Using initial admin password from volume" echo "Using initial admin password from volume"
fi fi
echo "Creating PyPI proxy repository..." # Common proxy config
curl -sf -u "admin:${AUTH_PASS}" \ _proxy_config() {
-H "Content-Type: application/json" \ cat <<EOF
-X POST \ {
-d "{ "name": "$1",
\"name\": \"pypi-proxy\", "online": true,
\"online\": true, "storage": {
\"storage\": { "blobStoreName": "default",
\"blobStoreName\": \"default\", "strictContentTypeValidation": true
\"strictContentTypeValidation\": true },
}, "proxy": {
\"proxy\": { "remoteUrl": "$2",
\"remoteUrl\": \"https://pypi.org\", "contentMaxAge": 1440,
\"contentMaxAge\": 1440, "metadataMaxAge": 1440
\"metadataMaxAge\": 1440 },
}, "negativeCache": {
\"negativeCache\": { "enabled": true,
\"enabled\": true, "timeToLive": 1440
\"timeToLive\": 1440 },
}, "httpClient": {
\"httpClient\": { "blocked": false,
\"blocked\": false, "autoBlock": true,
\"autoBlock\": true, "connection": {
\"connection\": { "timeout": 60,
\"timeout\": 60, "retries": 3
\"retries\": 3
}
} }
}" \ }
"${NEXUS_URL}/service/rest/v1/repositories/pypi/proxy" 2>/dev/null || \ }
echo "Proxy repo may already exist, continuing..." EOF
}
# Create a Nexus proxy repository through the REST API.
# Arguments:
#   $1 - repository name (e.g. pypi-proxy)
#   $2 - remote URL to proxy
#   $3 - repository format; used as the path segment of the endpoint
# Uses AUTH_PASS and NEXUS_URL from the surrounding script. A failed request
# is reported on stdout and the script carries on.
_create_proxy() {
    local name="$1" remote="$2" format="$3"
    echo "Creating $name ($format proxy → $remote)..."
    # Explicit if/else instead of `A && B || C`, so the failure message
    # depends only on curl's exit status.
    if curl -sf -u "admin:${AUTH_PASS}" \
        -H "Content-Type: application/json" \
        -X POST \
        -d "$(_proxy_config "$name" "$remote")" \
        "${NEXUS_URL}/service/rest/v1/repositories/${format}/proxy" 2>/dev/null; then
        echo " OK"
    else
        echo " already exists or failed, continuing..."
    fi
}
_create_proxy "pypi-proxy" "https://pypi.org" "pypi"
_create_proxy "go-proxy" "https://proxy.golang.org" "go"
_create_proxy "npm-proxy" "https://registry.npmjs.org" "npm"
echo "" echo ""
echo "NOTE: Webhook setup is not available in Nexus OSS/Community edition." echo "NOTE: Webhook setup is not available in Nexus OSS/Community edition."
@@ -68,7 +82,7 @@ echo "In Nexus Pro, configure:"
echo " Capability: Webhook: Repository" echo " Capability: Webhook: Repository"
echo " URL: ${WEBHOOK_URL}" echo " URL: ${WEBHOOK_URL}"
echo " Event types: repository.component, repository.asset" echo " Event types: repository.component, repository.asset"
echo " Repository filter: pypi-proxy" echo " Repository filter: pypi-proxy, go-proxy, npm-proxy"
echo "" echo ""
if [ -f /nexus-data/admin.password ]; then if [ -f /nexus-data/admin.password ]; then