feat: поддержка Go и npm экосистем

- setup-nexus.sh: создание go-proxy (proxy.golang.org) и npm-proxy (registry.npmjs.org)
- nexus_client.py: extract_go_info() и extract_npm_info() для парсинга путей
  Go:  packages/github.com/gorilla/mux/@v/v1.8.0.zip → name=github.com/gorilla/mux ver=v1.8.0
  npm: packages/lodash/-/lodash-4.17.21.tgz → name=lodash ver=4.17.21
- nexus_client.py: EXTRACTORS dict + extract_package_info() универсальный extractor
- webhooks.py: _detect_ecosystem() — определяет экосистему из asset.format
- harvester.py: использует extract_package_info() вместо extract_pypi_info()
- Всё в Docker-контейнере, на хосте ничего не ставится
- GuardDog поддерживает go и npm из коробки
This commit is contained in:
Marker689
2026-05-10 06:29:34 +03:00
parent 646a50d01a
commit 6523f55dcd
5 changed files with 134 additions and 40 deletions

View File

@@ -15,6 +15,7 @@ PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")
# Prefix used in PyPI-style asset paths ("/packages/name/ver/file")
PYPI_PATH_PREFIX = "packages"
NPM_PATH_PREFIX = "packages"
# Metadata file patterns that should never be scanned
METADATA_PATTERNS = (

View File

@@ -18,7 +18,7 @@ from guarddog_nexus.constants import (
)
from guarddog_nexus.logging_setup import log
from guarddog_nexus.models import Finding, Scan, ScanStatus
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_package_info
from guarddog_nexus.scanner import scan_package
# Per-URL locks to avoid parallel scans of the same asset
@@ -36,14 +36,14 @@ async def harvest(
asset_path: str,
session: AsyncSession,
) -> Scan | None:
ecosystem = DEFAULT_ECOSYSTEM if format_ in (DEFAULT_ECOSYSTEM,) else format_
ecosystem = format_ if format_ else DEFAULT_ECOSYSTEM
filename = os.path.basename(download_url.split("?")[0])
if not filename.endswith(PACKAGE_EXTENSIONS):
log.info("Skipping non-package asset: %s", filename)
return None
info = extract_pypi_info(asset_path)
info = extract_package_info(asset_path, ecosystem)
if info is None:
log.warning("Could not parse package info from path: %s", asset_path)
return None

View File

@@ -7,6 +7,7 @@ import httpx
from guarddog_nexus.config import config
from guarddog_nexus.constants import (
NPM_PATH_PREFIX,
PYPI_PATH_PREFIX,
SHA256_CHUNK_SIZE,
)
@@ -24,6 +25,70 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
return None
def extract_go_info(asset_path: str) -> tuple[str, str] | None:
"""Extract module and version from a Go proxy asset path.
Path format: packages/github.com/gin-gonic/gin/@v/v1.9.0.zip
"""
cleaned = asset_path.strip("/")
# Find @v/ marker
idx = cleaned.find("/@v/")
if idx == -1:
return None
if cleaned.startswith(PYPI_PATH_PREFIX + "/"):
module = cleaned[len(PYPI_PATH_PREFIX) + 1 : idx]
else:
module = cleaned[:idx]
if not module:
return None
# Version: after @v/ up to the next / or end
ver_start = idx + 4 # len("/@v/")
rest = cleaned[ver_start:]
version = rest.split("/")[0] if "/" in rest else rest
if version.endswith(".zip"):
version = version[:-4]
return module, version
def extract_npm_info(asset_path: str) -> tuple[str, str] | None:
"""Extract package name and version from an npm proxy asset path.
Path format: packages/react/-/react-18.2.0.tgz
"""
parts = asset_path.strip("/").split("/")
if len(parts) < 4 or parts[0] != NPM_PATH_PREFIX:
return None
name = parts[1]
# Last segment: <name>-<version>.tgz
last = parts[-1]
if last.startswith(name + "-"):
raw = last[len(name) + 1 :]
for ext in (".tgz", ".tar.gz"):
if raw.endswith(ext):
return name, raw[: -len(ext)]
return None
# Ecosystem name -> function that parses that ecosystem's asset paths
EXTRACTORS = {
    "pypi": extract_pypi_info,
    "go": extract_go_info,
    "npm": extract_npm_info,
}


def extract_package_info(asset_path: str, ecosystem: str) -> tuple[str, str] | None:
    """Return ``(name, version)`` parsed from *asset_path* for *ecosystem*.

    Ecosystems listed in ``EXTRACTORS`` are delegated to their dedicated
    parser.  Any other ecosystem falls back to a positional guess: the
    second and third segments of the slash-separated path.  ``None`` is
    returned when that fallback path has fewer than three segments.
    """
    dedicated = EXTRACTORS.get(ecosystem)
    if not dedicated:
        # Unknown ecosystem: assume a "<prefix>/<name>/<version>/..." layout.
        segments = asset_path.strip("/").split("/")
        return (segments[1], segments[2]) if len(segments) >= 3 else None
    return dedicated(asset_path)
async def download_asset(download_url: str, dest_dir: str) -> str | None:
"""Download an asset from Nexus using async httpx."""
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))

View File

@@ -49,6 +49,18 @@ def _extract_asset_path(asset: dict) -> str | None:
return None
# Accepted spellings of a format value -> canonical ecosystem name
_ECOSYSTEM_ALIASES = {
    "pypi": "pypi",
    "pip": "pypi",
    "python": "pypi",
    "go": "go",
    "golang": "go",
    "npm": "npm",
    "node": "npm",
}


def _detect_ecosystem(source: dict) -> str:
    """Detect ecosystem from asset or component format field.

    Args:
        source: Webhook ``asset`` or ``component`` dict; only its
            ``"format"`` key is read.

    Returns:
        The canonical ecosystem name for known aliases, the lower-cased
        format itself for unrecognised values, or ``DEFAULT_ECOSYSTEM`` when
        the format is missing, empty, ``None`` or not a string.
    """
    raw = source.get("format")
    # The payload may carry "format": null or a non-string value; calling
    # .lower() on it directly would raise AttributeError.
    fmt = raw.strip().lower() if isinstance(raw, str) else ""
    if not fmt:
        return DEFAULT_ECOSYSTEM
    return _ECOSYSTEM_ALIASES.get(fmt, fmt)
@router.post("/nexus")
async def nexus_webhook(
request: Request,
@@ -96,11 +108,12 @@ async def nexus_webhook(
download_url = asset.get("downloadUrl") or _build_download_url(
repository, asset_path
)
ecosystem = _detect_ecosystem(asset)
log.info("Webhook: %s asset %s in %s", action, asset_path, repository)
log.info("Webhook: %s asset %s (%s) in %s", action, asset_path, ecosystem, repository)
background_tasks.add_task(
_scan_in_background, download_url, repository, DEFAULT_ECOSYSTEM, asset_path
_scan_in_background, download_url, repository, ecosystem, asset_path
)
return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}
@@ -113,7 +126,8 @@ async def nexus_webhook(
"reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
}
background_tasks.add_task(_scan_component, repository, name, version)
ecosystem = _detect_ecosystem(component)
background_tasks.add_task(_scan_component, repository, name, version, ecosystem)
return {
"status": WEBHOOK_STATUS_ACCEPTED,
"component": f"{name}=={version}",
@@ -126,12 +140,12 @@ async def nexus_webhook(
}
async def _scan_component(repository: str, name: str, version: str):
async def _scan_component(repository: str, name: str, version: str, ecosystem: str):
from guarddog_nexus.nexus_client import nexus_get
api_path = (
f"/service/rest/v1/search"
f"?repository={repository}&name={name}&version={version}&format={DEFAULT_ECOSYSTEM}"
f"?repository={repository}&name={name}&version={version}&format={ecosystem}"
)
try:
resp = await nexus_get(api_path)
@@ -157,7 +171,7 @@ async def _scan_component(repository: str, name: str, version: str):
log.info("Scanning component asset: %s", asset_path)
async for session in get_session():
await harvest(
download_url, repository, DEFAULT_ECOSYSTEM, asset_path, session
download_url, repository, ecosystem, asset_path, session
)
break

View File

@@ -30,37 +30,51 @@ if [ -f /nexus-data/admin.password ]; then
echo "Using initial admin password from volume"
fi
echo "Creating PyPI proxy repository..."
curl -sf -u "admin:${AUTH_PASS}" \
-H "Content-Type: application/json" \
-X POST \
-d "{
\"name\": \"pypi-proxy\",
\"online\": true,
\"storage\": {
\"blobStoreName\": \"default\",
\"strictContentTypeValidation\": true
},
\"proxy\": {
\"remoteUrl\": \"https://pypi.org\",
\"contentMaxAge\": 1440,
\"metadataMaxAge\": 1440
},
\"negativeCache\": {
\"enabled\": true,
\"timeToLive\": 1440
},
\"httpClient\": {
\"blocked\": false,
\"autoBlock\": true,
\"connection\": {
\"timeout\": 60,
\"retries\": 3
}
# Common proxy config: print the JSON request body for a proxy repository
# to stdout.
#   $1 - repository name   (value of "name")
#   $2 - upstream URL      (value of "proxy.remoteUrl")
# All other settings (blob store, cache ages, HTTP client) are fixed.
_proxy_config() {
cat <<EOF
{
"name": "$1",
"online": true,
"storage": {
"blobStoreName": "default",
"strictContentTypeValidation": true
},
"proxy": {
"remoteUrl": "$2",
"contentMaxAge": 1440,
"metadataMaxAge": 1440
},
"negativeCache": {
"enabled": true,
"timeToLive": 1440
},
"httpClient": {
"blocked": false,
"autoBlock": true,
"connection": {
"timeout": 60,
"retries": 3
}
}" \
"${NEXUS_URL}/service/rest/v1/repositories/pypi/proxy" 2>/dev/null || \
echo "Proxy repo may already exist, continuing..."
}
}
EOF
}
# Create a proxy repository through the Nexus REST API.
#   $1 - repository name
#   $2 - upstream URL
#   $3 - repository format; selects the endpoint /repositories/<format>/proxy
# Best-effort: a failed request (curl -f turns HTTP errors into a non-zero
# exit status) is reported and the script continues.
_create_proxy() {
    local name="$1" remote="$2" format="$3"
    echo "Creating $name ($format proxy → $remote)..."
    # Explicit if/else instead of "A && B || C", which would also run the
    # failure branch if the success branch itself returned non-zero.
    if curl -sf -u "admin:${AUTH_PASS}" \
        -H "Content-Type: application/json" \
        -X POST \
        -d "$(_proxy_config "$name" "$remote")" \
        "${NEXUS_URL}/service/rest/v1/repositories/${format}/proxy" 2>/dev/null; then
        echo " OK"
    else
        echo " already exists or failed, continuing..."
    fi
}

# One proxy repository per supported ecosystem.
_create_proxy "pypi-proxy" "https://pypi.org" "pypi"
_create_proxy "go-proxy" "https://proxy.golang.org" "go"
_create_proxy "npm-proxy" "https://registry.npmjs.org" "npm"
echo ""
echo "NOTE: Webhook setup is not available in Nexus OSS/Community edition."
@@ -68,7 +82,7 @@ echo "In Nexus Pro, configure:"
echo " Capability: Webhook: Repository"
echo " URL: ${WEBHOOK_URL}"
echo " Event types: repository.component, repository.asset"
echo " Repository filter: pypi-proxy"
echo " Repository filter: pypi-proxy, go-proxy, npm-proxy"
echo ""
if [ -f /nexus-data/admin.password ]; then