feat: поддержка Go и npm экосистем
- setup-nexus.sh: создание go-proxy (proxy.golang.org) и npm-proxy (registry.npmjs.org)
- nexus_client.py: extract_go_info() и extract_npm_info() для парсинга путей
  - Go: packages/github.com/gorilla/mux/@v/v1.8.0.zip → name=github.com/gorilla/mux ver=v1.8.0
  - npm: packages/lodash/-/lodash-4.17.21.tgz → name=lodash ver=4.17.21
- nexus_client.py: EXTRACTORS dict + extract_package_info() — универсальный extractor
- webhooks.py: _detect_ecosystem() — определяет экосистему из asset.format
- harvester.py: использует extract_package_info() вместо extract_pypi_info()
- Всё в Docker-контейнере, на хосте ничего не ставится
- GuardDog поддерживает go и npm из коробки
This commit is contained in:
@@ -15,6 +15,7 @@ PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem")
|
|||||||
|
|
||||||
# Prefix used in PyPI-style asset paths ("/packages/name/ver/file")
|
# Prefix used in PyPI-style asset paths ("/packages/name/ver/file")
|
||||||
PYPI_PATH_PREFIX = "packages"
|
PYPI_PATH_PREFIX = "packages"
|
||||||
|
NPM_PATH_PREFIX = "packages"
|
||||||
|
|
||||||
# Metadata file patterns that should never be scanned
|
# Metadata file patterns that should never be scanned
|
||||||
METADATA_PATTERNS = (
|
METADATA_PATTERNS = (
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from guarddog_nexus.constants import (
|
|||||||
)
|
)
|
||||||
from guarddog_nexus.logging_setup import log
|
from guarddog_nexus.logging_setup import log
|
||||||
from guarddog_nexus.models import Finding, Scan, ScanStatus
|
from guarddog_nexus.models import Finding, Scan, ScanStatus
|
||||||
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info
|
from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_package_info
|
||||||
from guarddog_nexus.scanner import scan_package
|
from guarddog_nexus.scanner import scan_package
|
||||||
|
|
||||||
# Per-URL locks to avoid parallel scans of the same asset
|
# Per-URL locks to avoid parallel scans of the same asset
|
||||||
@@ -36,14 +36,14 @@ async def harvest(
|
|||||||
asset_path: str,
|
asset_path: str,
|
||||||
session: AsyncSession,
|
session: AsyncSession,
|
||||||
) -> Scan | None:
|
) -> Scan | None:
|
||||||
ecosystem = DEFAULT_ECOSYSTEM if format_ in (DEFAULT_ECOSYSTEM,) else format_
|
ecosystem = format_ if format_ else DEFAULT_ECOSYSTEM
|
||||||
|
|
||||||
filename = os.path.basename(download_url.split("?")[0])
|
filename = os.path.basename(download_url.split("?")[0])
|
||||||
if not filename.endswith(PACKAGE_EXTENSIONS):
|
if not filename.endswith(PACKAGE_EXTENSIONS):
|
||||||
log.info("Skipping non-package asset: %s", filename)
|
log.info("Skipping non-package asset: %s", filename)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
info = extract_pypi_info(asset_path)
|
info = extract_package_info(asset_path, ecosystem)
|
||||||
if info is None:
|
if info is None:
|
||||||
log.warning("Could not parse package info from path: %s", asset_path)
|
log.warning("Could not parse package info from path: %s", asset_path)
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import httpx
|
|||||||
|
|
||||||
from guarddog_nexus.config import config
|
from guarddog_nexus.config import config
|
||||||
from guarddog_nexus.constants import (
|
from guarddog_nexus.constants import (
|
||||||
|
NPM_PATH_PREFIX,
|
||||||
PYPI_PATH_PREFIX,
|
PYPI_PATH_PREFIX,
|
||||||
SHA256_CHUNK_SIZE,
|
SHA256_CHUNK_SIZE,
|
||||||
)
|
)
|
||||||
@@ -24,6 +25,70 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_go_info(asset_path: str) -> tuple[str, str] | None:
|
||||||
|
"""Extract module and version from a Go proxy asset path.
|
||||||
|
|
||||||
|
Path format: packages/github.com/gin-gonic/gin/@v/v1.9.0.zip
|
||||||
|
"""
|
||||||
|
cleaned = asset_path.strip("/")
|
||||||
|
# Find @v/ marker
|
||||||
|
idx = cleaned.find("/@v/")
|
||||||
|
if idx == -1:
|
||||||
|
return None
|
||||||
|
if cleaned.startswith(PYPI_PATH_PREFIX + "/"):
|
||||||
|
module = cleaned[len(PYPI_PATH_PREFIX) + 1 : idx]
|
||||||
|
else:
|
||||||
|
module = cleaned[:idx]
|
||||||
|
if not module:
|
||||||
|
return None
|
||||||
|
# Version: after @v/ up to the next / or end
|
||||||
|
ver_start = idx + 4 # len("/@v/")
|
||||||
|
rest = cleaned[ver_start:]
|
||||||
|
version = rest.split("/")[0] if "/" in rest else rest
|
||||||
|
if version.endswith(".zip"):
|
||||||
|
version = version[:-4]
|
||||||
|
return module, version
|
||||||
|
|
||||||
|
|
||||||
|
def extract_npm_info(asset_path: str) -> tuple[str, str] | None:
|
||||||
|
"""Extract package name and version from an npm proxy asset path.
|
||||||
|
|
||||||
|
Path format: packages/react/-/react-18.2.0.tgz
|
||||||
|
"""
|
||||||
|
parts = asset_path.strip("/").split("/")
|
||||||
|
if len(parts) < 4 or parts[0] != NPM_PATH_PREFIX:
|
||||||
|
return None
|
||||||
|
name = parts[1]
|
||||||
|
# Last segment: <name>-<version>.tgz
|
||||||
|
last = parts[-1]
|
||||||
|
if last.startswith(name + "-"):
|
||||||
|
raw = last[len(name) + 1 :]
|
||||||
|
for ext in (".tgz", ".tar.gz"):
|
||||||
|
if raw.endswith(ext):
|
||||||
|
return name, raw[: -len(ext)]
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Ecosystem name -> parser for that ecosystem's asset path layout
EXTRACTORS = {
    "pypi": extract_pypi_info,
    "go": extract_go_info,
    "npm": extract_npm_info,
}


def extract_package_info(asset_path: str, ecosystem: str) -> tuple[str, str] | None:
    """Return ``(name, version)`` parsed from *asset_path* for *ecosystem*.

    Ecosystems listed in ``EXTRACTORS`` are handled by their dedicated
    parser.  Any other ecosystem falls back to reading the second and third
    path segments as name and version; ``None`` is returned when the path
    has fewer than three segments.
    """
    parser = EXTRACTORS.get(ecosystem)
    if not parser:
        # Unknown ecosystem: assume a "<prefix>/<name>/<version>/..." layout.
        segments = asset_path.strip("/").split("/")
        if len(segments) < 3:
            return None
        return segments[1], segments[2]
    return parser(asset_path)
|
||||||
|
|
||||||
|
|
||||||
async def download_asset(download_url: str, dest_dir: str) -> str | None:
|
async def download_asset(download_url: str, dest_dir: str) -> str | None:
|
||||||
"""Download an asset from Nexus using async httpx."""
|
"""Download an asset from Nexus using async httpx."""
|
||||||
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
|
dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0]))
|
||||||
|
|||||||
@@ -49,6 +49,18 @@ def _extract_asset_path(asset: dict) -> str | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Known spellings of a format name -> canonical ecosystem identifier
_ECOSYSTEM_ALIASES = {
    "pypi": "pypi",
    "pip": "pypi",
    "python": "pypi",
    "go": "go",
    "golang": "go",
    "npm": "npm",
    "node": "npm",
}


def _detect_ecosystem(source: dict) -> str:
    """Detect the ecosystem from an asset or component ``format`` field.

    Args:
        source: Webhook ``asset`` or ``component`` dict.

    Returns:
        The canonical ecosystem name for known aliases (case-insensitive),
        the lowercased format unchanged when it is not a known alias, or
        ``DEFAULT_ECOSYSTEM`` when the field is missing, empty, ``null``
        or not a string.
    """
    raw = source.get("format")
    # A JSON null or non-string format used to raise AttributeError on
    # .lower(); treat it the same as a missing field.
    fmt = raw.lower() if isinstance(raw, str) else ""
    return _ECOSYSTEM_ALIASES.get(fmt, fmt) or DEFAULT_ECOSYSTEM
|
||||||
|
|
||||||
|
|
||||||
@router.post("/nexus")
|
@router.post("/nexus")
|
||||||
async def nexus_webhook(
|
async def nexus_webhook(
|
||||||
request: Request,
|
request: Request,
|
||||||
@@ -96,11 +108,12 @@ async def nexus_webhook(
|
|||||||
download_url = asset.get("downloadUrl") or _build_download_url(
|
download_url = asset.get("downloadUrl") or _build_download_url(
|
||||||
repository, asset_path
|
repository, asset_path
|
||||||
)
|
)
|
||||||
|
ecosystem = _detect_ecosystem(asset)
|
||||||
|
|
||||||
log.info("Webhook: %s asset %s in %s", action, asset_path, repository)
|
log.info("Webhook: %s asset %s (%s) in %s", action, asset_path, ecosystem, repository)
|
||||||
|
|
||||||
background_tasks.add_task(
|
background_tasks.add_task(
|
||||||
_scan_in_background, download_url, repository, DEFAULT_ECOSYSTEM, asset_path
|
_scan_in_background, download_url, repository, ecosystem, asset_path
|
||||||
)
|
)
|
||||||
return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}
|
return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action}
|
||||||
|
|
||||||
@@ -113,7 +126,8 @@ async def nexus_webhook(
|
|||||||
"reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
|
"reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION,
|
||||||
}
|
}
|
||||||
|
|
||||||
background_tasks.add_task(_scan_component, repository, name, version)
|
ecosystem = _detect_ecosystem(component)
|
||||||
|
background_tasks.add_task(_scan_component, repository, name, version, ecosystem)
|
||||||
return {
|
return {
|
||||||
"status": WEBHOOK_STATUS_ACCEPTED,
|
"status": WEBHOOK_STATUS_ACCEPTED,
|
||||||
"component": f"{name}=={version}",
|
"component": f"{name}=={version}",
|
||||||
@@ -126,12 +140,12 @@ async def nexus_webhook(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def _scan_component(repository: str, name: str, version: str):
|
async def _scan_component(repository: str, name: str, version: str, ecosystem: str):
|
||||||
from guarddog_nexus.nexus_client import nexus_get
|
from guarddog_nexus.nexus_client import nexus_get
|
||||||
|
|
||||||
api_path = (
|
api_path = (
|
||||||
f"/service/rest/v1/search"
|
f"/service/rest/v1/search"
|
||||||
f"?repository={repository}&name={name}&version={version}&format={DEFAULT_ECOSYSTEM}"
|
f"?repository={repository}&name={name}&version={version}&format={ecosystem}"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
resp = await nexus_get(api_path)
|
resp = await nexus_get(api_path)
|
||||||
@@ -157,7 +171,7 @@ async def _scan_component(repository: str, name: str, version: str):
|
|||||||
log.info("Scanning component asset: %s", asset_path)
|
log.info("Scanning component asset: %s", asset_path)
|
||||||
async for session in get_session():
|
async for session in get_session():
|
||||||
await harvest(
|
await harvest(
|
||||||
download_url, repository, DEFAULT_ECOSYSTEM, asset_path, session
|
download_url, repository, ecosystem, asset_path, session
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|||||||
@@ -30,37 +30,51 @@ if [ -f /nexus-data/admin.password ]; then
|
|||||||
echo "Using initial admin password from volume"
|
echo "Using initial admin password from volume"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Creating PyPI proxy repository..."
|
# Common proxy config
|
||||||
|
_proxy_config() {
  # Print the JSON body for a Nexus "create proxy repository" request.
  #   $1 - repository name
  #   $2 - remote (upstream) URL the repository proxies
  local repo_name="$1" upstream_url="$2"
  cat <<EOF
{
  "name": "${repo_name}",
  "online": true,
  "storage": {
    "blobStoreName": "default",
    "strictContentTypeValidation": true
  },
  "proxy": {
    "remoteUrl": "${upstream_url}",
    "contentMaxAge": 1440,
    "metadataMaxAge": 1440
  },
  "negativeCache": {
    "enabled": true,
    "timeToLive": 1440
  },
  "httpClient": {
    "blocked": false,
    "autoBlock": true,
    "connection": {
      "timeout": 60,
      "retries": 3
    }
  }
}
EOF
}
|
||||||
|
|
||||||
|
_create_proxy() {
|
||||||
|
local name="$1" remote="$2" format="$3"
|
||||||
|
echo "Creating $name ($format proxy → $remote)..."
|
||||||
curl -sf -u "admin:${AUTH_PASS}" \
|
curl -sf -u "admin:${AUTH_PASS}" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-X POST \
|
-X POST \
|
||||||
-d "{
|
-d "$(_proxy_config "$name" "$remote")" \
|
||||||
\"name\": \"pypi-proxy\",
|
"${NEXUS_URL}/service/rest/v1/repositories/${format}/proxy" 2>/dev/null && \
|
||||||
\"online\": true,
|
echo " OK" || echo " already exists or failed, continuing..."
|
||||||
\"storage\": {
|
|
||||||
\"blobStoreName\": \"default\",
|
|
||||||
\"strictContentTypeValidation\": true
|
|
||||||
},
|
|
||||||
\"proxy\": {
|
|
||||||
\"remoteUrl\": \"https://pypi.org\",
|
|
||||||
\"contentMaxAge\": 1440,
|
|
||||||
\"metadataMaxAge\": 1440
|
|
||||||
},
|
|
||||||
\"negativeCache\": {
|
|
||||||
\"enabled\": true,
|
|
||||||
\"timeToLive\": 1440
|
|
||||||
},
|
|
||||||
\"httpClient\": {
|
|
||||||
\"blocked\": false,
|
|
||||||
\"autoBlock\": true,
|
|
||||||
\"connection\": {
|
|
||||||
\"timeout\": 60,
|
|
||||||
\"retries\": 3
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}" \
|
_create_proxy "pypi-proxy" "https://pypi.org" "pypi"
|
||||||
"${NEXUS_URL}/service/rest/v1/repositories/pypi/proxy" 2>/dev/null || \
|
_create_proxy "go-proxy" "https://proxy.golang.org" "go"
|
||||||
echo "Proxy repo may already exist, continuing..."
|
_create_proxy "npm-proxy" "https://registry.npmjs.org" "npm"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "NOTE: Webhook setup is not available in Nexus OSS/Community edition."
|
echo "NOTE: Webhook setup is not available in Nexus OSS/Community edition."
|
||||||
@@ -68,7 +82,7 @@ echo "In Nexus Pro, configure:"
|
|||||||
echo " Capability: Webhook: Repository"
|
echo " Capability: Webhook: Repository"
|
||||||
echo " URL: ${WEBHOOK_URL}"
|
echo " URL: ${WEBHOOK_URL}"
|
||||||
echo " Event types: repository.component, repository.asset"
|
echo " Event types: repository.component, repository.asset"
|
||||||
echo " Repository filter: pypi-proxy"
|
echo " Repository filter: pypi-proxy, go-proxy, npm-proxy"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
if [ -f /nexus-data/admin.password ]; then
|
if [ -f /nexus-data/admin.password ]; then
|
||||||
|
|||||||
Reference in New Issue
Block a user