From 6523f55dcdec8a292170093c348f3d1079091932 Mon Sep 17 00:00:00 2001 From: Marker689 Date: Sun, 10 May 2026 06:29:34 +0300 Subject: [PATCH] =?UTF-8?q?feat:=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5=D1=80?= =?UTF-8?q?=D0=B6=D0=BA=D0=B0=20Go=20=D0=B8=20npm=20=D1=8D=D0=BA=D0=BE?= =?UTF-8?q?=D1=81=D0=B8=D1=81=D1=82=D0=B5=D0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - setup-nexus.sh: создание go-proxy (proxy.golang.org) и npm-proxy (registry.npmjs.org) - nexus_client.py: extract_go_info() и extract_npm_info() для парсинга путей Go: packages/github.com/gorilla/mux/@v/v1.8.0.zip → name=github.com/gorilla/mux ver=v1.8.0 npm: packages/lodash/-/lodash-4.17.21.tgz → name=lodash ver=4.17.21 - nexus_client.py: EXTRACTORS dict + extract_package_info() универсальный extractor - webhooks.py: _detect_ecosystem() — определяет экосистему из asset.format - harvester.py: использует extract_package_info() вместо extract_pypi_info() - Всё в Docker-контейнере, на хосте ничего не ставится - GuardDog поддерживает go и npm из коробки --- guarddog_nexus/constants.py | 1 + guarddog_nexus/harvester.py | 6 +-- guarddog_nexus/nexus_client.py | 65 +++++++++++++++++++++++++++++ guarddog_nexus/webhooks.py | 26 +++++++++--- scripts/setup-nexus.sh | 76 ++++++++++++++++++++-------------- 5 files changed, 134 insertions(+), 40 deletions(-) diff --git a/guarddog_nexus/constants.py b/guarddog_nexus/constants.py index a6f8b95..abdc5fc 100644 --- a/guarddog_nexus/constants.py +++ b/guarddog_nexus/constants.py @@ -15,6 +15,7 @@ PACKAGE_EXTENSIONS = (".tar.gz", ".tgz", ".whl", ".zip", ".gem") # Prefix used in PyPI-style asset paths ("/packages/name/ver/file") PYPI_PATH_PREFIX = "packages" +NPM_PATH_PREFIX = "packages" # Metadata file patterns that should never be scanned METADATA_PATTERNS = ( diff --git a/guarddog_nexus/harvester.py b/guarddog_nexus/harvester.py index 3f43f1b..31dfa23 100644 --- a/guarddog_nexus/harvester.py +++ b/guarddog_nexus/harvester.py @@ -18,7 +18,7 @@ from guarddog_nexus.constants import ( ) from guarddog_nexus.logging_setup import log from guarddog_nexus.models import Finding, Scan, ScanStatus -from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_pypi_info +from guarddog_nexus.nexus_client import compute_sha256, download_asset, extract_package_info from guarddog_nexus.scanner import scan_package # Per-URL locks to avoid parallel scans of the same asset @@ -36,14 +36,14 @@ async def harvest( asset_path: str, session: AsyncSession, ) -> Scan | None: - ecosystem = DEFAULT_ECOSYSTEM if format_ in (DEFAULT_ECOSYSTEM,) else format_ + ecosystem = format_ if format_ else DEFAULT_ECOSYSTEM filename = os.path.basename(download_url.split("?")[0]) if not filename.endswith(PACKAGE_EXTENSIONS): log.info("Skipping non-package asset: %s", filename) return None - info = extract_pypi_info(asset_path) + info = extract_package_info(asset_path, ecosystem) if info is None: log.warning("Could not parse package info from path: %s", asset_path) return None diff --git a/guarddog_nexus/nexus_client.py b/guarddog_nexus/nexus_client.py index f7301ae..db466f3 100644 --- a/guarddog_nexus/nexus_client.py +++ b/guarddog_nexus/nexus_client.py @@ -7,6 +7,7 @@ import httpx from guarddog_nexus.config import config from guarddog_nexus.constants import ( + NPM_PATH_PREFIX, PYPI_PATH_PREFIX, SHA256_CHUNK_SIZE, ) @@ -24,6 +25,70 @@ def extract_pypi_info(asset_path: str) -> tuple[str, str] | None: return None +def extract_go_info(asset_path: str) -> tuple[str, str] | None: + """Extract module and version from a Go proxy asset path. + + Path format: packages/github.com/gin-gonic/gin/@v/v1.9.0.zip + """ + cleaned = asset_path.strip("/") + # Find @v/ marker + idx = cleaned.find("/@v/") + if idx == -1: + return None + if cleaned.startswith(PYPI_PATH_PREFIX + "/"): + module = cleaned[len(PYPI_PATH_PREFIX) + 1 : idx] + else: + module = cleaned[:idx] + if not module: + return None + # Version: after @v/ up to the next / or end + ver_start = idx + 4 # len("/@v/") + rest = cleaned[ver_start:] + version = rest.split("/")[0] if "/" in rest else rest + if version.endswith(".zip"): + version = version[:-4] + return module, version + + +def extract_npm_info(asset_path: str) -> tuple[str, str] | None: + """Extract package name and version from an npm proxy asset path. + + Path format: packages/react/-/react-18.2.0.tgz + """ + parts = asset_path.strip("/").split("/") + if len(parts) < 4 or parts[0] != NPM_PATH_PREFIX: + return None + name = parts[1] + # Last segment: -.tgz + last = parts[-1] + if last.startswith(name + "-"): + raw = last[len(name) + 1 :] + for ext in (".tgz", ".tar.gz"): + if raw.endswith(ext): + return name, raw[: -len(ext)] + return None + + +# Map of ecosystem → extractor function +EXTRACTORS = { + "pypi": extract_pypi_info, + "go": extract_go_info, + "npm": extract_npm_info, +} + + +def extract_package_info(asset_path: str, ecosystem: str) -> tuple[str, str] | None: + """Extract package name and version based on ecosystem.""" + extractor = EXTRACTORS.get(ecosystem) + if extractor: + return extractor(asset_path) + # Fallback for unknown ecosystems — try simple parts split + parts = asset_path.strip("/").split("/") + if len(parts) >= 3: + return parts[1], parts[2] + return None + + async def download_asset(download_url: str, dest_dir: str) -> str | None: """Download an asset from Nexus using async httpx.""" dest_path = os.path.join(dest_dir, os.path.basename(download_url.split("?")[0])) diff --git a/guarddog_nexus/webhooks.py b/guarddog_nexus/webhooks.py index 8b13e57..bb2c5d5 100644 --- a/guarddog_nexus/webhooks.py +++ b/guarddog_nexus/webhooks.py @@ -49,6 +49,18 @@ def _extract_asset_path(asset: dict) -> str | None: return None +def _detect_ecosystem(source: dict) -> str: + """Detect ecosystem from asset or component format field.""" + fmt = source.get("format", "").lower() + if fmt in ("pypi", "pip", "python"): + return "pypi" + if fmt in ("go", "golang"): + return "go" + if fmt in ("npm", "node"): + return "npm" + return fmt or DEFAULT_ECOSYSTEM + + @router.post("/nexus") async def nexus_webhook( request: Request, @@ -96,11 +108,12 @@ async def nexus_webhook( download_url = asset.get("downloadUrl") or _build_download_url( repository, asset_path ) + ecosystem = _detect_ecosystem(asset) - log.info("Webhook: %s asset %s in %s", action, asset_path, repository) + log.info("Webhook: %s asset %s (%s) in %s", action, asset_path, ecosystem, repository) background_tasks.add_task( - _scan_in_background, download_url, repository, DEFAULT_ECOSYSTEM, asset_path + _scan_in_background, download_url, repository, ecosystem, asset_path ) return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action} @@ -113,7 +126,8 @@ async def nexus_webhook( "reason": WEBHOOK_IGNORE_NO_NAME_OR_VERSION, } - background_tasks.add_task(_scan_component, repository, name, version) + ecosystem = _detect_ecosystem(component) + background_tasks.add_task(_scan_component, repository, name, version, ecosystem) return { "status": WEBHOOK_STATUS_ACCEPTED, "component": f"{name}=={version}", @@ -126,12 +140,12 @@ async def nexus_webhook( } -async def _scan_component(repository: str, name: str, version: str): +async def _scan_component(repository: str, name: str, version: str, ecosystem: str): from guarddog_nexus.nexus_client import nexus_get api_path = ( f"/service/rest/v1/search" - f"?repository={repository}&name={name}&version={version}&format={DEFAULT_ECOSYSTEM}" + f"?repository={repository}&name={name}&version={version}&format={ecosystem}" ) try: resp = await nexus_get(api_path) @@ -157,7 +171,7 @@ async def _scan_component(repository: str, name: str, version: str): log.info("Scanning component asset: %s", asset_path) async for session in get_session(): await harvest( - download_url, repository, DEFAULT_ECOSYSTEM, asset_path, session + download_url, repository, ecosystem, asset_path, session ) break diff --git a/scripts/setup-nexus.sh b/scripts/setup-nexus.sh index 2f6f0a8..9245810 100644 --- a/scripts/setup-nexus.sh +++ b/scripts/setup-nexus.sh @@ -30,37 +30,51 @@ if [ -f /nexus-data/admin.password ]; then echo "Using initial admin password from volume" fi -echo "Creating PyPI proxy repository..." -curl -sf -u "admin:${AUTH_PASS}" \ - -H "Content-Type: application/json" \ - -X POST \ - -d "{ - \"name\": \"pypi-proxy\", - \"online\": true, - \"storage\": { - \"blobStoreName\": \"default\", - \"strictContentTypeValidation\": true - }, - \"proxy\": { - \"remoteUrl\": \"https://pypi.org\", - \"contentMaxAge\": 1440, - \"metadataMaxAge\": 1440 - }, - \"negativeCache\": { - \"enabled\": true, - \"timeToLive\": 1440 - }, - \"httpClient\": { - \"blocked\": false, - \"autoBlock\": true, - \"connection\": { - \"timeout\": 60, - \"retries\": 3 - } +# Common proxy config +_proxy_config() { + cat </dev/null || \ - echo "Proxy repo may already exist, continuing..." + } +} +EOF +} + +_create_proxy() { + local name="$1" remote="$2" format="$3" + echo "Creating $name ($format proxy → $remote)..." + curl -sf -u "admin:${AUTH_PASS}" \ + -H "Content-Type: application/json" \ + -X POST \ + -d "$(_proxy_config "$name" "$remote")" \ + "${NEXUS_URL}/service/rest/v1/repositories/${format}/proxy" 2>/dev/null && \ + echo " OK" || echo " already exists or failed, continuing..." +} + +_create_proxy "pypi-proxy" "https://pypi.org" "pypi" +_create_proxy "go-proxy" "https://proxy.golang.org" "go" +_create_proxy "npm-proxy" "https://registry.npmjs.org" "npm" echo "" echo "NOTE: Webhook setup is not available in Nexus OSS/Community edition." @@ -68,7 +82,7 @@ echo "In Nexus Pro, configure:" echo " Capability: Webhook: Repository" echo " URL: ${WEBHOOK_URL}" echo " Event types: repository.component, repository.asset" -echo " Repository filter: pypi-proxy" +echo " Repository filter: pypi-proxy, go-proxy, npm-proxy" echo "" if [ -f /nexus-data/admin.password ]; then