diff --git a/.env.example b/.env.example index 6f062f9..d7b4f0f 100644 --- a/.env.example +++ b/.env.example @@ -35,3 +35,4 @@ LLM_API_BASE=https://api.openai.com/v1 LLM_API_KEY= LLM_MODEL=gpt-4o-mini LLM_TIMEOUT_SECONDS=30 +LLM_MAX_CONCURRENT_ANALYSES=2 diff --git a/README.md b/README.md index 70da52a..3b2bf2e 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ python -m guarddog_nexus.main | `LLM_API_BASE` | `https://api.openai.com/v1` | Базовый URL OpenAI-совместимого API | | `LLM_MODEL` | `gpt-4o-mini` | Название модели | | `LLM_TIMEOUT_SECONDS` | `30` | Таймаут запроса к LLM | +| `LLM_MAX_CONCURRENT_ANALYSES` | `2` | Максимум одновременных LLM-анализов | ## Настройка Nexus diff --git a/guarddog_nexus/config.py b/guarddog_nexus/config.py index 38672ca..17f8b95 100644 --- a/guarddog_nexus/config.py +++ b/guarddog_nexus/config.py @@ -57,6 +57,9 @@ class Config: llm_api_key: str = os.getenv("LLM_API_KEY", "") llm_model: str = os.getenv("LLM_MODEL", LLM_DEFAULT_MODEL) llm_timeout: int = int(os.getenv("LLM_TIMEOUT_SECONDS", str(LLM_DEFAULT_TIMEOUT))) + llm_max_concurrent: int = int( + os.getenv("LLM_MAX_CONCURRENT_ANALYSES", "2") + ) config = Config() diff --git a/guarddog_nexus/core/harvester.py b/guarddog_nexus/core/harvester.py index 08f8aaf..7a9b434 100644 --- a/guarddog_nexus/core/harvester.py +++ b/guarddog_nexus/core/harvester.py @@ -35,6 +35,8 @@ async def harvest( format_: str, asset_path: str, session: AsyncSession, + initiator: str | None = None, + source_ip: str | None = None, ) -> Scan | None: ecosystem = format_ if format_ else DEFAULT_ECOSYSTEM @@ -78,6 +80,8 @@ async def harvest( ecosystem=ecosystem, repository=repository, nexus_asset_url=download_url, + initiator=initiator, + source_ip=source_ip, status=ScanStatus.PENDING.value, ) session.add(scan) diff --git a/guarddog_nexus/core/llm.py b/guarddog_nexus/core/llm.py index 87dbd08..b5055a7 100644 --- a/guarddog_nexus/core/llm.py +++ b/guarddog_nexus/core/llm.py @@ -3,6 +3,7 @@ Supports any OpenAI-compatible API endpoint with configurable model. """ +import asyncio import json import httpx @@ -11,6 +12,8 @@ from ..config import config from ..constants import LLM_ANALYSIS_SYSTEM_PROMPT from ..logging_setup import log +_llm_semaphore = asyncio.Semaphore(config.llm_max_concurrent) + def _build_user_message(finding: dict) -> str: """Build a concise prompt from a finding's data.""" @@ -62,12 +65,13 @@ async def analyze_finding(finding_data: dict) -> dict | None: } try: - async with httpx.AsyncClient( - timeout=config.llm_timeout, headers=headers - ) as client: - resp = await client.post(url, json=payload) - resp.raise_for_status() - body = resp.json() + async with _llm_semaphore: + async with httpx.AsyncClient( + timeout=config.llm_timeout, headers=headers + ) as client: + resp = await client.post(url, json=payload) + resp.raise_for_status() + body = resp.json() except httpx.TimeoutException: log.error( "LLM analysis timed out after %ds for rule=%s", diff --git a/guarddog_nexus/db/models.py b/guarddog_nexus/db/models.py index 3b19376..e2bdf04 100644 --- a/guarddog_nexus/db/models.py +++ b/guarddog_nexus/db/models.py @@ -36,6 +36,8 @@ class Scan(Base): ) finished_at: Mapped[datetime.datetime | None] = mapped_column(DateTime, nullable=True) error_message: Mapped[str | None] = mapped_column(Text, nullable=True) + initiator: Mapped[str | None] = mapped_column(String(255), nullable=True) + source_ip: Mapped[str | None] = mapped_column(String(45), nullable=True) findings: Mapped[list["Finding"]] = relationship( "Finding", back_populates="scan", cascade="all, delete-orphan" diff --git a/guarddog_nexus/routes/api_scans.py b/guarddog_nexus/routes/api_scans.py index aed5379..8d33fcb 100644 --- a/guarddog_nexus/routes/api_scans.py +++ b/guarddog_nexus/routes/api_scans.py @@ -153,5 +153,7 @@ async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)): "started_at": scan.started_at.isoformat() if scan.started_at else None, "finished_at": scan.finished_at.isoformat() if scan.finished_at else None, "error_message": scan.error_message, + "initiator": scan.initiator, + "source_ip": scan.source_ip, "findings": [{"id": f.id, **f.data, "report": f.report} for f in scan.findings], } diff --git a/guarddog_nexus/routes/webhooks.py b/guarddog_nexus/routes/webhooks.py index b56226c..ccd8cb5 100644 --- a/guarddog_nexus/routes/webhooks.py +++ b/guarddog_nexus/routes/webhooks.py @@ -96,9 +96,15 @@ async def nexus_webhook( if action not in RELEVANT_WEBHOOK_ACTIONS: return {"status": WEBHOOK_STATUS_IGNORED, "action": action} + # Log full payload for debugging (to discover available fields) + log.info("Webhook payload: initiator=%s nodeId=%s keys=%s", + data.get("initiator"), data.get("nodeId"), sorted(data.keys())) + repository = data.get("repositoryName", "") asset = data.get("asset") component = data.get("component") + initiator = data.get("initiator") + source_ip = request.client.host if request.client else None if asset: asset_path = _extract_asset_path(asset) @@ -113,7 +119,8 @@ async def nexus_webhook( log.info("Webhook: %s asset %s (%s) in %s", action, asset_path, ecosystem, repository) background_tasks.add_task( - _scan_in_background, download_url, repository, ecosystem, asset_path + _scan_in_background, download_url, repository, ecosystem, asset_path, + initiator=initiator, source_ip=source_ip, ) return {"status": WEBHOOK_STATUS_ACCEPTED, "asset": asset_path, "action": action} @@ -181,10 +188,15 @@ async def _scan_in_background( repository: str, format_: str, asset_path: str, + initiator: str | None = None, + source_ip: str | None = None, ): try: async for session in get_session(): - await harvest(download_url, repository, format_, asset_path, session) + await harvest( + download_url, repository, format_, asset_path, session, + initiator=initiator, source_ip=source_ip, + ) break except Exception as e: log.error("Background scan failed: %s", e) diff --git a/guarddog_nexus/web/static/style.css b/guarddog_nexus/web/static/style.css index ee296a4..efd9591 100644 --- a/guarddog_nexus/web/static/style.css +++ b/guarddog_nexus/web/static/style.css @@ -237,6 +237,15 @@ table.compact td { padding: 0.35rem 0.5rem; } .llm-actions { margin-top: 0.5rem; } .llm-actions button { font-size: 0.8rem; } +.llm-disclaimer { + margin-top: 0.6rem; + font-size: 0.72rem; + opacity: 0.5; + font-style: italic; + border-top: 1px solid var(--pico-color-gray-600); + padding-top: 0.4rem; +} + /* ------------------------------------------------------------------ */ /* Shared controls */ /* ------------------------------------------------------------------ */ diff --git a/guarddog_nexus/web/templates/_llm_report_fragment.html b/guarddog_nexus/web/templates/_llm_report_fragment.html index b278d83..d311adf 100644 --- a/guarddog_nexus/web/templates/_llm_report_fragment.html +++ b/guarddog_nexus/web/templates/_llm_report_fragment.html @@ -7,4 +7,5 @@

{{ report.summary }}

{{ report.analysis }}

+

⚠ AI-generated analysis — may contain inaccuracies. Always verify findings before taking action.

diff --git a/guarddog_nexus/web/templates/dashboard_stats.html b/guarddog_nexus/web/templates/dashboard_stats.html index 19e9490..dcd89be 100644 --- a/guarddog_nexus/web/templates/dashboard_stats.html +++ b/guarddog_nexus/web/templates/dashboard_stats.html @@ -1,47 +1,21 @@ -
- {{ total_scans }} scans - {{ flagged_scans }} flagged - {{ total_findings }} findings - {{ errors_count }} errors - {{ warnings_count }} warnings -
- -
- {% if days %} -
-

Scan activity (14 days)

-
- {% set max_cnt = days | map(attribute=1) | max %} - {% for day, cnt, fl in days %} -
- {% set h = (cnt / max_cnt * 38) | int if max_cnt > 0 else 0 %} -
-
{{ day }}: {{ cnt }} scans, {{ fl }} flagged
-
- {% endfor %} -
-
- {% endif %} - - {% if latest_flagged %} -
-

Latest Flagged

- - - - {% for s in latest_flagged %} - - - - - - - {% endfor %} - -
PackageVersionFindingsTime
{{ s.package_name }}{{ s.package_version }}{{ s.total_findings }}{{ s.started_at.strftime('%m-%d %H:%M') if s.started_at }}
-
- {% endif %} -
+{% if latest_flagged %} +
+

Latest Flagged

+ + + + {% for s in latest_flagged %} + + + + + + + {% endfor %} + +
PackageVersionFindingsTime
{{ s.package_name }}{{ s.package_version }}{{ s.total_findings }}{{ s.started_at.strftime('%m-%d %H:%M') if s.started_at }}
+
+{% endif %}

Latest Scans

diff --git a/guarddog_nexus/web/templates/package_detail.html b/guarddog_nexus/web/templates/package_detail.html index 582eb06..27260eb 100644 --- a/guarddog_nexus/web/templates/package_detail.html +++ b/guarddog_nexus/web/templates/package_detail.html @@ -64,6 +64,7 @@

{{ f.report.summary }}

{{ f.report.analysis }}

+

⚠ AI-generated analysis — may contain inaccuracies.

{% else %}
diff --git a/guarddog_nexus/web/templates/scan_detail.html b/guarddog_nexus/web/templates/scan_detail.html index aff200d..8e7b42c 100644 --- a/guarddog_nexus/web/templates/scan_detail.html +++ b/guarddog_nexus/web/templates/scan_detail.html @@ -24,6 +24,8 @@
SHA256
{{ scan.sha256 or '-' }}
Started
{{ scan.started_at.strftime('%Y-%m-%d %H:%M') if scan.started_at }}
Finished
{{ scan.finished_at.strftime('%Y-%m-%d %H:%M') if scan.finished_at }}
+ {% if scan.initiator %}
Initiated by
{{ scan.initiator }}
{% endif %} + {% if scan.source_ip %}
Source IP
{{ scan.source_ip }}
{% endif %}
{% if scan.error_message %}
Error: {{ scan.error_message }}
{% endif %}
@@ -58,6 +60,7 @@

{{ f.report.summary }}

{{ f.report.analysis }}

+

⚠ AI-generated analysis — may contain inaccuracies.

{% else %}