fix: try/except in _scan_component, serialize_finding to prevent data injection, DRY LLM template, SUPPORTED_ECOSYSTEMS constant

This commit is contained in:
Marker689
2026-05-11 19:45:49 +03:00
parent 6743321463
commit a6cd20e41c
8 changed files with 67 additions and 83 deletions

View File

@@ -32,6 +32,7 @@ METADATA_PATTERNS = (
# ---------------------------------------------------------------------------
DEFAULT_ECOSYSTEM = "pypi"
SUPPORTED_ECOSYSTEMS = frozenset({"pypi", "go", "npm"})
# ---------------------------------------------------------------------------
# Severity

View File

@@ -13,7 +13,7 @@ from ..constants import (
)
from ..db.engine import get_session
from ..db.models import Finding
from ..schemas import FindingsListResponse
from ..schemas import FindingsListResponse, serialize_finding
router = APIRouter(prefix="/api/v1/findings", tags=["findings"])
@@ -42,14 +42,5 @@ async def list_findings(
"total": total,
"limit": limit,
"offset": offset,
"findings": [
{
"id": f.id,
"scan_id": f.scan_id,
**f.data,
"report": f.report,
"created_at": f.created_at.isoformat() if f.created_at else None,
}
for f in findings
],
"findings": [serialize_finding(f) for f in findings],
}

View File

@@ -20,7 +20,7 @@ from ..core.nexus import parse_package_path
from ..db.engine import get_session
from ..db.models import Scan
from ..db.queries import build_package_list_query
from ..schemas import PackageDetailOut, PackageListResponse
from ..schemas import PackageDetailOut, PackageListResponse, serialize_finding
router = APIRouter(prefix="/api/v1/packages", tags=["packages"])
@@ -145,7 +145,7 @@ async def get_package(
all_findings: list[dict] = []
for s in scans:
for f in s.findings:
all_findings.append({"id": f.id, **f.data, "report": f.report})
all_findings.append(serialize_finding(f))
return {
"name": scans[0].package_name,

View File

@@ -19,7 +19,7 @@ from ..constants import (
from ..db.engine import get_session
from ..db.models import Scan
from ..db.queries import build_scan_list_query, get_dashboard_stats
from ..schemas import ScanDetailOut, ScanListResponse, StatsResponse
from ..schemas import ScanDetailOut, ScanListResponse, StatsResponse, serialize_finding
router = APIRouter(prefix="/api/v1/scans", tags=["scans"])
@@ -171,5 +171,5 @@ async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)) -
"error_message": scan.error_message,
"initiator": scan.initiator,
"source_ip": scan.source_ip,
"findings": [{"id": f.id, **f.data, "report": f.report} for f in scan.findings],
"findings": [serialize_finding(f) for f in scan.findings],
}

View File

@@ -159,6 +159,7 @@ async def nexus_webhook(
async def _scan_component(repository: str, name: str, version: str, ecosystem: str):
try:
from ..core.nexus import nexus_get
params = urlencode(
@@ -188,11 +189,15 @@ async def _scan_component(repository: str, name: str, version: str, ecosystem: s
asset_path = _extract_asset_path(asset)
if not asset_path or not _is_package_asset(asset_path):
continue
download_url = asset.get("downloadUrl") or _build_download_url(repository, asset_path)
download_url = asset.get("downloadUrl") or _build_download_url(
repository, asset_path
)
log.info("Scanning component asset: %s", asset_path)
async for session in get_session():
await harvest(download_url, repository, ecosystem, asset_path, session)
break
except Exception as e:
log.error("Component scan failed for %s==%s: %s", name, version, e)
async def _scan_in_background(

View File

@@ -100,3 +100,20 @@ class StatsResponse(BaseModel):
total_findings: int
top_rules: list[dict]
latest_scan_at: datetime | None = None
# Finding data known fields (prevents **f.data from overwriting id/scan_id).
# Only these keys are ever copied out of ``Finding.data`` into a response.
_FINDING_DATA_FIELDS = ("rule", "severity", "message", "location", "code")


def serialize_finding(finding) -> dict:
    """Extract known fields from a Finding, preventing data field injection.

    The result always contains ``id``, ``scan_id``, ``report`` and
    ``created_at`` taken from the finding's own attributes, plus every key
    listed in ``_FINDING_DATA_FIELDS`` taken from ``finding.data``. Keys in
    ``finding.data`` that are not whitelisted (including ``id``/``scan_id``)
    are ignored, so stored data can never overwrite the envelope fields.

    Args:
        finding: Object exposing ``id``, ``scan_id``, ``report``,
            ``created_at`` and ``data`` attributes.

    Returns:
        A plain dict. ``created_at`` is an ISO-8601 string, or ``None`` when
        the finding has no timestamp. Whitelisted fields that are missing
        from ``finding.data`` default to an empty string.
    """
    created_at = finding.created_at
    # A finding whose data is None is serialized as if it had no data fields
    # instead of raising AttributeError on ``None.get``.
    data = finding.data or {}

    result = {
        "id": finding.id,
        "scan_id": finding.scan_id,
        "report": finding.report,
        "created_at": created_at.isoformat() if created_at else None,
    }
    for field in _FINDING_DATA_FIELDS:
        result[field] = data.get(field, "")
    return result

View File

@@ -57,24 +57,9 @@
{% if f.report and f.report.status == "analyzing" %}
{% include "_llm_spinner.html" %}
{% elif f.report and f.report.verdict %}
<div class="llm-report llm-{{ f.report.verdict }}">
<div class="llm-header">
<span class="llm-badge llm-badge-{{ f.report.verdict }}">{{ f.report.verdict }}</span>
{% if f.report.severity_rating %}
<span class="llm-severity">{{ f.report.severity_rating }}</span>
{% endif %}
{% if config.llm_enabled and not config.llm_auto_analyze %}
<button class="llm-retry"
hx-post="/api/v1/findings/{{ f.id }}/analyze?retry=1"
hx-target="closest .llm-report"
hx-swap="outerHTML"
hx-indicator="closest .llm-report">{{ t('llm_retry', request.state.lang) }}</button>
{% endif %}
</div>
<p class="llm-summary">{{ f.report.summary }}</p>
<p class="llm-analysis">{{ f.report.analysis }}</p>
<p class="llm-disclaimer">{{ t('llm_disclaimer', request.state.lang) }}</p>
</div>
{% with report=f.report, finding_id=f.id %}
{% include "_llm_report_fragment.html" %}
{% endwith %}
{% elif config.llm_enabled and not config.llm_auto_analyze %}
<div class="llm-actions" id="llm-{{ f.id }}">
<button class="outline"

View File

@@ -53,24 +53,9 @@
{% if f.report and f.report.status == "analyzing" %}
{% include "_llm_spinner.html" %}
{% elif f.report and f.report.verdict %}
<div class="llm-report llm-{{ f.report.verdict }}">
<div class="llm-header">
<span class="llm-badge llm-badge-{{ f.report.verdict }}">{{ f.report.verdict }}</span>
{% if f.report.severity_rating %}
<span class="llm-severity">{{ f.report.severity_rating }}</span>
{% endif %}
{% if config.llm_enabled and not config.llm_auto_analyze %}
<button class="llm-retry"
hx-post="/api/v1/findings/{{ f.id }}/analyze?retry=1"
hx-target="closest .llm-report"
hx-swap="outerHTML"
hx-indicator="closest .llm-report">{{ t('llm_retry', request.state.lang) }}</button>
{% endif %}
</div>
<p class="llm-summary">{{ f.report.summary }}</p>
<p class="llm-analysis">{{ f.report.analysis }}</p>
<p class="llm-disclaimer">{{ t('llm_disclaimer', request.state.lang) }}</p>
</div>
{% with report=f.report, finding_id=f.id %}
{% include "_llm_report_fragment.html" %}
{% endwith %}
{% elif config.llm_enabled and not config.llm_auto_analyze %}
<div class="llm-actions" id="llm-{{ f.id }}">
<button class="outline"