refactor: JSON data column for findings, code snippets captured and displayed

This commit is contained in:
Marker689
2026-05-09 05:52:10 +03:00
parent e83167a938
commit e577f1944c
11 changed files with 60 additions and 57 deletions

View File

@@ -21,9 +21,9 @@ async def list_findings(
): ):
q = select(Finding) q = select(Finding)
if rule: if rule:
q = q.where(Finding.rule == rule) q = q.where(func.json_extract(Finding.data, "$.rule") == rule)
if severity: if severity:
q = q.where(Finding.severity == severity) q = q.where(func.json_extract(Finding.data, "$.severity") == severity)
if scan_id: if scan_id:
q = q.where(Finding.scan_id == scan_id) q = q.where(Finding.scan_id == scan_id)
@@ -38,10 +38,7 @@ async def list_findings(
{ {
"id": f.id, "id": f.id,
"scan_id": f.scan_id, "scan_id": f.scan_id,
"rule": f.rule, **f.data,
"severity": f.severity,
"message": f.message,
"location": f.location,
"created_at": f.created_at.isoformat() if f.created_at else None, "created_at": f.created_at.isoformat() if f.created_at else None,
} }
for f in findings for f in findings

View File

@@ -84,12 +84,13 @@ async def get_package(
if not scans: if not scans:
return {"detail": "Not found"} return {"detail": "Not found"}
all_findings = [] all_findings: list[dict] = []
for s in scans: for s in scans:
findings = ( findings = (
(await session.execute(select(Finding).where(Finding.scan_id == s.id))).scalars().all() (await session.execute(select(Finding).where(Finding.scan_id == s.id))).scalars().all()
) )
all_findings.extend(f.__dict__ for f in findings) for f in findings:
all_findings.append({"id": f.id, **f.data})
return { return {
"name": scans[0].package_name, "name": scans[0].package_name,
@@ -107,14 +108,5 @@ async def get_package(
} }
for s in scans for s in scans
], ],
"findings": [ "findings": all_findings,
{
"id": f["id"],
"rule": f.get("rule"),
"severity": f.get("severity"),
"message": f.get("message"),
"location": f.get("location"),
}
for f in all_findings
],
} }

View File

@@ -1,7 +1,7 @@
"""REST API for scans.""" """REST API for scans."""
from fastapi import APIRouter, Depends, Query from fastapi import APIRouter, Depends, Query
from sqlalchemy import func, select from sqlalchemy import func, select, text
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload from sqlalchemy.orm import selectinload
@@ -63,9 +63,12 @@ async def scan_stats(session: AsyncSession = Depends(get_session)):
top_rules = ( top_rules = (
await session.execute( await session.execute(
select(Finding.rule, func.count(Finding.id).label("cnt")) select(
.group_by(Finding.rule) func.json_extract(Finding.data, "$.rule").label("rule"),
.order_by(func.count(Finding.id).desc()) func.count(Finding.id).label("cnt"),
)
.group_by(text("rule"))
.order_by(text("cnt DESC"))
.limit(10) .limit(10)
) )
).all() ).all()
@@ -103,14 +106,5 @@ async def get_scan(scan_id: int, session: AsyncSession = Depends(get_session)):
"started_at": scan.started_at.isoformat() if scan.started_at else None, "started_at": scan.started_at.isoformat() if scan.started_at else None,
"finished_at": scan.finished_at.isoformat() if scan.finished_at else None, "finished_at": scan.finished_at.isoformat() if scan.finished_at else None,
"error_message": scan.error_message, "error_message": scan.error_message,
"findings": [ "findings": [{"id": f.id, **f.data} for f in scan.findings],
{
"id": f.id,
"rule": f.rule,
"severity": f.severity,
"message": f.message,
"location": f.location,
}
for f in scan.findings
],
} }

View File

@@ -83,14 +83,7 @@ async def harvest(
findings_list = result.get("findings", []) findings_list = result.get("findings", [])
for fdata in findings_list: for fdata in findings_list:
finding = Finding( session.add(Finding(scan_id=scan.id, data=fdata))
scan_id=scan.id,
rule=fdata["rule"],
severity=fdata["severity"],
message=fdata["message"],
location=fdata.get("location"),
)
session.add(finding)
scan.total_findings = len(findings_list) scan.total_findings = len(findings_list)
scan.flagged = len(findings_list) > 0 scan.flagged = len(findings_list) > 0

View File

@@ -3,7 +3,17 @@
import datetime import datetime
from enum import Enum from enum import Enum
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, func from sqlalchemy import (
JSON,
Boolean,
DateTime,
ForeignKey,
Integer,
String,
Text,
UniqueConstraint,
func,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.orm import Mapped, mapped_column, relationship
from guarddog_nexus.database import Base from guarddog_nexus.database import Base
@@ -50,10 +60,7 @@ class Finding(Base):
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
scan_id: Mapped[int] = mapped_column(Integer, ForeignKey("scans.id"), nullable=False) scan_id: Mapped[int] = mapped_column(Integer, ForeignKey("scans.id"), nullable=False)
rule: Mapped[str] = mapped_column(String(255), nullable=False) data: Mapped[dict] = mapped_column(JSON, nullable=False)
severity: Mapped[str] = mapped_column(String(50), nullable=False)
message: Mapped[str] = mapped_column(Text, nullable=False)
location: Mapped[str | None] = mapped_column(String(512), nullable=True)
created_at: Mapped[datetime.datetime] = mapped_column( created_at: Mapped[datetime.datetime] = mapped_column(
DateTime, nullable=False, default=func.now() DateTime, nullable=False, default=func.now()
) )

View File

@@ -77,6 +77,7 @@ def _normalize_output(data: dict) -> dict:
"severity": "WARNING", "severity": "WARNING",
"message": value, "message": value,
"location": "", "location": "",
"code": "",
} }
) )
elif isinstance(value, list): elif isinstance(value, list):
@@ -88,6 +89,7 @@ def _normalize_output(data: dict) -> dict:
"severity": item.get("severity", "WARNING"), "severity": item.get("severity", "WARNING"),
"message": item.get("message", ""), "message": item.get("message", ""),
"location": item.get("location", ""), "location": item.get("location", ""),
"code": item.get("code", ""),
} }
) )
elif isinstance(value, dict) and not value: elif isinstance(value, dict) and not value:

View File

@@ -4,7 +4,7 @@ import datetime
from fastapi import APIRouter, Depends, Request from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse from fastapi.responses import HTMLResponse
from sqlalchemy import Integer, cast, func, select from sqlalchemy import Integer, cast, func, select, text
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from guarddog_nexus.database import get_session from guarddog_nexus.database import get_session
@@ -53,10 +53,14 @@ async def _dashboard_data(session: AsyncSession) -> dict:
total_findings = await session.scalar(select(func.count(Finding.id))) total_findings = await session.scalar(select(func.count(Finding.id)))
warnings_count = await session.scalar( warnings_count = await session.scalar(
select(func.count(Finding.id)).where(Finding.severity == "WARNING") select(func.count(Finding.id)).where(
func.json_extract(Finding.data, "$.severity") == "WARNING"
)
) )
errors_count = await session.scalar( errors_count = await session.scalar(
select(func.count(Finding.id)).where(Finding.severity == "ERROR") select(func.count(Finding.id)).where(
func.json_extract(Finding.data, "$.severity") == "ERROR"
)
) )
latest_flagged = ( latest_flagged = (
@@ -77,9 +81,12 @@ async def _dashboard_data(session: AsyncSession) -> dict:
top_rules = ( top_rules = (
await session.execute( await session.execute(
select(Finding.rule, func.count(Finding.id).label("cnt")) select(
.group_by(Finding.rule) func.json_extract(Finding.data, "$.rule").label("rule"),
.order_by(func.count(Finding.id).desc()) func.count(Finding.id).label("cnt"),
)
.group_by(text("rule"))
.order_by(text("cnt DESC"))
.limit(10) .limit(10)
) )
).all() ).all()

View File

@@ -28,6 +28,7 @@
<strong>{{ f.rule }}</strong> <strong>{{ f.rule }}</strong>
{% if f.location %}<small> @ {{ f.location }}</small>{% endif %} {% if f.location %}<small> @ {{ f.location }}</small>{% endif %}
<p>{{ f.message }}</p> <p>{{ f.message }}</p>
{% if f.code %}<pre><code>{{ f.code }}</code></pre>{% endif %}
</article> </article>
{% endfor %} {% endfor %}
{% else %} {% else %}

View File

@@ -16,12 +16,13 @@
<h2>Findings ({{ scan.findings|length }})</h2> <h2>Findings ({{ scan.findings|length }})</h2>
{% if scan.findings %} {% if scan.findings %}
{% for f in scan.findings|sort(attribute='severity', reverse=true) %} {% for f in scan.findings|sort(attribute='data.severity', reverse=true) %}
<article class="finding-card {{ f.severity }}"> <article class="finding-card {{ f.data.severity }}">
<strong class="severity-{{ f.severity }}">[{{ f.severity }}]</strong> <strong class="severity-{{ f.data.severity }}">[{{ f.data.severity }}]</strong>
<strong>{{ f.rule }}</strong> <strong>{{ f.data.rule }}</strong>
{% if f.location %}<small> @ {{ f.location }}</small>{% endif %} {% if f.data.location %}<small> @ {{ f.data.location }}</small>{% endif %}
<p>{{ f.message }}</p> <p>{{ f.data.message }}</p>
{% if f.data.code %}<pre><code>{{ f.data.code }}</code></pre>{% endif %}
</article> </article>
{% endfor %} {% endfor %}
{% else %} {% else %}

View File

@@ -147,18 +147,21 @@ def guarddog_normalized_flagged():
"severity": "WARNING", "severity": "WARNING",
"message": "Package contains URL to suspicious domain", "message": "Package contains URL to suspicious domain",
"location": "setup.py:15", "location": "setup.py:15",
"code": "url = 'http://evil.com'",
}, },
{ {
"rule": "exec-base64", "rule": "exec-base64",
"severity": "WARNING", "severity": "WARNING",
"message": "Base64-encoded code execution detected", "message": "Base64-encoded code execution detected",
"location": "core.py:42", "location": "core.py:42",
"code": "exec(base64.b64decode(...))",
}, },
{ {
"rule": "empty_information", "rule": "empty_information",
"severity": "WARNING", "severity": "WARNING",
"message": "Package description is empty", "message": "Package description is empty",
"location": "", "location": "",
"code": "",
}, },
], ],
"errors": [], "errors": [],

View File

@@ -43,6 +43,12 @@ async def test_harvest_new_package(db_session, guarddog_normalized_flagged):
.all() .all()
) )
assert len(findings) == 3 assert len(findings) == 3
rules = {f.data["rule"] for f in findings}
assert "shady-links" in rules
# Check code is preserved
for f in findings:
if f.data["rule"] == "shady-links":
assert f.data["code"] == "url = 'http://evil.com'"
@pytest.mark.asyncio @pytest.mark.asyncio