"""Shared SQL query builders for GuardDog Nexus.

Eliminates ~90% duplicated SQL between api/*.py and web/routes.py.
"""

import datetime

from sqlalchemy import func, select, text
from sqlalchemy.ext.asyncio import AsyncSession

from guarddog_nexus.constants import (
    DASHBOARD_LATEST_FLAGGED_LIMIT,
    DASHBOARD_LATEST_SCANS_LIMIT,
    JSON_PATH_RULE,
    PACKAGE_SORT_FIELDS,
    RECENT_FLAGGED_DAYS,
    SCAN_SORT_FIELDS,
    TOP_RULES_LIMIT,
)
from guarddog_nexus.db.models import Finding, Scan


def _directed(column, sort_dir: str):
    """Return *column* as a DESC ordering when sort_dir is "desc", else ASC."""
    return column.desc() if sort_dir == "desc" else column.asc()


# ---------------------------------------------------------------------------
# Scan list query builder
# ---------------------------------------------------------------------------


def build_scan_list_query(
    flagged: bool | None = None,
    status: str | None = None,
    repository: str | None = None,
    search: str | None = None,
    sort_by: str = "started_at",
    sort_dir: str = "desc",
    limit: int = 50,
    offset: int = 0,
):
    """Build a filtered, sorted, paginated query for scans.

    Args:
        flagged: When not None, keep only scans whose ``flagged`` equals it.
        status: When truthy, keep only scans with this exact status.
        repository: When truthy, keep only scans from this exact repository.
        search: When truthy, case-insensitive substring match against the
            package name or the package version.
        sort_by: Key looked up in ``SCAN_SORT_FIELDS``; unknown keys fall
            back to ``started_at``.
        sort_dir: ``"desc"`` sorts descending; any other value ascending.
        limit: Maximum number of rows in the page.
        offset: Number of rows to skip before the page.

    Returns:
        A ``(rows_query, count_query)`` tuple. Neither query has been
        executed. ``count_query`` applies the same filters as ``rows_query``
        but no ordering or pagination.
    """
    # Collect the predicates once so the row query and the count query can
    # never drift apart.
    conditions = []
    if flagged is not None:
        conditions.append(Scan.flagged == flagged)
    if status:
        conditions.append(Scan.status == status)
    if repository:
        conditions.append(Scan.repository == repository)
    if search:
        # NOTE(review): "%" and "_" inside `search` are not escaped, so they
        # act as LIKE wildcards.
        pattern = f"%{search}%"
        conditions.append(
            Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
        )

    # Resolve the sort field; fall back to started_at for unknown names.
    sort_field_name = SCAN_SORT_FIELDS.get(sort_by, "started_at")
    sort_col = getattr(Scan, sort_field_name, Scan.started_at)

    q = (
        select(Scan)
        .where(*conditions)
        .order_by(_directed(sort_col, sort_dir))
        .offset(offset)
        .limit(limit)
    )
    count_q = select(func.count(Scan.id)).where(*conditions)
    return q, count_q


# ---------------------------------------------------------------------------
# Package list query builder
# ---------------------------------------------------------------------------


def build_package_list_query(
    flagged: bool | None = None,
    ecosystem: str | None = None,
    repository: str | None = None,
    search: str | None = None,
    sort_by: str = "last_scanned_at",
    sort_dir: str = "desc",
    limit: int = 50,
    offset: int = 0,
):
    """Build an aggregated package list query (one row per name+version).

    Each result row exposes the columns ``pkg_name``, ``pkg_ver``,
    ``ecosystem``, ``repository``, ``last_scan``, ``is_flagged``,
    ``findings_sum`` and ``sid``.

    Args:
        flagged: When not None, keep only groups whose ``max(flagged)``
            equals it (applied as HAVING, i.e. after aggregation).
        ecosystem: When truthy, keep only scans from this exact ecosystem.
        repository: When truthy, keep only scans from this exact repository.
        search: When truthy, case-insensitive substring match against the
            package name or the package version.
        sort_by: Key looked up in ``PACKAGE_SORT_FIELDS``; unknown keys fall
            back to ``started_at``.
        sort_dir: ``"desc"`` sorts descending; any other value ascending.
        limit: Maximum number of rows in the page.
        offset: Number of rows to skip before the page.

    Returns:
        A ``(rows_query, total_count_query)`` tuple. Neither query has been
        executed.
    """
    # NOTE(review): ecosystem and repository are selected without being
    # aggregated or grouped; this relies on the database accepting bare
    # columns in an aggregate query (SQLite does) - confirm if the backend
    # ever changes.
    grouped = select(
        Scan.package_name.label("pkg_name"),
        Scan.package_version.label("pkg_ver"),
        Scan.ecosystem,
        Scan.repository,
        func.max(Scan.started_at).label("last_scan"),
        func.max(Scan.flagged).label("is_flagged"),
        func.sum(Scan.total_findings).label("findings_sum"),
        func.max(Scan.id).label("sid"),
    ).group_by(Scan.package_name, Scan.package_version)

    if ecosystem:
        grouped = grouped.where(Scan.ecosystem == ecosystem)
    if repository:
        grouped = grouped.where(Scan.repository == repository)
    if search:
        pattern = f"%{search}%"
        grouped = grouped.where(
            Scan.package_name.ilike(pattern) | Scan.package_version.ilike(pattern)
        )
    if flagged is not None:
        grouped = grouped.having(func.max(Scan.flagged) == flagged)

    # Resolve the sort field; the sort key is the per-group maximum of it.
    sort_field_name = PACKAGE_SORT_FIELDS.get(sort_by, "started_at")
    sort_col_from = getattr(Scan, sort_field_name, Scan.started_at)
    ordering = _directed(func.max(sort_col_from), sort_dir)

    # Count over the unordered grouped query: sorting is irrelevant to a
    # row count.
    total_q = select(func.count()).select_from(grouped.subquery())

    # ORDER BY must live on the same SELECT as OFFSET/LIMIT. Ordering an
    # inner subquery and paginating an unordered outer SELECT leaves the
    # page order undefined.
    rows_q = grouped.order_by(ordering).offset(offset).limit(limit)
    return rows_q, total_q


# ---------------------------------------------------------------------------
# Dashboard stats (shared between API /stats and web dashboard)
# ---------------------------------------------------------------------------


async def get_dashboard_stats(session: AsyncSession) -> dict:
    """Return all dashboard statistics as a single dict.

    The queries are awaited one after another on the given session.

    Returns:
        A dict with these keys:

        - ``total_scans``: number of scans.
        - ``flagged_scans``: number of scans with ``flagged`` true.
        - ``recent_flagged``: flagged scans started within the last
          ``RECENT_FLAGGED_DAYS`` days.
        - ``total_findings``: number of findings.
        - ``llm_analyzed``: findings whose ``report`` JSON has a non-null
          ``$.verdict``.
        - ``llm_pending``: findings whose ``report`` is NULL.
        - ``latest_flagged``: most recently started flagged ``Scan`` rows,
          capped at ``DASHBOARD_LATEST_FLAGGED_LIMIT``.
        - ``latest_scans``: most recently started ``Scan`` rows, capped at
          ``DASHBOARD_LATEST_SCANS_LIMIT``.
        - ``top_rules``: list of ``{"rule": ..., "count": ...}`` dicts, most
          frequent first, capped at ``TOP_RULES_LIMIT``.
        - ``now``: current timezone-aware UTC datetime.

        Count values that come back as None are reported as 0.
    """
    # `== True` is deliberate: SQLAlchemy overloads `==` to build the SQL
    # comparison, so a plain truthiness test would not work here.
    is_flagged = Scan.flagged == True  # noqa: E712

    total_scans = await session.scalar(select(func.count(Scan.id)))
    flagged_scans = await session.scalar(select(func.count(Scan.id)).where(is_flagged))
    recent_flagged = await session.scalar(
        select(func.count(Scan.id)).where(
            is_flagged,
            # datetime('now', '-N days') is SQLite date-function syntax.
            Scan.started_at >= func.datetime("now", f"-{RECENT_FLAGGED_DAYS} days"),
        )
    )

    total_findings = await session.scalar(select(func.count(Finding.id)))
    llm_analyzed = await session.scalar(
        select(func.count(Finding.id)).where(
            func.json_extract(Finding.report, "$.verdict").isnot(None)
        )
    )
    llm_pending = await session.scalar(
        select(func.count(Finding.id)).where(Finding.report.is_(None))
    )

    flagged_result = await session.execute(
        select(Scan)
        .where(is_flagged)
        .order_by(Scan.started_at.desc())
        .limit(DASHBOARD_LATEST_FLAGGED_LIMIT)
    )
    latest_flagged = flagged_result.scalars().all()

    scans_result = await session.execute(
        select(Scan).order_by(Scan.started_at.desc()).limit(DASHBOARD_LATEST_SCANS_LIMIT)
    )
    latest_scans = scans_result.scalars().all()

    # Group and order by the labels declared in the SELECT list.
    rules_result = await session.execute(
        select(
            func.json_extract(Finding.data, JSON_PATH_RULE).label("rule"),
            func.count(Finding.id).label("cnt"),
        )
        .group_by(text("rule"))
        .order_by(text("cnt DESC"))
        .limit(TOP_RULES_LIMIT)
    )
    top_rules = rules_result.all()

    return {
        "total_scans": total_scans or 0,
        "flagged_scans": flagged_scans or 0,
        "recent_flagged": recent_flagged or 0,
        "total_findings": total_findings or 0,
        "llm_analyzed": llm_analyzed or 0,
        "llm_pending": llm_pending or 0,
        "latest_flagged": latest_flagged,
        "latest_scans": latest_scans,
        "top_rules": [{"rule": r.rule, "count": r.cnt} for r in top_rules],
        "now": datetime.datetime.now(datetime.timezone.utc),
    }