All files / src/app/api/graph/validate route.ts

96.29% Statements 26/27
50% Branches 1/2
100% Functions 12/12
100% Lines 24/24

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213          1x                                   1x                                               1x                                                                       3x 3x   3x               3x                                                                 45x               30x                                                       75x           2x 2x   2x 30x     2x   2x   2x         2x     34x     1x           1x       1x             1x 1x      
import { NextResponse } from "next/server";
import { runQuery } from "@/lib/neo4j";
import { requireAuth, isAuthError } from "@/lib/auth-guard";
import { LABEL_LAYER } from "@/lib/graph-constants";
 
// Next.js route segment config: "force-dynamic" opts this route out of static
// rendering/caching so the handler runs on every request (see the Next.js
// Route Segment Config docs).
export const dynamic = "force-dynamic";
 
/**
 * GET /api/graph/validate
 *
 * Validates the Neo4j knowledge graph and returns a structured report:
 *
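 *  nodeCounts          — count of nodes per label, each flagged known/unknown
 *  edgeCounts          — count of edges per relationship type
 *  orphans             — known-label nodes with no relationships at all (max 50)
 *  missingProps        — nodes missing ALL required identifying properties (sampled)
 *  unknownLabels       — labels present in Neo4j but not in LABEL_LAYER mapping
 *  unexpectedEdgeTypes — relationship types between known-label nodes whose type is
 *                        not listed in VALID_EDGES (checked by type only, not by
 *                        source→target pair)
 *  validEdgeRules      — the VALID_EDGES table itself (type, from, to)
 *  summary             — { totalNodes, totalEdges, issueCount }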
 */
 
// Identifying properties expected on each node label, keyed by label.
// The validator reports a node only when EVERY property listed for its
// label is null/missing; having any one of them is enough to pass.
const REQUIRED_PROPS: Record<string, string[]> = Object.fromEntries<string[]>([
  ["Participant", ["participantId", "name"]],
  ["DataProduct", ["productId", "name"]],
  ["Contract", ["contractId"]],
  ["HDABApproval", ["approvalId"]],
  ["HealthDataset", ["datasetId", "title"]],
  ["Patient", ["resourceId"]],
  ["Condition", ["resourceId", "code"]],
  ["Observation", ["resourceId", "code"]],
  ["OMOPPerson", ["personId"]],
  ["SnomedConcept", ["conceptId"]],
  ["LoincCode", ["loincNumber"]],
  ["TrustCenter", ["name", "did"]],
  ["SPESession", ["sessionId", "studyId"]],
  ["ResearchPseudonym", ["rpsnId", "studyId"]],
  ["VerifiableCredential", ["credentialId", "credentialType"]],
]);
 
// Expected relationship rules, written compactly as [type, from, to] rows
// (source label → target label). This is not an exhaustive schema: only the
// most critical cross-layer relationships are listed.
const EDGE_RULE_ROWS: ReadonlyArray<
  readonly [type: string, from: string, to: string]
> = [
  // L1 Governance
  ["OFFERS", "Participant", "DataProduct"],
  ["GOVERNED_BY", "DataProduct", "OdrlPolicy"],
  ["HAS_CONTRACT", "DataProduct", "Contract"],
  ["APPROVED_BY", "Contract", "HDABApproval"],
  ["APPLIED_BY", "AccessApplication", "Participant"],
  // L1 → L2
  ["DESCRIBES", "DataProduct", "HealthDataset"],
  // L2 HealthDCAT-AP
  ["HAS_DISTRIBUTION", "HealthDataset", "Distribution"],
  ["CONFORMS_TO", "HealthDataset", "EEHRxFProfile"],
  // L3 FHIR
  ["HAS_CONDITION", "Patient", "Condition"],
  ["HAS_OBSERVATION", "Patient", "Observation"],
  ["HAS_ENCOUNTER", "Patient", "Encounter"],
  ["HAS_MEDICATION_REQUEST", "Patient", "MedicationRequest"],
  ["HAS_PROCEDURE", "Patient", "Procedure"],
  // L3 → L4 (FHIR to OMOP CDM)
  ["MAPS_TO", "Patient", "OMOPPerson"],
  ["MAPS_TO", "Condition", "OMOPConditionOccurrence"],
  // L3 → L5 (FHIR coding)
  ["CODED_BY", "Condition", "SnomedConcept"],
  ["CODED_BY", "Condition", "ICD10Code"],
  ["CODED_BY", "Observation", "LoincCode"],
  ["CODED_BY", "MedicationRequest", "RxNormConcept"],
  // Phase 18: Trust Center
  ["GOVERNED_BY", "TrustCenter", "HDABApproval"],
  ["RESOLVES_PSEUDONYMS_FOR", "TrustCenter", "HealthDataset"],
  ["MUTUALLY_RECOGNISES", "TrustCenter", "TrustCenter"],
  ["MANAGES", "TrustCenter", "SPESession"],
  ["LINKED_FROM", "ResearchPseudonym", "ProviderPseudonym"],
  ["USED_IN", "ResearchPseudonym", "SPESession"],
];

// Object form of the rows above; key order (type, from, to) is kept so the
// serialised output is unchanged.
const VALID_EDGES: Array<{
  type: string;
  from: string;
  to: string;
}> = EDGE_RULE_ROWS.map(([type, from, to]) => ({ type, from, to }));
 
/**
 * GET handler: runs a set of read-only validation queries against Neo4j in
 * parallel and returns the combined report as JSON.
 *
 * Response body on success:
 *  - summary             { totalNodes, totalEdges, issueCount }
 *  - nodeCounts          per-label node counts, each with a `known` flag
 *  - edgeCounts          per-type edge counts, each with a `defined` flag
 *  - orphans             known-label nodes with no relationships (max 50 rows)
 *  - missingProps        labels with nodes lacking ALL required props
 *  - unknownLabels       labels not present in LABEL_LAYER
 *  - unexpectedEdgeTypes edge types between known-label nodes that are not
 *                        listed in VALID_EDGES (max 30 rows)
 *  - validEdgeRules      the VALID_EDGES table
 *
 * @returns the value produced by `requireAuth()` when `isAuthError` accepts
 *   it; otherwise the JSON report, or `{ error: "Neo4j unavailable" }` with
 *   status 502 when any query throws.
 */
export async function GET() {
  const auth = await requireAuth();
  if (isAuthError(auth)) return auth;

  try {
    // Own enumerable keys only. The same list feeds both the Cypher
    // parameters and the `known` flag below, so the two can never disagree
    // (a plain `label in LABEL_LAYER` would also match inherited keys such
    // as "constructor" or "toString").
    const knownLabels = Object.keys(LABEL_LAYER);
    const knownLabelSet = new Set(knownLabels);

    // Several rules share a relationship type (e.g. CODED_BY), so collapse
    // them once: a Set for O(1) lookups and a duplicate-free query parameter.
    const validTypeSet = new Set(VALID_EDGES.map((e) => e.type));
    const validTypes = Array.from(validTypeSet);

    const [
      nodeLabelRows,
      edgeTypeRows,
      orphanRows,
      missingPropResults,
      unknownLabelRows,
      edgeValidationRows,
    ] = await Promise.all([
      // 1. Count nodes per label
      runQuery<{ label: string; count: number }>(
        `MATCH (n)
         UNWIND labels(n) AS lbl
         WITH lbl, count(*) AS cnt
         ORDER BY cnt DESC
         RETURN lbl AS label, cnt AS count`,
      ),

      // 2. Count edges per relationship type
      runQuery<{ type: string; count: number }>(
        `MATCH ()-[r]->()
         WITH type(r) AS t, count(*) AS cnt
         ORDER BY cnt DESC
         RETURN t AS type, cnt AS count`,
      ),

      // 3. Find orphan nodes (no relationships at all)
      runQuery<{ label: string; name: string; id: string }>(
        `MATCH (n)
         WHERE NOT (n)--()
           AND any(l IN labels(n) WHERE l IN $knownLabels)
         RETURN labels(n)[0] AS label,
                coalesce(n.name, n.id, elementId(n)) AS name,
                elementId(n) AS id
         LIMIT 50`,
        { knownLabels },
      ),

      // 4. Missing required properties — one query per label.
      //    The label is interpolated (Cypher cannot parameterise labels); it
      //    comes only from the REQUIRED_PROPS keys, never from request input.
      Promise.all(
        Object.entries(REQUIRED_PROPS).map(async ([label, props]) => {
          const rows = await runQuery<{ id: string; name: string }>(
            `MATCH (n:\`${label}\`)
             WHERE all(p IN $props WHERE n[p] IS NULL)
             RETURN elementId(n) AS id,
                    coalesce(n.name, n.id, elementId(n)) AS name
             LIMIT 20`,
            { props },
          );
          return { label, missing: rows, requiredProps: props };
        }),
      ),

      // 5. Labels in Neo4j that are NOT in our LABEL_LAYER mapping
      runQuery<{ label: string; count: number }>(
        `MATCH (n)
         UNWIND labels(n) AS lbl
         WITH lbl, count(*) AS cnt
         WHERE NOT lbl IN $knownLabels
         RETURN lbl AS label, cnt AS count
         ORDER BY cnt DESC`,
        { knownLabels },
      ),

      // 6. Edge types between known-label nodes that are NOT in our expected
      //    set. Only the relationship type is checked here, not the
      //    source→target label pair.
      runQuery<{ type: string; count: number }>(
        `MATCH (a)-[r]->(b)
         WHERE any(l IN labels(a) WHERE l IN $knownLabels)
           AND any(l IN labels(b) WHERE l IN $knownLabels)
           AND NOT type(r) IN $validTypes
         WITH type(r) AS t, count(*) AS cnt
         ORDER BY cnt DESC
         RETURN t AS type, cnt AS count
         LIMIT 30`,
        { knownLabels, validTypes },
      ),
    ]);

    // Summarise. Counts go through Number() before arithmetic/serialisation.
    // totalNodes sums the per-label counts, so a node carrying several labels
    // contributes once per label.
    const totalNodes = nodeLabelRows.reduce((s, r) => s + Number(r.count), 0);
    const totalEdges = edgeTypeRows.reduce((s, r) => s + Number(r.count), 0);

    const missingPropIssues = missingPropResults.filter(
      (r) => r.missing.length > 0,
    );
    // One issue per orphan row, per label with missing props, and per
    // unexpected edge type. Unknown labels are reported but not counted.
    const issueCount =
      orphanRows.length + missingPropIssues.length + edgeValidationRows.length;

    return NextResponse.json({
      summary: { totalNodes, totalEdges, issueCount },
      nodeCounts: nodeLabelRows.map((r) => ({
        label: r.label,
        count: Number(r.count),
        known: knownLabelSet.has(r.label),
      })),
      edgeCounts: edgeTypeRows.map((r) => ({
        type: r.type,
        count: Number(r.count),
        defined: validTypeSet.has(r.type),
      })),
      orphans: orphanRows,
      missingProps: missingPropIssues.map((r) => ({
        label: r.label,
        requiredProps: r.requiredProps,
        // Capped at 20 by the query's LIMIT, so this is a lower bound.
        count: r.missing.length,
        sample: r.missing.slice(0, 5),
      })),
      unknownLabels: unknownLabelRows.map((r) => ({
        label: r.label,
        count: Number(r.count),
      })),
      unexpectedEdgeTypes: edgeValidationRows.map((r) => ({
        type: r.type,
        count: Number(r.count),
      })),
      validEdgeRules: VALID_EDGES,
    });
  } catch (err) {
    console.error("GET /api/graph/validate error:", err);
    return NextResponse.json({ error: "Neo4j unavailable" }, { status: 502 });
  }
}