feat: add governance-audit hook — threat detection for Copilot sessions

Add real-time governance audit hook that scans prompts for threat signals: - 5 threat categories: data exfiltration, privilege escalation, system destruction, prompt injection, credential exposure - 4 governance levels: open, standard, strict, locked - Append-only JSON audit trail (logs/copilot/governance/audit.log) - Session summary with threat counts at session end - Privacy-aware: logs decisions and metadata, never prompt content Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-02-20 02:15:12 +00:00 · 2026-02-18 13:29:41 -08:00
parent 8480453512
commit 4a4b9343d5
6 changed files with 330 additions and 0 deletions
--- a/hooks/governance-audit/audit-prompt.sh
+++ b/hooks/governance-audit/audit-prompt.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+# Governance Audit: Scan user prompts for threat signals before agent processing
+#
+# Environment variables:
+#   GOVERNANCE_LEVEL - "open", "standard", "strict", "locked" (default: standard)
+#   BLOCK_ON_THREAT  - "true" to exit non-zero on threats (default: false)
+#   SKIP_GOVERNANCE_AUDIT - "true" to disable (default: unset)
+
+set -euo pipefail
+
+if [[ "${SKIP_GOVERNANCE_AUDIT:-}" == "true" ]]; then
+  exit 0
+fi
+
+INPUT=$(cat)
+
+mkdir -p logs/copilot/governance
+
+TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+LEVEL="${GOVERNANCE_LEVEL:-standard}"
+BLOCK="${BLOCK_ON_THREAT:-false}"
+LOG_FILE="logs/copilot/governance/audit.log"
+
+# Extract prompt text from Copilot input (JSON with userMessage field)
+PROMPT=""
+if command -v jq &>/dev/null; then
+  PROMPT=$(echo "$INPUT" | jq -r '.userMessage // .prompt // empty' 2>/dev/null || echo "")
+fi
+if [[ -z "$PROMPT" ]]; then
+  PROMPT="$INPUT"
+fi
+
+# Threat detection patterns organized by category
+# Each pattern has: category, description, severity (0.0-1.0)
+THREATS_FOUND=()
+
+check_pattern() {
+  local pattern="$1"
+  local category="$2"
+  local severity="$3"
+  local description="$4"
+
+  if echo "$PROMPT" | grep -qiE "$pattern"; then
+    local evidence
+    evidence=$(echo "$PROMPT" | grep -oiE "$pattern" | head -1)
+    THREATS_FOUND+=("$category:$severity:$description:$evidence")
+  fi
+}
+
+# Data exfiltration signals
+check_pattern "send\s+(all|every|entire)\s+\w+\s+to\s+" "data_exfiltration" "0.8" "Bulk data transfer"
+check_pattern "export\s+.*\s+to\s+(external|outside|third.?party)" "data_exfiltration" "0.9" "External export"
+check_pattern "curl\s+.*\s+-d\s+" "data_exfiltration" "0.7" "HTTP POST with data"
+check_pattern "upload\s+.*\s+(credentials|secrets|keys)" "data_exfiltration" "0.95" "Credential upload"
+
+# Privilege escalation signals
+check_pattern "(sudo|as\s+root|admin\s+access|runas\s+/user)" "privilege_escalation" "0.8" "Elevated privileges"
+check_pattern "chmod\s+777" "privilege_escalation" "0.9" "World-writable permissions"
+check_pattern "add\s+.*\s+(sudoers|administrators)" "privilege_escalation" "0.95" "Adding admin access"
+
+# System destruction signals
+check_pattern "(rm\s+-rf\s+/|del\s+/[sq]|format\s+c:)" "system_destruction" "0.95" "Destructive command"
+check_pattern "(drop\s+database|truncate\s+table|delete\s+from\s+\w+\s*$)" "system_destruction" "0.9" "Database destruction"
+check_pattern "wipe\s+(all|entire|every)" "system_destruction" "0.9" "Mass deletion"
+
+# Prompt injection signals
+check_pattern "ignore\s+(previous|above|all)\s+(instructions?|rules?|prompts?)" "prompt_injection" "0.9" "Instruction override"
+check_pattern "you\s+are\s+now\s+(a|an)\s+" "prompt_injection" "0.7" "Role reassignment"
+check_pattern "system\s*:\s*" "prompt_injection" "0.6" "System prompt injection"
+
+# Credential exposure signals
+check_pattern "(api[_-]?key|secret[_-]?key|password|token)\s*[:=]\s*['\"]?\w{8,}" "credential_exposure" "0.9" "Hardcoded credential"
+check_pattern "(aws_access_key|AKIA[0-9A-Z]{16})" "credential_exposure" "0.95" "AWS key exposure"
+
+# Log the prompt event
+if [[ ${#THREATS_FOUND[@]} -gt 0 ]]; then
+  # Build threats JSON array
+  THREATS_JSON="["
+  FIRST=true
+  MAX_SEVERITY="0.0"
+  for threat in "${THREATS_FOUND[@]}"; do
+    IFS=':' read -r category severity description evidence <<< "$threat"
+
+    if [[ "$FIRST" != "true" ]]; then
+      THREATS_JSON+=","
+    fi
+    FIRST=false
+
+    THREATS_JSON+=$(jq -Rn \
+      --arg cat "$category" \
+      --arg sev "$severity" \
+      --arg desc "$description" \
+      --arg ev "$evidence" \
+      '{"category":$cat,"severity":($sev|tonumber),"description":$desc,"evidence":$ev}')
+
+    # Track max severity
+    if (( $(echo "$severity > $MAX_SEVERITY" | bc -l 2>/dev/null || echo 0) )); then
+      MAX_SEVERITY="$severity"
+    fi
+  done
+  THREATS_JSON+="]"
+
+  jq -Rn \
+    --arg timestamp "$TIMESTAMP" \
+    --arg level "$LEVEL" \
+    --argjson threats "$THREATS_JSON" \
+    --argjson count "${#THREATS_FOUND[@]}" \
+    '{"timestamp":$timestamp,"event":"threat_detected","governance_level":$level,"threat_count":$count,"threats":$threats}' \
+    >> "$LOG_FILE"
+
+  echo "⚠️ Governance: ${#THREATS_FOUND[@]} threat signal(s) detected"
+  for threat in "${THREATS_FOUND[@]}"; do
+    IFS=':' read -r category severity description evidence <<< "$threat"
+    echo "  🔴 [$category] $description (severity: $severity)"
+  done
+
+  # In strict/locked mode or when BLOCK_ON_THREAT is true, exit non-zero to block
+  if [[ "$BLOCK" == "true" ]] || [[ "$LEVEL" == "strict" ]] || [[ "$LEVEL" == "locked" ]]; then
+    echo "🚫 Prompt blocked by governance policy (level: $LEVEL)"
+    exit 1
+  fi
+else
+  jq -Rn \
+    --arg timestamp "$TIMESTAMP" \
+    --arg level "$LEVEL" \
+    '{"timestamp":$timestamp,"event":"prompt_scanned","governance_level":$level,"status":"clean"}' \
+    >> "$LOG_FILE"
+fi
+
+exit 0