#!/bin/bash
# ops-sentinel: checks the OpenClaw gateway and session store, appends a
# one-line summary to ops-sentinel.log, and raises a system event when any
# check records an issue.
#
# NOTE(review): the paths below suggest a macOS host, where /bin/bash is
# typically 3.2 -- keep constructs compatible with that version; confirm.
set -e -u -o pipefail

# Fixed search path; presumably so a Homebrew-installed `openclaw` resolves
# when the script is started from a minimal environment -- confirm.
PATH="/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin"
export PATH

# Log location (the directory is created on demand).
LOG_DIR="/Users/openclaw/.openclaw/workspace/tmp"
LOG_FILE="${LOG_DIR}/ops-sentinel.log"
mkdir -p "${LOG_DIR}"

# One timestamp per run, shared by the log line and the alert text.
ts="$(date '+%Y-%m-%d %H:%M:%S %Z')"

# Accumulators filled by the checks that follow:
#   issues  - human-readable problems; any entry triggers an alert
#   summary - key=value pairs written to the log on every run
declare -a issues=()
declare -a summary=()

# Gateway status
# Query the gateway once and extract the fields the sentinel reports on.
# The exit status is captured rather than left to `set -e`: a failing status
# command would otherwise abort the script before anything is logged or
# alerted, which is exactly the situation that most needs reporting.
gateway_rc=0
gateway_status=$(openclaw gateway status) || gateway_rc=$?

# `|| true`: grep exits 1 when the line is absent; that is not fatal here.
runtime_line=$(grep '^Runtime:' <<<"$gateway_status" || true)
rpc_line=$(grep '^RPC probe:' <<<"$gateway_status" || true)
# PID from the first line containing "Runtime:" followed by "pid <digits>";
# empty when no such line exists.
pid_line=$(sed -nE 's/.*Runtime:.*pid ([0-9]+).*/\1/p' <<<"$gateway_status" | head -n1)

summary+=("runtime=${runtime_line#Runtime: }")
summary+=("rpc=${rpc_line#RPC probe: }")
summary+=("pid=${pid_line:-unknown}")

if [[ $gateway_rc -ne 0 ]]; then
  issues+=("Gateway status command failed (exit $gateway_rc)")
fi
# A missing line raises no issue; only a present line with the wrong content.
# NOTE(review): these are substring matches, so a line containing e.g.
# "broken" satisfies *"ok"* and "not running" satisfies *"running"* --
# tighten once the exact CLI output format is confirmed.
if [[ -n "$rpc_line" && "$rpc_line" != *"ok"* ]]; then
  issues+=("Gateway RPC probe != ok")
fi
if [[ -n "$runtime_line" && "$runtime_line" != *"running"* ]]; then
  issues+=("Gateway runtime not running")
fi

# Worker count
# Number of lines in the worker agent's session listing that contain
# 'worker:task-'. Errors from the CLI are discarded and `|| true` absorbs
# grep's exit status 1 on zero matches, so a failed listing reads as 0.
max_workers=20
worker_count=$(
  openclaw sessions --agent worker 2>/dev/null \
    | grep -c 'worker:task-' \
    || true
)
summary+=("workers=${worker_count}")
if (( worker_count > max_workers )); then
  issues+=("Worker sessions high ($worker_count)")
fi

# Canonical session check
# The session key is defined once and used for both the search and the alert
# text, so the message always names the key that was actually looked for.
# NOTE(review): confirm this is the intended canonical key for this host.
canonical_key='agent:main:main'
# Count listing lines containing the key (-F: literal match, not a regex).
# CLI errors are discarded and `|| true` absorbs grep's exit status 1 on zero
# matches, so a failed listing is indistinguishable from a missing session
# and also yields 0.
canonical_count=$(openclaw sessions 2>/dev/null | grep -cF -- "$canonical_key" || true)
summary+=("canonical=$canonical_count")
if [[ $canonical_count -eq 0 ]]; then
  issues+=("$canonical_key missing from session store")
fi

# Append this run's summary to the log as "<timestamp> | <key=value ...>".
log_line="${ts} | ${summary[*]}"
printf '%s\n' "$log_line" >> "$LOG_FILE"

# Raise a system event only when at least one check recorded an issue.
if (( ${#issues[@]} > 0 )); then
  alert_context="rpc=${rpc_line#RPC probe: }, workers=$worker_count, canonical=$canonical_count"
  msg="Sentinel alert @ $ts: ${issues[*]} (${alert_context})"
  openclaw system event --mode now --text "$msg"
fi
