#!/usr/bin/env python3
import json
import os
import re
import time
import urllib.request
from pathlib import Path

# Log file written by the event router; main() scans it for forwarding results.
ROUTER_LOG = Path("/Users/openclaw/event-router/router.log")

# Directory holding the watchdog's own files: the persisted scan state, the
# machine-readable health summary and the human-readable alert.
STATE_DIR = Path("/Users/openclaw/.openclaw/workspace/tmp/notion-webhook-watchdog")
STATE_PATH = STATE_DIR.joinpath("state.json")
HEALTH_PATH = STATE_DIR.joinpath("health.json")
ALERT_PATH = STATE_DIR.joinpath("alert.md")

# Tunables, each overridable through an environment variable of the same name.
# Number of 401/5xx matches in newly read log data that raises a warning.
REPEATED_FAIL_THRESHOLD = int(os.environ.get("REPEATED_FAIL_THRESHOLD", "3"))
# Minutes without a successful forward before a warning is raised.
NO_SUCCESS_MINUTES = int(os.environ.get("NO_SUCCESS_MINUTES", "90"))
# Hours without a successful forward (while the router still answers its
# health check) before the level becomes critical.
SUSPENSION_HOURS = int(os.environ.get("SUSPENSION_HOURS", "6"))
# URL probed with a GET request to decide whether the router is reachable.
ROUTER_HEALTH_URL = os.environ.get("ROUTER_HEALTH_URL", "http://127.0.0.1:8080/health")

# Log signatures used to classify router forwarding results. Every match of
# every pattern in a list is counted, so patterns within one list must not
# overlap: a pattern that only matches text already matched by another one
# would make a single log entry count twice.
SUCCESS_PATTERNS = [
    # Any 2xx status field. This also covers "[notion] selftest_forward" lines,
    # which carry the same field, so they need no dedicated pattern.
    re.compile(r'openclawStatus":2\d\d'),
    re.compile(r'forwarded to OpenClaw: 2\d\d'),
]
# Forwards rejected with HTTP 401.
AUTH_FAIL_PATTERNS = [
    re.compile(r'openclawStatus":401'),
    re.compile(r'forwarded to OpenClaw: 401'),
]
# Forwards answered with any HTTP 5xx status.
UPSTREAM_5XX_PATTERNS = [
    re.compile(r'openclawStatus":5\d\d'),
    re.compile(r'forwarded to OpenClaw: 5\d\d'),
]


def read_json(path: Path, default):
    """Load JSON from *path*, falling back to *default* when it is unusable.

    Args:
        path: File to read.
        default: Value returned when the file is missing or unreadable, does
            not contain valid JSON, or (when *default* is a dict) contains
            JSON that is not an object.

    Returns:
        The decoded JSON value, or *default*.
    """
    try:
        # Read directly instead of checking exists() first: the file may
        # disappear between the check and the read.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing or unreadable file. ValueError: invalid JSON or
        # invalid UTF-8 (JSONDecodeError and UnicodeDecodeError are both
        # subclasses). RecursionError: pathologically nested JSON.
        return default
    # A dict default means the caller will use mapping methods on the result;
    # valid JSON of another shape (null, a list, ...) is as unusable as a
    # corrupt file.
    if isinstance(default, dict) and not isinstance(data, dict):
        return default
    return data


def write_json(path: Path, data):
    """Serialize *data* as indented JSON and atomically store it at *path*.

    The text is first written to a temporary sibling file and then moved over
    *path* with ``os.replace``, so an interrupted run leaves the previous
    content intact instead of a truncated file.

    Args:
        path: Destination file; its parent directory must already exist.
        data: JSON-serializable value.

    Raises:
        TypeError: If *data* cannot be serialized (nothing is written).
        OSError: If the temporary file cannot be written or moved into place.
    """
    payload = json.dumps(data, indent=2) + "\n"
    # Include the PID so two overlapping runs never share a temporary file.
    tmp_path = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temporary file is already gone; this
        # only cleans up after a failure.
        try:
            tmp_path.unlink()
        except OSError:
            pass


def check_router_reachable(url: str) -> bool:
    """Report whether a GET request to *url* is answered with a 2xx status.

    This is a best-effort probe with a 4 second timeout: any failure while
    building the request, connecting, or reading the status yields ``False``
    rather than an exception.
    """
    reachable = False
    try:
        probe = urllib.request.Request(url, method="GET")
        with urllib.request.urlopen(probe, timeout=4) as response:  # nosec B310
            status_code = int(response.status)
            reachable = status_code >= 200 and status_code < 300
    except Exception:
        reachable = False
    return reachable


def count_matches(text: str, patterns) -> int:
    """Return the total number of matches of all *patterns* in *text*.

    Each pattern's non-overlapping matches are counted independently and the
    counts are added, so text matched by two patterns contributes twice.
    """
    total = 0
    for pattern in patterns:
        total += len(pattern.findall(text))
    return total


def _to_int(value, fallback):
    """Return ``int(value)``, or *fallback* when *value* is not convertible."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return fallback


def _read_new_log(log_path: Path, offset: int):
    """Read the bytes of *log_path* that follow *offset*.

    Returns a ``(raw, start, end)`` tuple: the bytes read, the offset the read
    actually started from (0 when the file is shorter than *offset*, i.e. it
    was truncated or replaced), and the offset just past the last byte read.
    A missing file yields ``(b"", 0, 0)``.
    """
    try:
        with log_path.open("rb") as log_file:
            size = log_file.seek(0, os.SEEK_END)
            start = offset if offset <= size else 0
            log_file.seek(start)
            raw = log_file.read()
    except FileNotFoundError:
        return b"", 0, 0
    # Derive the end offset from what was really read, not from the size seen
    # before reading: lines appended in between would otherwise be scanned
    # again (and counted twice) on the next run.
    return raw, start, start + len(raw)


def _log_contains_success(log_path: Path) -> bool:
    """Return True when any success pattern occurs anywhere in *log_path*."""
    try:
        text = log_path.read_bytes().decode("utf-8", errors="replace")
    except FileNotFoundError:
        return False
    return any(pattern.search(text) for pattern in SUCCESS_PATTERNS)


def _format_alert(summary) -> str:
    """Render the health *summary* as the Markdown text of the alert file."""
    window = summary["window"]
    lines = [
        "# Notion Webhook Watchdog Alert",
        "",
        f"- Level: **{summary['level'].upper()}**",
        f"- Time (UTC): `{summary['time_iso']}`",
        f"- Router reachable: `{summary['router_reachable']}`",
        f"- New success count (since last run): `{window['new_success_count']}`",
        f"- New auth failures 401 (since last run): `{window['new_auth_fail_count']}`",
        f"- New upstream 5xx (since last run): `{window['new_upstream_5xx_count']}`",
        f"- Minutes since last successful forward: `{summary['state']['minutes_since_success']}`",
        "",
    ]
    if summary["instructions"]:
        lines.append("## Action Required")
        lines.extend(f"- {item}" for item in summary["instructions"])
    else:
        lines.append("## Status")
        lines.append("- No action required.")
    return "\n".join(lines) + "\n"


def main():
    """Run one watchdog pass over the router log.

    Scans the log data appended since the previous run for successful, 401 and
    5xx forwards, probes the router health URL, derives the alert level, and
    writes the health summary (HEALTH_PATH), the Markdown alert (ALERT_PATH)
    and the updated scan state (STATE_PATH).
    """
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    now = int(time.time())

    state = read_json(
        STATE_PATH,
        {
            "offset": 0,
            "last_success_epoch": None,
            "updated_at": None,
        },
    )
    if not isinstance(state, dict):
        # Valid JSON of the wrong shape (e.g. null or a list): start over.
        state = {}

    # Tolerate a missing, non-numeric or negative saved offset.
    saved_offset = max(0, _to_int(state.get("offset"), 0))
    raw, start, new_offset = _read_new_log(ROUTER_LOG, saved_offset)
    chunk = raw.decode("utf-8", errors="replace")

    success_count = count_matches(chunk, SUCCESS_PATTERNS)
    auth_fail_count = count_matches(chunk, AUTH_FAIL_PATTERNS)
    upstream_5xx_count = count_matches(chunk, UPSTREAM_5XX_PATTERNS)

    if success_count > 0:
        last_success_epoch = now
    else:
        last_success_epoch = _to_int(state.get("last_success_epoch"), None)

    # Bootstrap guard: avoid false suspension alerts when the log already
    # contains successful forwards but the state records none. When the scan
    # started at offset 0 the chunk above already covered the whole log, so
    # the full re-read is only needed for a scan that started further in.
    if last_success_epoch is None and start > 0 and _log_contains_success(ROUTER_LOG):
        last_success_epoch = now

    minutes_since_success = None
    if last_success_epoch is not None:
        minutes_since_success = max(0, (now - last_success_epoch) // 60)

    router_reachable = check_router_reachable(ROUTER_HEALTH_URL)

    repeated_failures = auth_fail_count + upstream_5xx_count
    repeated_failure_alert = repeated_failures >= REPEATED_FAIL_THRESHOLD
    no_success_alert = (
        minutes_since_success is None or minutes_since_success >= NO_SUCCESS_MINUTES
    )
    # Router answers its health check but nothing has been delivered for a
    # long time: the failure is presumably upstream of the router.
    suspension_alert = (
        router_reachable
        and (minutes_since_success is None or minutes_since_success >= SUSPENSION_HOURS * 60)
    )

    # NOTE(review): an unreachable router does not by itself change the level;
    # confirm that is intended.
    level = "healthy"
    if repeated_failure_alert or no_success_alert:
        level = "warning"
    if suspension_alert:
        level = "critical"

    summary = {
        "service": "notion-webhook",
        "level": level,
        "time_epoch": now,
        "time_iso": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(now)),
        "router_reachable": router_reachable,
        "window": {
            # Count the bytes actually read; re-encoding the decoded text
            # would inflate the figure whenever invalid bytes were replaced.
            "new_log_bytes": len(raw),
            "new_success_count": success_count,
            "new_auth_fail_count": auth_fail_count,
            "new_upstream_5xx_count": upstream_5xx_count,
        },
        "thresholds": {
            "repeated_fail_threshold": REPEATED_FAIL_THRESHOLD,
            "no_success_minutes": NO_SUCCESS_MINUTES,
            "suspension_hours": SUSPENSION_HOURS,
        },
        "state": {
            "last_success_epoch": last_success_epoch,
            "minutes_since_success": minutes_since_success,
            "repeated_failure_alert": repeated_failure_alert,
            "no_success_alert": no_success_alert,
            "suspension_alert": suspension_alert,
        },
        "instructions": [],
    }

    if repeated_failure_alert:
        summary["instructions"].append(
            "Repeated 401/5xx forwarding failures detected. Check hooks token sync and restart gateway + event-router."
        )
    if no_success_alert:
        summary["instructions"].append(
            "No successful webhook forwards in threshold window. Verify Notion events are arriving and router forwarding is healthy."
        )
    if suspension_alert:
        summary["instructions"].append(
            "Suspension sentinel: endpoint reachable but no successful deliveries for extended period. In Notion, verify webhook subscription is active and resume/recreate if suspended."
        )

    write_json(HEALTH_PATH, summary)
    ALERT_PATH.write_text(_format_alert(summary))

    # Persist the scan position last, after both reports were written.
    state["offset"] = new_offset
    state["last_success_epoch"] = last_success_epoch
    state["updated_at"] = summary["time_iso"]
    write_json(STATE_PATH, state)


if __name__ == "__main__":
    main()
