#!/usr/bin/env python3
"""
Bee intermediary triage pipeline.

Purpose:
- Prevent duplicate / low-practicality Bee task suggestions from going directly into Notion Tasks.
- Stage suggestions locally, score + dedupe them, and only promote explicitly approved items.

Usage:
  # 1) Ingest a Bee payload JSON file into local staging
  python3 scripts/bee_triage.py ingest --input bee_payload.json

  # 2) Build triage queue (dedupe + scoring)
  python3 scripts/bee_triage.py triage

  # 3) Manually mark approved rows in tmp/bee_review_queue.jsonl
  #    set "status": "Approved"

  # 4) Promote approved rows into Notion Tasks (idempotent by dedupe key)
  python3 scripts/bee_triage.py promote --apply
"""

from __future__ import annotations

import argparse
import hashlib
import json
import os
import re
import urllib.request
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Iterable, List

# Local JSONL state files (staging -> review queue -> promote), all under tmp/.
ROOT = Path("/Users/openclaw/.openclaw/workspace")
TMP = ROOT / "tmp"
STAGING = TMP / "bee_staging.jsonl"  # raw ingested action items
QUEUE = TMP / "bee_review_queue.jsonl"  # scored + deduped triage queue
IGNORE = TMP / "bee_ignore.jsonl"  # dedupe keys to skip during triage

# Pinned Notion API version header so response-shape assumptions stay stable.
NOTION_VERSION = "2025-09-03"
# Target Tasks database; overridable via the NOTION_TASKS_DB_ID env var.
TASKS_DB_ID = os.environ.get("NOTION_TASKS_DB_ID", "5397f31d-9431-48f4-ab6a-5d75b9289ff7")


def now_iso() -> str:
    """Return the current UTC timestamp as a timezone-aware ISO-8601 string."""
    utc_now = datetime.now(timezone.utc)
    return utc_now.isoformat()


def load_json(path: Path) -> Dict:
    """Parse *path* as UTF-8 JSON and return the decoded object."""
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)


def append_jsonl(path: Path, rows: Iterable[Dict]) -> int:
    """Append each row in *rows* as one JSON line to *path*.

    Creates parent directories as needed and returns the number of
    rows written.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    written = 0
    with path.open("a", encoding="utf-8") as handle:
        for record in rows:
            handle.write(json.dumps(record, ensure_ascii=False))
            handle.write("\n")
            written += 1
    return written


def read_jsonl(path: Path) -> List[Dict]:
    """Load every JSON line from *path*; a missing file yields an empty list.

    Blank lines are skipped so hand-edited files stay readable.
    """
    if not path.exists():
        return []
    with path.open("r", encoding="utf-8") as handle:
        stripped = (line.strip() for line in handle)
        return [json.loads(text) for text in stripped if text]


def ignored_keys() -> set[str]:
    """Collect the non-empty dedupe keys recorded in the ignore list."""
    keys = {str(row.get("dedupe_key") or "").strip() for row in read_jsonl(IGNORE)}
    keys.discard("")
    return keys


def normalize_text(text: str) -> str:
    """Canonicalize text for matching: lowercase, collapse whitespace,
    and drop every character that is not a-z, 0-9, or a space."""
    lowered = (text or "").lower().strip()
    collapsed = re.sub(r"\s+", " ", lowered)
    return re.sub(r"[^a-z0-9 ]", "", collapsed)


def dedupe_key(source_id: str, text: str) -> str:
    """Stable 16-hex-char key from the source id plus the normalized text.

    The normalization mirrors normalize_text (lowercase, collapsed
    whitespace, alphanumerics-and-spaces only) so the key is insensitive
    to case, punctuation, and whitespace differences.
    """
    norm = re.sub(r"[^a-z0-9 ]", "", re.sub(r"\s+", " ", (text or "").lower().strip()))
    digest = hashlib.sha1(f"{source_id}|{norm}".encode()).hexdigest()
    return "bee:" + digest[:16]


def actionability_score(text: str) -> int:
    """Heuristic 0-100 practicality score for a candidate task title.

    Starts at 30, rewards action verbs, sufficient length, and time cues,
    and penalizes hedging language and very short items.  The inline
    normalization mirrors normalize_text so scoring ignores case and
    punctuation.
    """
    t = re.sub(r"[^a-z0-9 ]", "", re.sub(r"\s+", " ", (text or "").lower().strip()))
    score = 30

    action_verbs = (
        "buy", "source", "set up", "setup", "load", "follow up", "evaluate",
        "review", "install", "remove", "call", "send", "schedule", "clean",
    )
    if any(verb in t for verb in action_verbs):
        score += 25

    # Enough words usually means the item has real context.
    if len(t.split()) >= 5:
        score += 15

    # A concrete time cue is a strong actionability signal.
    if re.search(r"\b(today|tomorrow|this week|daily|weekly|by )\b", t):
        score += 10

    hedges = ("maybe", "kind of", "should", "could", "not sure", "if possible")
    if any(hedge in t for hedge in hedges):
        score -= 20

    # Very short items tend to be fragments, not tasks.
    if len(t) < 20:
        score -= 25

    return max(0, min(100, score))


def extract_action_items(payload: Dict) -> List[str]:
    """Pull action-item strings out of a Bee payload.

    Prefers the structured "action_items" list when present; otherwise
    scans the markdown "summary" for bullet lines under a
    "## Action Items" heading, stopping at the next "## " heading.
    """
    structured = payload.get("action_items")
    if isinstance(structured, list):
        cleaned = (str(entry).strip() for entry in structured)
        return [entry for entry in cleaned if entry]

    collected: List[str] = []
    inside = False
    for raw_line in str(payload.get("summary") or "").splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue
        if stripped.lower().startswith("## action items"):
            inside = True
        elif inside and stripped.startswith("## "):
            break
        elif inside and stripped.startswith("-"):
            collected.append(stripped.lstrip("- ").strip())
    return collected


def notion_key() -> str:
    """Read the Notion API key from ~/.config/notion/api_key.

    Raises FileNotFoundError when the key file is absent.  Uses a context
    manager so the file handle is closed deterministically instead of
    being left to the garbage collector.
    """
    key_path = os.path.expanduser("~/.config/notion/api_key")
    with open(key_path, "r", encoding="utf-8") as f:
        return f.read().strip()


def notion_request(method: str, path: str, payload=None) -> Dict:
    """Perform an authenticated Notion API call and return the parsed JSON body.

    *payload*, when given, is JSON-encoded as the request body.  Transport
    or API failures surface as urllib.error.URLError / HTTPError.
    """
    body = None if payload is None else json.dumps(payload).encode("utf-8")
    headers = {
        "Authorization": f"Bearer {notion_key()}",
        "Notion-Version": NOTION_VERSION,
        "Content-Type": "application/json",
    }
    request = urllib.request.Request(
        f"https://api.notion.com/v1{path}",
        data=body,
        method=method,
        headers=headers,
    )
    with urllib.request.urlopen(request, timeout=45) as response:
        return json.loads(response.read().decode("utf-8"))


def task_exists_by_desc_marker(marker: str) -> bool:
    """Best-effort idempotency check: does *marker* appear in any recent
    task's Description?

    Only the first 100 rows of the tasks data source are inspected, so this
    is a light guard rather than a hard guarantee.
    """
    query_path = "/data_sources/98ac8c59-4c40-4672-a628-2c35392eecfa/query"
    page = notion_request("POST", query_path, {"page_size": 100})
    for result in page.get("results", []):
        rich_text = result.get("properties", {}).get("Description", {}).get("rich_text", [])
        description = "".join(part.get("plain_text", "") for part in rich_text)
        if marker in description:
            return True
    return False


def create_task_in_notion(
    title: str,
    project_page_id: str,
    label: str,
    marker: str,
    context_text: str,
    source_id: str,
    actionability_score: int,
    dry_run: bool,
) -> str:
    """Create one task page in the Notion Tasks database and return its URL.

    When *dry_run* is set, returns the sentinel "dry-run" without touching
    the API.  The dedupe *marker* is embedded in Description so later runs
    can detect (and skip) an already-promoted suggestion.
    """
    if dry_run:
        return "dry-run"

    description_parts = [marker]
    if source_id:
        description_parts.append(f"Bee Source: {source_id}")
    if isinstance(actionability_score, int):
        description_parts.append(f"Actionability Score: {actionability_score}")
    if context_text:
        description_parts.append("Context:")
        description_parts.append(context_text[:1400])

    # Notion rich_text content length is capped; trim defensively.
    props = {
        "Name": {"title": [{"type": "text", "text": {"content": title[:1900]}}]},
        "Section": {"select": {"name": "Process"}},
        "Labels": {"multi_select": [{"name": label}]},
        "Description": {
            "rich_text": [
                {"type": "text", "text": {"content": "\n\n".join(description_parts)[:1900]}}
            ]
        },
    }

    # An empty project id leaves the relation unset, which routes the task
    # to the Inbox views.
    if project_page_id:
        props["Project"] = {"relation": [{"id": project_page_id}]}

    created = notion_request(
        "POST",
        "/pages",
        {"parent": {"database_id": TASKS_DB_ID}, "properties": props},
    )
    return created.get("url", "")


def cmd_ingest(args):
    """Ingest one Bee payload JSON file into the local staging JSONL."""
    payload = load_json(Path(args.input))

    # Fall back to a content hash when the payload carries no explicit id.
    fallback_id = hashlib.sha1(json.dumps(payload, sort_keys=True).encode()).hexdigest()[:16]
    source_id = str(payload.get("source_id") or payload.get("id") or fallback_id)

    context_text = str(payload.get("summary") or payload.get("context") or "").strip()
    source_confirmed = bool(payload.get("confirmed", False))

    rows = [
        {
            "ingested_at": now_iso(),
            "source_id": source_id,
            "title": item,
            "context": context_text,
            "source_confirmed": source_confirmed,
            "normalized": normalize_text(item),
            "dedupe_key": dedupe_key(source_id, item),
            "status": "Raw",
        }
        for item in extract_action_items(payload)
    ]

    n = append_jsonl(STAGING, rows)
    print(json.dumps({"ingested": n, "staging": str(STAGING)}, indent=2))


def cmd_triage(args):
    """Rebuild the review queue from staging: dedupe, ignore-filter, score.

    Rows with a score below --min-score are Rejected; confirmed-source rows
    that clear the bar are auto-Approved; everything else is a Candidate
    awaiting manual review.  The queue file is rewritten from scratch.
    """
    ignored = ignored_keys()
    seen_keys = set()
    queue = []
    skipped_ignored = 0

    for row in read_jsonl(STAGING):
        key = row.get("dedupe_key")
        # Skip repeats and rows whose normalized text is empty.
        if key in seen_keys or not row.get("normalized", ""):
            continue
        seen_keys.add(key)
        if key in ignored:
            skipped_ignored += 1
            continue

        score = actionability_score(row.get("title", ""))
        confirmed = bool(row.get("source_confirmed", False))

        # Score gate comes first: even a source-confirmed item must clear
        # the minimum quality bar.
        if score < args.min_score:
            status, reason = "Rejected", "low_actionability"
        elif confirmed:
            status, reason = "Approved", "auto_approved_confirmed_source"
        else:
            status, reason = "Candidate", ""

        queue.append({
            "reviewed_at": now_iso(),
            "source_id": row.get("source_id"),
            "title": row.get("title"),
            "context": row.get("context", ""),
            "source_confirmed": confirmed,
            "dedupe_key": key,
            "actionability_score": score,
            "status": status,
            "reason": reason,
        })

    QUEUE.parent.mkdir(parents=True, exist_ok=True)
    with QUEUE.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(entry, ensure_ascii=False) + "\n" for entry in queue)

    def count(status: str) -> int:
        return sum(1 for entry in queue if entry["status"] == status)

    print(json.dumps({
        "queue_rows": len(queue),
        "approved": count("Approved"),
        "candidate": count("Candidate"),
        "rejected": count("Rejected"),
        "skipped_ignored": skipped_ignored,
        "queue": str(QUEUE),
        "ignore": str(IGNORE),
    }, indent=2))


def cmd_promote(args):
    """Promote Approved queue rows into Notion Tasks.

    Idempotent: rows whose dedupe marker already appears in a task's
    Description are skipped.  Without --apply this is a dry run.
    """
    approved = [
        row for row in read_jsonl(QUEUE)
        if str(row.get("status", "")).lower() == "approved"
    ]

    created = 0
    skipped = 0
    report = []

    for row in approved:
        marker = f"[BEE_DEDUPE:{row['dedupe_key']}]"
        if task_exists_by_desc_marker(marker):
            skipped += 1
            report.append({"title": row["title"], "status": "skipped_exists"})
            continue

        url = create_task_in_notion(
            title=row["title"],
            project_page_id=args.project_id,
            label=args.label,
            marker=marker,
            context_text=str(row.get("context", "")),
            source_id=str(row.get("source_id", "")),
            actionability_score=int(row.get("actionability_score", 0) or 0),
            dry_run=not args.apply,
        )
        created += 1
        outcome = "created" if args.apply else "would_create"
        report.append({"title": row["title"], "status": outcome, "url": url})

    print(json.dumps({
        "approved_rows": len(approved),
        "created": created,
        "skipped_existing": skipped,
        "apply": args.apply,
        "items": report,
    }, indent=2))


def cmd_ignore(args):
    """Record a dedupe key in the ignore list so triage skips it from now on.

    The key comes either directly from --dedupe-key or is derived from
    --source-id plus --title; exits with an error when neither is given.
    """
    if args.dedupe_key:
        key = args.dedupe_key.strip()
    elif args.source_id and args.title:
        key = dedupe_key(str(args.source_id).strip(), str(args.title).strip())
    else:
        key = ""

    if not key:
        raise SystemExit("Provide --dedupe-key OR both --source-id and --title")

    append_jsonl(IGNORE, [{
        "ignored_at": now_iso(),
        "dedupe_key": key,
        "source_id": str(args.source_id or "").strip(),
        "title": str(args.title or "").strip(),
        "reason": str(args.reason or "manual_ignore").strip(),
    }])
    print(json.dumps({"ignored": key, "ignore": str(IGNORE)}, indent=2))


def cmd_unignore(args):
    """Drop a dedupe key from the ignore list by rewriting the file."""
    key = str(args.dedupe_key or "").strip()
    if not key:
        raise SystemExit("Provide --dedupe-key")

    remaining = [
        row for row in read_jsonl(IGNORE)
        if str(row.get("dedupe_key", "")).strip() != key
    ]
    IGNORE.parent.mkdir(parents=True, exist_ok=True)
    with IGNORE.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(row, ensure_ascii=False) + "\n" for row in remaining)
    print(json.dumps({"unignored": key, "remaining": len(remaining), "ignore": str(IGNORE)}, indent=2))


def cmd_list_ignored(args):
    """Print ignore-list entries, keeping only the last --limit rows."""
    entries = read_jsonl(IGNORE)
    limit = args.limit
    if limit and limit > 0:
        entries = entries[-limit:]
    print(json.dumps({"count": len(entries), "items": entries}, indent=2))


def main():
    """CLI entry point: build the argparse tree and dispatch to a handler."""
    parser = argparse.ArgumentParser()
    sub = parser.add_subparsers(dest="cmd", required=True)

    ingest = sub.add_parser("ingest")
    ingest.add_argument("--input", required=True, help="Bee payload JSON file")
    ingest.set_defaults(func=cmd_ingest)

    triage = sub.add_parser("triage")
    triage.add_argument("--min-score", type=int, default=55)
    triage.set_defaults(func=cmd_triage)

    promote = sub.add_parser("promote")
    promote.add_argument("--project-id", default="", help="Optional Notion project page id; leave empty for Inbox")
    promote.add_argument("--label", default="open_time")
    promote.add_argument("--apply", action="store_true")
    promote.set_defaults(func=cmd_promote)

    ignore = sub.add_parser("ignore")
    ignore.add_argument("--dedupe-key", default="")
    ignore.add_argument("--source-id", default="")
    ignore.add_argument("--title", default="")
    ignore.add_argument("--reason", default="manual_ignore")
    ignore.set_defaults(func=cmd_ignore)

    unignore = sub.add_parser("unignore")
    unignore.add_argument("--dedupe-key", required=True)
    unignore.set_defaults(func=cmd_unignore)

    ignored = sub.add_parser("ignored")
    ignored.add_argument("--limit", type=int, default=50)
    ignored.set_defaults(func=cmd_list_ignored)

    args = parser.parse_args()
    args.func(args)


# Script entry point: run the CLI when executed directly.
if __name__ == "__main__":
    main()
