#!/usr/bin/env python3
import re
from typing import Dict, Optional

# Known LinkedIn notification sender addresses (all lowercase) mapped to a
# sender kind. classify_record() looks the resolved sender up in this table;
# it treats 'linkedin_inmail' as a recruiter InMail and every other kind as a
# job alert.
LINKEDIN_SENDERS: Dict[str, str] = {
    'jobalerts-noreply@linkedin.com': 'linkedin_alert',
    'messages-noreply@linkedin.com': 'linkedin_message',
    'inmail-hit-reply@linkedin.com': 'linkedin_inmail',
    'jobs-listings@linkedin.com': 'linkedin_alert',
}


def clean(text: str) -> str:
    """Normalise raw email text before pattern matching.

    Turns escaped quote markers (``\\>``) back into ``>``, drops carriage
    returns, and trims surrounding whitespace. Falsy input (``None`` or an
    empty string) yields an empty string.
    """
    if text:
        unescaped = text.replace('\\>', '>')
        without_cr = unescaped.replace('\r', '')
        return without_cr.strip()
    return ''


def extract_forwarded_header(email_text: str, header: str) -> Optional[str]:
    """Return the value of a ``>``-quoted header line in a forwarded email.

    Looks for a line of the form ``> <header>: <value>`` (header name matched
    case-insensitively) and returns the first such value, stripped.

    Args:
        email_text: Raw email body; it is normalised with ``clean`` first.
        header: Header name without the trailing colon, e.g. ``'From'``.

    Returns:
        The header value, or ``None`` when the header is absent or its value
        is blank.
    """
    email_text = clean(email_text)
    # Use [ \t] instead of \s: \s also matches '\n', which let an empty
    # "> From:" line swallow the following line ("> Subject: ...") as its
    # value. Requiring \S at the start of the value rejects blank headers.
    pattern = rf'^>[ \t]*{re.escape(header)}:[ \t]*(\S.*)$'
    m = re.search(pattern, email_text, re.MULTILINE | re.IGNORECASE)
    return m.group(1).strip() if m else None


def extract_email(text: str) -> Optional[str]:
    """Find the first email address in ``text``.

    Returns the address lowercased, or ``None`` when ``text`` is falsy or
    contains nothing that looks like an address.
    """
    if text:
        found = re.search(r'([A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,})', text, re.I)
        if found is not None:
            return found.group(1).lower()
    return None


def classify_record(subject: str, email_text: str, from_email: str = '') -> str:
    """Classify an email as a LinkedIn alert, a recruiter InMail, or neither.

    The sender is taken from the quoted ``From:`` header of the forwarded
    message when it contains an address, otherwise from ``from_email``.

    Args:
        subject: Subject line (``None`` is treated as empty).
        email_text: Raw email body.
        from_email: Envelope sender; may be a bare address or a
            ``Display Name <address>`` string.

    Returns:
        ``'recruiter_inmail'``, ``'linkedin_alert'`` or ``'non_linkedin'``.
    """
    subject = clean(subject).lower()
    body_lower = clean(email_text).lower()

    forwarded_from = extract_forwarded_header(email_text, 'From') or ''
    forwarded_addr = extract_email(forwarded_from)

    # Sender string used for the loose "mentions linkedin" check.
    raw_sender = forwarded_addr or (from_email or '').lower()
    # Bare address used for the exact-match table. Extracting it from
    # from_email lets "Name <inmail-hit-reply@linkedin.com>" or a padded
    # address hit the table instead of falling through to the heuristics.
    sender_addr = forwarded_addr or extract_email(from_email) or raw_sender.strip()

    kind = LINKEDIN_SENDERS.get(sender_addr)
    if kind is not None:
        # Only the InMail sender is a recruiter message; every other known
        # LinkedIn sender is handled as a job alert.
        return 'recruiter_inmail' if kind == 'linkedin_inmail' else 'linkedin_alert'

    if 'linkedin' in raw_sender or 'linkedin' in subject or 'linkedin' in body_lower:
        if 'inmail' in subject or 'recruiter' in body_lower:
            return 'recruiter_inmail'
        return 'linkedin_alert'
    return 'non_linkedin'


def extract_job_alert_fields(subject: str, email_text: str) -> Dict[str, Optional[str]]:
    """Pull job-alert details out of a forwarded alert email.

    All extraction is heuristic and regex based; a field is left as ``None``
    when nothing matches.

    Args:
        subject: Subject line. Leading ``Fwd:``/``Fw:`` prefixes are ignored
            and ``None`` is treated as empty.
        email_text: Raw email body, typically quoted with ``>`` markers.

    Returns:
        Dict with the keys ``role_title``, ``company``, ``job_url``,
        ``posted_date``, ``salary``, ``linkedin_job_id`` and
        ``message_snippet``.
    """
    text = clean(email_text)
    out: Dict[str, Optional[str]] = {
        'role_title': None,
        'company': None,
        'job_url': None,
        'posted_date': None,
        'salary': None,
        'linkedin_job_id': None,
        'message_snippet': None,
    }

    # posted_date comes from the quoted "Date:" header. [ \t] (not \s) keeps
    # an empty header from swallowing the next line as its value.
    date_match = re.search(r'>[ \t]*Date:[ \t]*(\S.*)$', text, re.MULTILINE)
    if date_match:
        out['posted_date'] = date_match.group(1).strip()

    # One amount: "$85000", "$100,000", "$45.50" or "$120K". Grouping commas
    # must be followed by three digits, so "$100,000, plus bonus" no longer
    # captures the trailing comma.
    amount = r'\$\d+(?:,\d{3})*(?:\.\d+)?(?:[kK]\b)?'
    salary = re.search(
        rf'{amount}(?:\s*[-–]\s*{amount})?(?:\s*/\s*(?:yr|year|hour|hr))?',
        text,
        re.I,
    )
    if salary:
        out['salary'] = salary.group(0).strip()

    # Key/value forms ("currentJobId=123456", "Job ID: REQ-1234", "jk=abc123")
    # need an explicit separator; without one the key alternatives could
    # never match "key=value" and "job identifier" produced "entifier".
    # Path form accepts both "job/view/" and LinkedIn's "jobs/view/".
    job_id = re.search(
        r'(?:(?:current)?job\s*id|\bjk)\s*[=:#]\s*([A-Za-z0-9\-]{6,})'
        r'|jobs?/view/([A-Za-z0-9\-]{6,})',
        text,
        re.I,
    )
    if job_id:
        out['linkedin_job_id'] = job_id.group(1) or job_id.group(2)

    # First URL in the body. Angle brackets are excluded so "<https://...>"
    # does not keep the closing ">", and trailing sentence punctuation is cut.
    url = re.search(r'https?://[^\s<>)]+', text)
    if url:
        out['job_url'] = url.group(0).rstrip('.,;')

    # Normalise the subject the same way classify_record does (None-safe),
    # then drop any number of leading "Fwd:" / "Fw:" prefixes.
    subject_clean = re.sub(r'^(?:Fwd?:\s*)+', '', clean(subject), flags=re.I).strip()
    m = re.search(r'New jobs similar to (.+?) at (.+)$', subject_clean, re.I)
    if m:
        out['role_title'] = m.group(1).strip()
        out['company'] = m.group(2).strip()
    else:
        m = re.search(r'New Jobs Based on Your Title [–—-] (.+)$', subject_clean, re.I)
        if m:
            out['role_title'] = m.group(1).strip()

    if not out['role_title']:
        # Fallback: scan the de-quoted, non-empty body lines for a
        # title-looking line that is immediately followed by a line that
        # looks like a location or pay line.
        lines = [re.sub(r'^>\s*', '', ln).strip() for ln in text.splitlines()]
        lines = [ln for ln in lines if ln]
        for ln, nxt in zip(lines, lines[1:] + ['']):
            if not re.fullmatch(r'[A-Z][A-Za-z0-9 &/,()\-]{3,80}', ln):
                continue
            if re.search(r'\$|Remote|,\s*[A-Z]{2}|Hybrid|On-site', nxt, re.I):
                out['role_title'] = ln
                out['message_snippet'] = f'{ln} | {nxt}'.strip(' |')
                break

    if not out['company'] and ' at ' in subject_clean:
        # Last resort: whatever follows the final " at " in the subject.
        out['company'] = subject_clean.rsplit(' at ', 1)[1].strip()

    return out


def extract_inmail_fields(subject: str, email_text: str) -> Dict[str, Optional[str]]:
    """Pull recruiter details out of an InMail-style email body.

    All extraction is heuristic and regex based; a field is left as ``None``
    when nothing matches.

    Args:
        subject: Subject line. Currently unused; accepted so the signature
            parallels ``extract_job_alert_fields``.
        email_text: Raw email body.

    Returns:
        Dict with the keys ``recruiter_name``, ``company`` and
        ``message_snippet``.
    """
    text = clean(email_text)
    out: Dict[str, Optional[str]] = {
        'recruiter_name': None,
        'company': None,
        'message_snippet': None,
    }

    # Only the label is case-insensitive (scoped flag). A global re.I made
    # [A-Z] match lowercase too, so "From: bob@x.com" produced the name "bob".
    recruiter = re.search(
        r"(?i:From:|Recruiter:|sent you a message[:\s])\s*([A-Z][A-Za-z .'-]+)",
        text,
    )
    if recruiter:
        out['recruiter_name'] = recruiter.group(1).strip()

    # \b stops "at"/"with" from matching inside other words, e.g. the "at"
    # in "that Google ...".
    company = re.search(r'\b(?:at|with)\s+([A-Z][A-Za-z0-9 .,&\-]+)', text)
    if company:
        # Drop separators/punctuation left dangling at the end of the capture.
        out['company'] = company.group(1).strip().rstrip(' .,&-') or None

    # re.S lets the snippet run across line breaks; whitespace is collapsed
    # afterwards and the result capped at 300 characters.
    snippet = re.search(r'(?:message|InMail)[:\s]+(.{20,300})', text, re.I | re.S)
    if snippet:
        out['message_snippet'] = ' '.join(snippet.group(1).split())[:300]
    return out


def parse_record(subject: str, email_text: str, from_email: str = '') -> Dict[str, object]:
    """Classify an email and extract the fields matching its classification.

    Returns a dict with the keys ``classification``, ``status``, ``source``,
    ``strategy`` and ``fields``. For anything that is neither a LinkedIn
    alert nor a recruiter InMail, the last four are ``None`` / ``{}``.
    """
    label = classify_record(subject, email_text, from_email)

    # Start from the "unrecognised" shape and fill in per-classification
    # values; update() keeps the original key order.
    record: Dict[str, object] = {
        'classification': label,
        'status': None,
        'source': None,
        'strategy': None,
        'fields': {},
    }

    if label == 'linkedin_alert':
        record.update(
            status='New',
            source='LinkedIn Alert',
            fields=extract_job_alert_fields(subject, email_text),
        )
    elif label == 'recruiter_inmail':
        record.update(
            status='Inbound',
            source='Inbound Recruiter',
            strategy='Recruiter Response',
            fields=extract_inmail_fields(subject, email_text),
        )
    return record


if __name__ == '__main__':
    # Manual smoke run: parse one forwarded (non-LinkedIn) job notification
    # and pretty-print the resulting record as JSON.
    import json

    demo_subject = 'Fwd: New jobs posted from jobsearch.grifols.com'
    demo_body = '''Begin forwarded message:\n\n> From: Grifols-jobnotification@noreply12.jobs2web.com\n> Subject: New jobs posted from jobsearch.grifols.com\n> Date: January 27, 2026 at 10:54:02 AM PST\n> To: Braden McLeish <bradenmcleish@icloud.com>\n>\n> Your Job Agent matched the following jobs.\n>\n> QO Associate - CA-Los Angeles, US\n'''
    parsed = parse_record(demo_subject, demo_body, 'bradenmcleish@icloud.com')
    print(json.dumps(parsed, indent=2))