#!/usr/bin/env python3
"""
Citation Manager - Stable Citation Key Management.

Provides frozen citation keys that remain stable even when document metadata changes.
This prevents breaking manuscript references when correcting titles, authors, or years.

Features:
- Generate citation keys from document metadata
- Freeze keys to prevent future changes
- Batch operations for freezing/unfreezing
- Citation key lookup and validation

Usage:
    python citation_manager.py --generate DOC_001
    python citation_manager.py --freeze DOC_001
    python citation_manager.py --freeze-all
    python citation_manager.py --list-frozen
    python citation_manager.py --unfreeze DOC_001

Citation Key Format:
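    [AuthorYear] - e.g., [Steiner1894], [Jung1921]
    For unknown authors: the first significant title word replaces the
        author, e.g. [Philosophy1900]
    For no year: "nd" replaces the year, e.g. [Steinernd]
    For duplicates: a suffix is appended, e.g. [Steiner1894_a]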
"""

import argparse
import sys
import re
import logging
from pathlib import Path
from datetime import datetime
from typing import Optional, List, Dict, Any, Tuple

# Put the directory containing this script first on sys.path so the
# `db_utils` import below resolves no matter what the current working
# directory is (presumably db_utils lives next to this file -- confirm).
sys.path.insert(0, str(Path(__file__).parent))

# NOTE(review): get_db_connection is imported but not used in the part of the
# file reviewed here; every query goes through execute_query.
from db_utils import execute_query, get_db_connection

# Configure root logging at import time: INFO and above, each record
# formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger used by the freeze/unfreeze helpers below.
logger = logging.getLogger(__name__)


# =============================================================================
# CITATION KEY GENERATION
# =============================================================================

def _ascii_key_part(text: str, keep_hyphen: bool = False) -> str:
    """Reduce *text* to ASCII letters (optionally hyphens) for use in a key.

    Accented characters are decomposed (NFKD) before filtering so their base
    letter survives: "Müller" becomes "Muller" rather than "Mller".
    """
    import unicodedata  # local import keeps this block self-contained

    decomposed = unicodedata.normalize('NFKD', text)
    pattern = r'[^a-zA-Z\-]' if keep_hyphen else r'[^a-zA-Z]'
    return re.sub(pattern, '', decomposed)


def generate_citation_key(
    author: str = None,
    title: str = None,
    year: int = None,
    existing_keys: List[str] = None
) -> str:
    """
    Generate a citation key from document metadata.

    Format: [AuthorYear], or [AuthorYear_a] ... [AuthorYear_z], then
    [AuthorYear_2], [AuthorYear_3], ... when the base key is already taken.

    The name part is chosen in this order:
      1. The author's last name ("First Last" or "Last, First"), unless the
         author is missing or a placeholder (unknown / unknown author /
         anonymous).
      2. The first significant title word (longer than three characters and
         not a stop word) that still contains letters after clean-up, or
         "Untitled" when the title has no such word.
      3. "Unknown" when neither author nor title is usable.

    A name that contains no ASCII letters after clean-up (e.g. a numeric or
    non-Latin author) is treated as unusable, so the key never has an empty
    name part.

    Args:
        author: Author name (will extract last name)
        title: Document title (used if no usable author)
        year: Publication year; a falsy value yields "nd"
        existing_keys: Keys already in use, to avoid duplicates

    Returns:
        Citation key string like "[Steiner1894]", guaranteed not to be in
        ``existing_keys``.
    """
    # A set makes each of the (possibly many) collision checks O(1).
    taken = set(existing_keys or ())

    # --- Name component: try the author first -------------------------------
    author_key = ''
    author_name = (author or '').strip()
    if author_name and author_name.lower() not in ('unknown', 'unknown author', 'anonymous'):
        if ',' in author_name:
            # "Last, First"
            surname = author_name.split(',')[0]
        else:
            # "First Last"
            surname = author_name.split()[-1]
        # Hyphens are kept inside double-barrelled names but a key must not
        # start or end with one.
        author_key = _ascii_key_part(surname, keep_hyphen=True).strip('-')

    # --- Fall back to the title, then to a fixed placeholder ----------------
    if not author_key:
        if title:
            stop_words = ('the', 'and', 'for', 'with', 'from')
            cleaned_words = (
                _ascii_key_part(word)
                for word in title.split()
                if len(word) > 3 and word.lower() not in stop_words
            )
            # Skip words that clean up to nothing (e.g. "1984:").
            author_key = next((word for word in cleaned_words if word), 'Untitled')
        else:
            author_key = 'Unknown'

    # --- Year component ------------------------------------------------------
    year_key = str(year) if year else 'nd'

    stem = f"{author_key}{year_key}"
    base_key = f"[{stem}]"
    if base_key not in taken:
        return base_key

    # Letter suffixes for the first 26 duplicates: _a, _b, ...
    for suffix in 'abcdefghijklmnopqrstuvwxyz':
        candidate = f"[{stem}_{suffix}]"
        if candidate not in taken:
            return candidate

    # Then numeric suffixes without an upper bound; `taken` is finite, so
    # this always terminates with a unique key.
    counter = 2
    while True:
        candidate = f"[{stem}_{counter}]"
        if candidate not in taken:
            return candidate
        counter += 1


def get_document_citation_key(document_id: str) -> Tuple[str, bool]:
    """
    Return the citation key for one document.

    A document whose stored key is frozen gets that key back unchanged.
    Otherwise a key is generated from the current author/title/year; the
    generated key is NOT written to the database.

    Args:
        document_id: Identifier of the document to look up

    Returns:
        Tuple of (citation_key, is_frozen)

    Raises:
        ValueError: If no document with ``document_id`` exists
    """
    row = execute_query(
        """
        SELECT bibtex_key, bibtex_key_frozen, author, title, publication_year
        FROM documents
        WHERE document_id = %s
        """,
        (document_id,),
        fetch='one'
    )
    if not row:
        raise ValueError(f"Document not found: {document_id}")

    stored_key = row['bibtex_key']

    # Frozen keys win: they must never be regenerated.
    if stored_key and row['bibtex_key_frozen']:
        return stored_key, True

    # The document's own (unfrozen) key must not count as a collision,
    # otherwise regenerating would always push it to a suffixed variant.
    keys_in_use = [
        other for other in get_all_citation_keys() if other != stored_key
    ]
    generated = generate_citation_key(
        author=row['author'],
        title=row['title'],
        year=row['publication_year'],
        existing_keys=keys_in_use
    )
    return generated, False


def get_all_citation_keys() -> List[str]:
    """Return every non-NULL ``bibtex_key`` stored in the documents table."""
    rows = execute_query(
        "SELECT bibtex_key FROM documents WHERE bibtex_key IS NOT NULL",
        fetch='all'
    )
    # A falsy result (no rows) maps to an empty list.
    if not rows:
        return []
    return [row['bibtex_key'] for row in rows]


# =============================================================================
# CITATION KEY FREEZING
# =============================================================================

def freeze_citation_key(document_id: str, custom_key: str = None) -> str:
    """
    Freeze a citation key for a document.

    Once frozen, the key will not change even if metadata is updated.

    With ``custom_key`` the given key is stored (wrapped in square brackets
    if they are missing), replacing any key the document already has, frozen
    or not. Without it, the key is generated from metadata; if the document
    is already frozen its existing key is returned untouched.

    Args:
        document_id: Document to freeze
        custom_key: Optional custom key, with or without brackets; an empty
            or whitespace-only value is treated as "not given"

    Returns:
        The frozen citation key

    Raises:
        ValueError: If the document does not exist, if the custom key is
            empty once brackets are removed, or if the custom key is already
            used by a different document
    """
    custom = custom_key.strip() if custom_key else ''

    if custom:
        # Normalise to exactly one pair of enclosing brackets; checking only
        # the leading '[' would let "Key]" become "[Key]]".
        key = custom if custom.startswith('[') else f"[{custom}"
        if not key.endswith(']'):
            key = f"{key}]"
        if len(key) <= 2:
            raise ValueError("Custom citation key must not be empty")

        # The generated-key path validates the document via
        # get_document_citation_key; do the same here so a typo in the
        # document id is not reported as a successful freeze.
        exists = execute_query(
            "SELECT document_id FROM documents WHERE document_id = %s",
            (document_id,),
            fetch='one'
        )
        if not exists:
            raise ValueError(f"Document not found: {document_id}")

        # Citation keys must identify exactly one document.
        clash = execute_query(
            """
            SELECT document_id FROM documents
            WHERE bibtex_key = %s AND document_id <> %s
            """,
            (key, document_id),
            fetch='one'
        )
        if clash:
            raise ValueError(
                f"Citation key {key} is already used by document "
                f"{clash['document_id']}"
            )
    else:
        key, already_frozen = get_document_citation_key(document_id)
        if already_frozen:
            logger.info(f"Document {document_id} already has frozen key: {key}")
            return key

    # Freeze the key
    execute_query(
        """
        UPDATE documents
        SET bibtex_key = %s,
            bibtex_key_frozen = TRUE,
            bibtex_key_frozen_at = CURRENT_TIMESTAMP
        WHERE document_id = %s
        """,
        (key, document_id)
    )

    logger.info(f"Frozen citation key for {document_id}: {key}")
    return key


def unfreeze_citation_key(document_id: str) -> None:
    """
    Clear the frozen flag on a document's citation key.

    The stored ``bibtex_key`` value itself is left in place; only the frozen
    flag and the frozen-at timestamp are reset, so the key may be regenerated
    from metadata afterwards.

    Args:
        document_id: Document to unfreeze
    """
    unfreeze_sql = """
        UPDATE documents
        SET bibtex_key_frozen = FALSE,
            bibtex_key_frozen_at = NULL
        WHERE document_id = %s
        """
    params = (document_id,)
    execute_query(unfreeze_sql, params)

    logger.info(f"Unfroze citation key for {document_id}")


def freeze_all_citation_keys() -> Dict[str, str]:
    """
    Freeze citation keys for all documents that don't have frozen keys.

    Each unfrozen document gets a key generated from its current metadata,
    unique against every other key in the table and against the keys
    assigned earlier in the same run. Documents are processed in
    author / year / title order so suffix assignment is deterministic.

    A document's own current (unfrozen) key is not treated as a collision,
    matching get_document_citation_key: a document that already holds
    "[Steiner1894]" is frozen as "[Steiner1894]", not "[Steiner1894_a]".

    Returns:
        Dict mapping document_id to frozen key (empty if nothing to freeze)
    """
    # Get documents without frozen keys
    results = execute_query(
        """
        SELECT document_id, author, title, publication_year, bibtex_key
        FROM documents
        WHERE bibtex_key_frozen IS NOT TRUE
        ORDER BY author, publication_year, title
        """,
        fetch='all'
    )

    if not results:
        logger.info("No documents need citation key freezing")
        return {}

    existing_keys = get_all_citation_keys()
    frozen = {}

    for doc in results:
        # Release this document's own claim on its current key before
        # generating. Only ONE occurrence is removed, so the same key held
        # by a different document still counts as taken.
        current_key = doc['bibtex_key']
        if current_key is not None and current_key in existing_keys:
            existing_keys.remove(current_key)

        # Generate unique key
        key = generate_citation_key(
            author=doc['author'],
            title=doc['title'],
            year=doc['publication_year'],
            existing_keys=existing_keys
        )

        # Freeze it
        execute_query(
            """
            UPDATE documents
            SET bibtex_key = %s,
                bibtex_key_frozen = TRUE,
                bibtex_key_frozen_at = CURRENT_TIMESTAMP
            WHERE document_id = %s
            """,
            (key, doc['document_id'])
        )

        frozen[doc['document_id']] = key
        # Reserve the new key so later documents in this run avoid it.
        existing_keys.append(key)

    logger.info(f"Frozen citation keys for {len(frozen)} documents")
    return frozen


def list_frozen_citations() -> List[Dict[str, Any]]:
    """
    Fetch every document whose citation key is frozen, newest freeze first.

    Returns:
        List of plain dicts with document_id, title, author,
        publication_year, bibtex_key and bibtex_key_frozen_at; empty when no
        document is frozen
    """
    rows = execute_query(
        """
        SELECT document_id, title, author, publication_year,
               bibtex_key, bibtex_key_frozen_at
        FROM documents
        WHERE bibtex_key_frozen = TRUE
        ORDER BY bibtex_key_frozen_at DESC
        """,
        fetch='all'
    )
    if not rows:
        return []

    # Copy each row into a plain dict for the caller.
    return [dict(row) for row in rows]


def list_unfrozen_citations() -> List[Dict[str, Any]]:
    """
    Fetch every document whose citation key is not frozen.

    Rows where the frozen flag is FALSE or NULL are both included, ordered
    by author and then title.

    Returns:
        List of plain dicts with document_id, title, author,
        publication_year and bibtex_key; empty when every document is frozen
    """
    rows = execute_query(
        """
        SELECT document_id, title, author, publication_year, bibtex_key
        FROM documents
        WHERE bibtex_key_frozen IS NOT TRUE
        ORDER BY author, title
        """,
        fetch='all'
    )
    if not rows:
        return []

    # Copy each row into a plain dict for the caller.
    return [dict(row) for row in rows]


# =============================================================================
# CITATION KEY LOOKUP
# =============================================================================

def lookup_by_citation_key(key: str) -> Optional[Dict[str, Any]]:
    """
    Look up a document by its citation key.

    The key is normalised before the query: surrounding whitespace is
    removed and a missing opening or closing bracket is added, so
    "Steiner1894", "[Steiner1894", "Steiner1894]" and " [Steiner1894] " all
    search for "[Steiner1894]".

    Args:
        key: Citation key (with or without brackets)

    Returns:
        Document dict (document_id, title, author, publication_year,
        bibtex_key, bibtex_key_frozen) or None if no document has that key
    """
    # Normalize key format. Each bracket is checked on its own: testing only
    # the leading '[' would turn "Key]" into "[Key]]" and leave "[Key" open.
    key = key.strip()
    if not key.startswith('['):
        key = f"[{key}"
    if not key.endswith(']'):
        key = f"{key}]"

    result = execute_query(
        """
        SELECT document_id, title, author, publication_year,
               bibtex_key, bibtex_key_frozen
        FROM documents
        WHERE bibtex_key = %s
        """,
        (key,),
        fetch='one'
    )

    return dict(result) if result else None


def validate_citation_keys() -> Dict[str, List[str]]:
    """
    Run three health checks over the stored citation keys.

    Returns:
        Dict with three lists:
          'duplicates' - keys shared by more than one document
          'missing'    - document_ids that have no key at all
          'unfrozen'   - document_ids that have a key which is not frozen
    """
    def pluck(rows, column):
        # A falsy query result (no rows) maps to an empty list.
        if not rows:
            return []
        return [row[column] for row in rows]

    # Keys that appear on more than one document
    duplicate_rows = execute_query(
        """
        SELECT bibtex_key, COUNT(*) as count
        FROM documents
        WHERE bibtex_key IS NOT NULL
        GROUP BY bibtex_key
        HAVING COUNT(*) > 1
        """,
        fetch='all'
    )
    report = {'duplicates': pluck(duplicate_rows, 'bibtex_key')}

    # Documents that never received a key
    keyless_rows = execute_query(
        """
        SELECT document_id FROM documents WHERE bibtex_key IS NULL
        """,
        fetch='all'
    )
    report['missing'] = pluck(keyless_rows, 'document_id')

    # Documents whose key exists but could still change
    unfrozen_rows = execute_query(
        """
        SELECT document_id FROM documents
        WHERE bibtex_key IS NOT NULL AND bibtex_key_frozen IS NOT TRUE
        """,
        fetch='all'
    )
    report['unfrozen'] = pluck(unfrozen_rows, 'document_id')

    return report


# =============================================================================
# CLI
# =============================================================================

def main(argv: Optional[List[str]] = None) -> None:
    """
    Command-line entry point for the citation key manager.

    Exactly one action is executed per invocation; when several action flags
    are given, the first one in this order wins: --generate, --freeze,
    --freeze-all, --unfreeze, --list-frozen, --list-unfrozen, --lookup,
    --validate. With no action the help text is printed.

    ``--format json`` prints the result of the chosen action as a JSON
    document on stdout instead of the human-readable text.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so existing ``main()`` calls are
            unaffected.

    Raises:
        SystemExit: On argument errors (argparse), or with status 1 when
            --generate / --freeze reports a ValueError such as an unknown
            document id.
    """
    import json  # local import: only needed for --format json

    parser = argparse.ArgumentParser(
        description='Citation Key Manager - Stable citation keys for manuscripts',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate and display citation key for a document
  %(prog)s --generate DOC_001

  # Freeze a citation key (prevents changes)
  %(prog)s --freeze DOC_001

  # Freeze with custom key
  %(prog)s --freeze DOC_001 --key "[Steiner1894]"

  # Freeze all unfrozen documents
  %(prog)s --freeze-all

  # List all frozen citations
  %(prog)s --list-frozen

  # List documents needing freezing
  %(prog)s --list-unfrozen

  # Unfreeze a key (allow regeneration)
  %(prog)s --unfreeze DOC_001

  # Validate all citation keys
  %(prog)s --validate

  # Look up document by citation key
  %(prog)s --lookup "[Steiner1894]"
        """
    )

    # Actions
    parser.add_argument('--generate', metavar='DOC_ID',
                       help='Generate citation key for document')
    parser.add_argument('--freeze', metavar='DOC_ID',
                       help='Freeze citation key for document')
    parser.add_argument('--freeze-all', action='store_true',
                       help='Freeze keys for all unfrozen documents')
    parser.add_argument('--unfreeze', metavar='DOC_ID',
                       help='Unfreeze citation key for document')
    parser.add_argument('--list-frozen', action='store_true',
                       help='List all frozen citation keys')
    parser.add_argument('--list-unfrozen', action='store_true',
                       help='List documents without frozen keys')
    parser.add_argument('--lookup', metavar='KEY',
                       help='Look up document by citation key')
    parser.add_argument('--validate', action='store_true',
                       help='Validate all citation keys')

    # Options
    parser.add_argument('--key', metavar='KEY',
                       help='Custom citation key (use with --freeze)')
    parser.add_argument('--format', choices=['text', 'json'], default='text',
                       help='Output format')

    args = parser.parse_args(argv)
    as_json = args.format == 'json'

    def emit_json(payload: Any) -> None:
        # default=str is deliberate: the frozen-at timestamps in the listing
        # rows are not JSON-native and are rendered via str().
        print(json.dumps(payload, indent=2, default=str))

    def title_of(doc: Dict[str, Any]) -> str:
        # A NULL title must not crash the listing when it is sliced.
        return doc['title'] or '(untitled)'

    # Handle actions
    if args.generate:
        try:
            key, frozen = get_document_citation_key(args.generate)
        except ValueError as exc:
            # Report e.g. "Document not found" without a traceback.
            parser.exit(1, f"Error: {exc}\n")
        if as_json:
            emit_json({'document_id': args.generate,
                       'citation_key': key,
                       'frozen': bool(frozen)})
        else:
            status = "frozen" if frozen else "generated"
            print(f"{args.generate}: {key} ({status})")

    elif args.freeze:
        try:
            key = freeze_citation_key(args.freeze, args.key)
        except ValueError as exc:
            parser.exit(1, f"Error: {exc}\n")
        if as_json:
            emit_json({'document_id': args.freeze,
                       'citation_key': key,
                       'frozen': True})
        else:
            print(f"Frozen: {args.freeze} -> {key}")

    elif args.freeze_all:
        frozen = freeze_all_citation_keys()
        if as_json:
            emit_json(frozen)
        else:
            print(f"\nFroze {len(frozen)} citation keys:")
            for doc_id, key in frozen.items():
                print(f"  {doc_id}: {key}")

    elif args.unfreeze:
        unfreeze_citation_key(args.unfreeze)
        if as_json:
            emit_json({'document_id': args.unfreeze, 'frozen': False})
        else:
            print(f"Unfroze citation key for {args.unfreeze}")

    elif args.list_frozen:
        citations = list_frozen_citations()
        if as_json:
            emit_json(citations)
        else:
            print(f"\nFrozen Citation Keys ({len(citations)} documents):")
            print("=" * 70)
            for doc in citations:
                frozen_at = doc['bibtex_key_frozen_at']
                frozen_date = frozen_at.strftime('%Y-%m-%d') if frozen_at else 'N/A'
                print(f"  {doc['bibtex_key']:20} | {doc['document_id']}")
                print(f"    {title_of(doc)[:50]}...")
                print(f"    Frozen: {frozen_date}")
                print()

    elif args.list_unfrozen:
        docs = list_unfrozen_citations()
        if as_json:
            emit_json(docs)
        else:
            print(f"\nDocuments Without Frozen Keys ({len(docs)}):")
            print("=" * 70)
            for doc in docs:
                current_key = doc.get('bibtex_key') or '(none)'
                print(f"  {doc['document_id']}: {current_key}")
                print(f"    {title_of(doc)[:60]}...")

    elif args.lookup:
        doc = lookup_by_citation_key(args.lookup)
        if as_json:
            # A miss is emitted as JSON null.
            emit_json(doc)
        elif doc:
            print(f"\nDocument for {args.lookup}:")
            print(f"  ID: {doc['document_id']}")
            print(f"  Title: {doc['title']}")
            print(f"  Author: {doc['author']}")
            print(f"  Year: {doc['publication_year']}")
            print(f"  Frozen: {doc['bibtex_key_frozen']}")
        else:
            print(f"No document found for citation key: {args.lookup}")

    elif args.validate:
        issues = validate_citation_keys()
        if as_json:
            emit_json(issues)
        else:
            print("\nCitation Key Validation:")
            print("=" * 50)

            if issues['duplicates']:
                print(f"\nDuplicate keys ({len(issues['duplicates'])}):")
                for key in issues['duplicates']:
                    print(f"  {key}")

            # Long id lists are capped at ten entries to keep output readable.
            if issues['missing']:
                print(f"\nMissing keys ({len(issues['missing'])}):")
                for doc_id in issues['missing'][:10]:
                    print(f"  {doc_id}")
                if len(issues['missing']) > 10:
                    print(f"  ... and {len(issues['missing']) - 10} more")

            if issues['unfrozen']:
                print(f"\nUnfrozen keys ({len(issues['unfrozen'])}):")
                for doc_id in issues['unfrozen'][:10]:
                    print(f"  {doc_id}")
                if len(issues['unfrozen']) > 10:
                    print(f"  ... and {len(issues['unfrozen']) - 10} more")

            if not any(issues.values()):
                print("All citation keys are valid and frozen!")

    else:
        parser.print_help()


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
