#!/usr/bin/env python3
"""
Citation Generator - Generate formatted citations and bibliographies.

Supports multiple citation styles:
- Chicago (default)
- APA
- MLA
- Harvard
- BibTeX

Usage:
    python generate_citations.py --document DOC_ID
    python generate_citations.py --document DOC_ID --style apa
    python generate_citations.py --all --output bibliography.md
    python generate_citations.py --from-search "consciousness" --style chicago
    python generate_citations.py --from-file search_results.json --style mla

IMPORTANT: Static Export Limitation
-----------------------------------
This tool generates STATIC exports (files that are snapshots of database state).
Exported citation files (BibTeX, Markdown, etc.) do NOT automatically update when:
- Document metadata changes (author, title, year corrections)
- Documents are deleted or archived
- New documents are added

Best Practices:
1. Re-export citations just before final publication/submission
2. Use --from-search for working sets rather than --all
3. Consider storing only document_ids and regenerating on demand
4. For reference managers (Zotero, Mendeley), re-import after major changes
"""

import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional

# Add this script's own directory to sys.path so sibling modules (e.g. db_utils) can be imported
sys.path.insert(0, str(Path(__file__).parent))

from db_utils import get_db_connection, execute_query


class CitationGenerator:
    """Generate formatted citations in various styles.

    A generator is bound to a single style at construction time. Documents
    are plain dicts; the formatters read the keys ``author``, ``title``,
    ``publication_year``, ``publisher`` and (BibTeX only) ``document_id``.
    Missing or falsy values are replaced by per-style placeholders.
    """

    def __init__(self, style: str = 'chicago'):
        """
        Initialize citation generator.

        Args:
            style: Citation style (chicago, apa, mla, harvard, bibtex).
                Matching is case-insensitive.

        Raises:
            ValueError: If ``style`` is not one of the supported styles.
        """
        self.style = style.lower()
        # Dispatch table: style name -> bound formatter method.
        self.styles = {
            'chicago': self._format_chicago,
            'apa': self._format_apa,
            'mla': self._format_mla,
            'harvard': self._format_harvard,
            'bibtex': self._format_bibtex,
        }

        if self.style not in self.styles:
            raise ValueError(f"Unknown style: {style}. Supported: {', '.join(self.styles.keys())}")

    def get_document_metadata(self, document_id: str) -> Optional[Dict[str, Any]]:
        """Fetch one document's metadata row from the database.

        Args:
            document_id: Value matched against ``documents.document_id``.

        Returns:
            The row converted to a dict, or ``None`` when the query yields
            no (truthy) result.
        """
        result = execute_query(
            """
            SELECT
                document_id,
                title,
                author,
                publication_year,
                publisher,
                primary_category,
                language,
                source_file,
                created_at
            FROM documents
            WHERE document_id = %s
            """,
            (document_id,),
            fetch='one'
        )
        return dict(result) if result else None

    def get_all_documents(self) -> List[Dict[str, Any]]:
        """Fetch metadata for every non-archived document.

        Returns:
            A list of row dicts ordered by author, publication year and
            title; empty when nothing matches.
        """
        results = execute_query(
            """
            SELECT
                document_id,
                title,
                author,
                publication_year,
                publisher,
                primary_category,
                language,
                source_file,
                created_at
            FROM documents
            WHERE quality_status != 'archived'
            ORDER BY author, publication_year, title
            """,
            fetch='all'
        )
        # Guard in case the query helper hands back None for an empty result set.
        return [dict(r) for r in results or []]

    def format_citation(self, doc: Dict[str, Any]) -> str:
        """Format a single citation using the style chosen at construction.

        Args:
            doc: Document metadata dict.

        Returns:
            The formatted citation string.
        """
        return self.styles[self.style](doc)

    @staticmethod
    def _common_fields(doc: Dict[str, Any], missing_year: Any = 'n.d.'):
        """Return ``(author, title, year, publisher)`` with placeholders applied.

        Falsy values (missing key, ``None``, empty string) fall back to
        ``'Unknown Author'``, ``'Untitled'``, ``missing_year`` and ``''``.
        """
        return (
            doc.get('author') or 'Unknown Author',
            doc.get('title') or 'Untitled',
            doc.get('publication_year') or missing_year,
            doc.get('publisher') or '',
        )

    @staticmethod
    def _alnum_lower(text: str) -> str:
        """Return ``text`` lower-cased with every non-alphanumeric character dropped."""
        return ''.join(c for c in text if c.isalnum()).lower()

    def _format_chicago(self, doc: Dict[str, Any]) -> str:
        """Format citation in Chicago style (Author-Date).

        The author string is emitted exactly as stored.
        """
        author, title, year, publisher = self._common_fields(doc)

        # Layout: Author. *Title*. Publisher, Year.
        citation = f"{author}. *{title}*."
        if publisher:
            citation += f" {publisher},"
        citation += f" {year}."

        return citation

    def _format_apa(self, doc: Dict[str, Any]) -> str:
        """Format citation in APA style (7th edition).

        A whitespace-separated "First [Middle] Last" author is rewritten to
        "Last, F. M."; a single-token author is emitted unchanged.
        """
        author, title, year, publisher = self._common_fields(doc)

        # Layout: Last, F. M. (Year). *Title*. Publisher.
        name_tokens = author.split()
        if len(name_tokens) >= 2:
            initials = ' '.join(f"{name[0]}." for name in name_tokens[:-1])
            author_formatted = f"{name_tokens[-1]}, {initials}"
        else:
            author_formatted = author

        citation = f"{author_formatted} ({year}). *{title}*."
        if publisher:
            citation += f" {publisher}."

        return citation

    def _format_mla(self, doc: Dict[str, Any]) -> str:
        """Format citation in MLA style (9th edition).

        A whitespace-separated "First [Middle] Last" author is rewritten to
        "Last, First [Middle]"; a single-token author is emitted unchanged.
        """
        author, title, year, publisher = self._common_fields(doc)

        # Layout: Last, First. *Title*. Publisher, Year.
        name_tokens = author.split()
        if len(name_tokens) >= 2:
            author_formatted = f"{name_tokens[-1]}, {' '.join(name_tokens[:-1])}"
        else:
            author_formatted = author

        citation = f"{author_formatted}. *{title}*."
        if publisher:
            citation += f" {publisher},"
        citation += f" {year}."

        return citation

    def _format_harvard(self, doc: Dict[str, Any]) -> str:
        """Format citation in Harvard style.

        Only the initial of the first name token is kept ("Last, F.");
        middle names are dropped. A single-token author is emitted unchanged.
        """
        author, title, year, publisher = self._common_fields(doc)

        # Layout: Last, F. (Year) *Title*. Publisher.
        name_tokens = author.split()
        if len(name_tokens) >= 2:
            author_formatted = f"{name_tokens[-1]}, {name_tokens[0][0]}."
        else:
            author_formatted = author

        citation = f"{author_formatted} ({year}) *{title}*."
        if publisher:
            citation += f" {publisher}."

        return citation

    def _format_bibtex(self, doc: Dict[str, Any]) -> str:
        """Format citation as a BibTeX ``@book`` entry.

        The cite key is ``<first author token><year><first 15 title chars>``,
        each part lower-cased and reduced to alphanumeric characters so that
        punctuation in the metadata (e.g. the comma in "Steiner, Rudolf")
        cannot produce a key that breaks the entry. An unknown, blank or
        fully non-alphanumeric author token becomes ``unknown``; a missing
        year becomes ``nd``.

        Field values are written verbatim; BibTeX special characters in the
        metadata are not escaped.
        """
        author, title, year, publisher = self._common_fields(doc, missing_year='')
        doc_id = doc.get('document_id', 'unknown')

        # Build the citation key from sanitised parts.
        name_tokens = author.split()
        if author == 'Unknown Author' or not name_tokens:
            # A whitespace-only author has no tokens; avoid indexing into [].
            author_key = 'unknown'
        else:
            author_key = self._alnum_lower(name_tokens[0]) or 'unknown'
        year_key = self._alnum_lower(str(year)) or 'nd'
        title_key = self._alnum_lower(title[:15])
        cite_key = f"{author_key}{year_key}{title_key}"

        entry_lines = [
            f"@book{{{cite_key},",
            f"  author = {{{author}}},",
            f"  title = {{{title}}},",
        ]
        if year:
            entry_lines.append(f"  year = {{{year}}},")
        if publisher:
            entry_lines.append(f"  publisher = {{{publisher}}},")
        # The note is always last, so it carries no trailing comma.
        entry_lines.append(f"  note = {{Document ID: {doc_id}}}")
        entry_lines.append("}")

        return '\n'.join(entry_lines)

    def generate_bibliography(
        self,
        documents: List[Dict[str, Any]],
        title: str = "Bibliography"
    ) -> str:
        """Generate a complete bibliography from a list of documents.

        Args:
            documents: Document metadata dicts, emitted in the given order.
            title: Heading used for the Markdown output (ignored for BibTeX).

        Returns:
            For BibTeX, the entries separated by blank lines with no header.
            Otherwise a Markdown document: heading, generation date, style,
            entry count, a rule, then one bullet per citation.
        """
        if self.style == 'bibtex':
            # BibTeX doesn't need header formatting
            return '\n\n'.join(self.format_citation(doc) for doc in documents)

        # Each element ends in "\n" and elements are joined with "\n", which
        # leaves a blank line between Markdown paragraphs and bullets.
        lines = [
            f"# {title}\n",
            f"*Generated: {datetime.now().strftime('%Y-%m-%d')}*\n",
            f"*Style: {self.style.upper()}*\n",
            f"*Total entries: {len(documents)}*\n",
            "---\n",
        ]
        lines.extend(f"- {self.format_citation(doc)}\n" for doc in documents)

        return '\n'.join(lines)

    def generate_in_text_citation(self, doc: Dict[str, Any]) -> str:
        """Generate an in-text citation reference.

        The last whitespace-separated token of the author is used as the
        surname; a missing or blank author becomes ``Unknown``.

        Returns:
            ``(Last)`` for MLA, ``(Last, Year)`` for every other style.
        """
        author = doc.get('author') or 'Unknown'
        year = doc.get('publication_year') or 'n.d.'

        name_tokens = author.split()
        last_name = name_tokens[-1] if name_tokens else 'Unknown'

        # MLA is the only supported style that omits the year.
        if self.style == 'mla':
            return f"({last_name})"
        return f"({last_name}, {year})"


def load_documents_from_file(file_path: Path) -> List[Dict[str, Any]]:
    """Load document references from a JSON file (e.g., search results).

    Accepted top-level shapes:
    - an object with a ``results`` key holding a list of entries,
    - a bare list of entries,
    - a single entry object.

    Entries that are not JSON objects, or that lack a truthy
    ``document_id``, are skipped. Only the first entry seen for each
    ``document_id`` is kept, in file order.

    Args:
        file_path: Path to a UTF-8 encoded JSON file.

    Returns:
        A list of dicts with the keys ``document_id``, ``title``,
        ``author``, ``publication_year`` and ``publisher``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 / valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            subclasses of ``ValueError``).
    """
    # Read as UTF-8 explicitly rather than relying on the locale default.
    with open(file_path, encoding='utf-8') as f:
        data = json.load(f)

    # Handle different JSON structures. The dict check comes first because
    # `'results' in data` raises TypeError on scalars.
    if isinstance(data, dict) and 'results' in data:
        # Search results format; tolerate an explicit null.
        results = data['results'] or []
    elif isinstance(data, list):
        results = data
    else:
        results = [data]

    # Get unique documents, preserving first-seen order.
    seen = set()
    unique_docs = []
    for r in results:
        if not isinstance(r, dict):
            # Stray strings/numbers/nulls carry no document reference.
            continue
        doc_id = r.get('document_id')
        if doc_id and doc_id not in seen:
            seen.add(doc_id)
            unique_docs.append({
                'document_id': doc_id,
                'title': r.get('title', 'Untitled'),
                'author': r.get('author', 'Unknown Author'),
                # Search exports may use either key for the year.
                'publication_year': r.get('year') or r.get('publication_year'),
                'publisher': r.get('publisher', ''),
            })

    return unique_docs


def search_for_documents(query: str, limit: int = 100) -> List[Dict[str, Any]]:
    """Full-text search over chunk text, returning one metadata dict per document.

    The query string is matched with ``plainto_tsquery('english', ...)``
    against ``chunks.chunk_text_tsv``. Documents whose ``quality_status`` is
    ``'archived'`` are excluded, duplicates are collapsed per
    ``document_id``, and rows come back ordered by ``document_id``.

    Args:
        query: Free-text search string, passed as a bound parameter.
        limit: Maximum number of documents to return.

    Returns:
        A list of row dicts (document_id, title, author, publication_year,
        publisher, primary_category).
    """
    sql = """
        SELECT DISTINCT ON (d.document_id)
            d.document_id,
            d.title,
            d.author,
            d.publication_year,
            d.publisher,
            d.primary_category
        FROM documents d
        JOIN chunks c ON d.document_id = c.document_id
        WHERE c.chunk_text_tsv @@ plainto_tsquery('english', %s)
          AND d.quality_status != 'archived'
        ORDER BY d.document_id
        LIMIT %s
        """
    params = (query, limit)
    rows = execute_query(sql, params, fetch='all')
    return list(map(dict, rows))


def main():
    """Command-line entry point: parse arguments, collect documents, emit citations.

    Exactly one source option (--document, --all, --from-search, --from-file)
    is required. A single document with no --output is printed as a labelled
    citation (plus an in-text form when --in-text is given); anything else is
    rendered as a bibliography and written to --output or stdout.

    Progress and error messages are written to stderr so that stdout carries
    only citation output and can be redirected safely to a file. The process
    exits with status 1 when no documents can be obtained.
    """
    parser = argparse.ArgumentParser(
        description='Generate formatted citations and bibliographies',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Single document citation
  python generate_citations.py --document DOC_20251211_STEINER

  # All documents in APA style
  python generate_citations.py --all --style apa --output bibliography.md

  # Citations from search results
  python generate_citations.py --from-search "consciousness" --style chicago

  # Citations from exported search file
  python generate_citations.py --from-file results.json --style mla --output refs.md

  # Generate BibTeX for LaTeX
  python generate_citations.py --all --style bibtex --output references.bib

Citation Styles:
  chicago  - Chicago Manual of Style (Author-Date)
  apa      - APA 7th Edition
  mla      - MLA 9th Edition
  harvard  - Harvard Referencing
  bibtex   - BibTeX format for LaTeX

IMPORTANT - Static Exports:
  Exported files are SNAPSHOTS and do NOT auto-update when metadata changes.
  Best practice: Re-export citations before final submission/publication.
        """
    )

    # Source options (mutually exclusive)
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument('--document', '-d', help='Generate citation for specific document ID')
    source_group.add_argument('--all', '-a', action='store_true', help='Generate bibliography for all documents')
    source_group.add_argument('--from-search', '-s', metavar='QUERY', help='Generate from search query')
    source_group.add_argument('--from-file', '-f', type=Path, help='Generate from JSON file (search results)')

    # Options
    parser.add_argument('--style', choices=['chicago', 'apa', 'mla', 'harvard', 'bibtex'],
                        default='chicago', help='Citation style (default: chicago)')
    parser.add_argument('--output', '-o', type=Path, help='Output file path')
    parser.add_argument('--in-text', action='store_true', help='Also show in-text citation format')
    parser.add_argument('--limit', '-l', type=int, default=100, help='Max documents for search (default: 100)')

    args = parser.parse_args()

    def status(message: str) -> None:
        """Report progress on stderr, keeping stdout clean for citations."""
        print(message, file=sys.stderr)

    def fail(message: str) -> None:
        """Report an error on stderr and terminate with exit status 1."""
        print(message, file=sys.stderr)
        sys.exit(1)

    generator = CitationGenerator(style=args.style)

    # Get documents based on source. Compare against None rather than relying
    # on truthiness so that an empty-string argument (e.g. --document "")
    # still selects its branch instead of falling through every case.
    if args.document is not None:
        doc = generator.get_document_metadata(args.document)
        if not doc:
            fail(f"Document not found: {args.document}")
        documents = [doc]

    elif args.all:
        documents = generator.get_all_documents()
        if not documents:
            fail("No documents found in library.")
        status(f"Found {len(documents)} documents")

    elif args.from_search is not None:
        documents = search_for_documents(args.from_search, args.limit)
        if not documents:
            fail(f"No documents found matching: {args.from_search}")
        status(f"Found {len(documents)} documents matching '{args.from_search}'")

    else:
        # The source group is required, so the only remaining case is --from-file.
        if not args.from_file.exists():
            fail(f"File not found: {args.from_file}")
        try:
            documents = load_documents_from_file(args.from_file)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            fail(f"Could not read {args.from_file}: {exc}")
        if not documents:
            fail(f"No documents found in: {args.from_file}")
        status(f"Loaded {len(documents)} documents from {args.from_file}")

    # Generate output
    if len(documents) == 1 and not args.output:
        # Single citation to stdout
        doc = documents[0]
        citation = generator.format_citation(doc)
        print(f"\n{args.style.upper()} Citation:\n")
        print(citation)
        if args.in_text:
            in_text = generator.generate_in_text_citation(doc)
            print(f"\nIn-text citation: {in_text}")
    else:
        # Generate bibliography
        bibliography = generator.generate_bibliography(documents)

        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(bibliography)
            print(f"Bibliography saved to: {args.output}")
        else:
            print(bibliography)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
