# =============================================================================
# Research Development Framework - Complete Dependencies
# =============================================================================
#
# Install all dependencies:
#   pip install -r requirements.txt
#
# After installing, run this ONCE to download NLTK data:
#   python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('stopwords')"
#
# For legacy .doc file support, install antiword:
#   Ubuntu/Debian: sudo apt install antiword
#   macOS: brew install antiword
#
# =============================================================================

# -----------------------------------------------------------------------------
# Core Dependencies (Database, Config, CLI)
# -----------------------------------------------------------------------------

# Database
psycopg2-binary>=2.9.0
pgvector>=0.2.0

# Configuration
PyYAML>=6.0.0
python-dotenv>=1.0.0

# HTTP client
requests>=2.31.0

# OpenAI-compatible client (works with OpenAI API and Ollama)
openai>=1.0.0

# Token counting
tiktoken>=0.5.0

# CLI utilities
tqdm>=4.65.0
rich>=13.0.0
click>=8.1.0

# -----------------------------------------------------------------------------
# Document Parsing
# -----------------------------------------------------------------------------

pypdf>=3.0.0             # PDF extraction
python-docx>=1.0.0       # DOCX extraction
pypandoc>=1.12.0         # Document conversion (requires the pandoc binary: apt/brew install pandoc)
striprtf>=0.0.26         # RTF file support
odfpy>=1.4.0             # ODT (OpenDocument) file support
beautifulsoup4>=4.12.0   # HTML/XML parsing
lxml>=4.9.0              # XML parser backend

# -----------------------------------------------------------------------------
# ML/NLP
# -----------------------------------------------------------------------------

scikit-learn>=1.3.0      # TF-IDF, K-means clustering
multi-rake>=0.0.2        # RAKE keyword extraction
yake>=0.4.8              # YAKE keyword extraction
nltk>=3.8.0              # Natural language toolkit

# -----------------------------------------------------------------------------
# Knowledge Graph & Data Hygiene
# -----------------------------------------------------------------------------

networkx>=3.0.0          # Graph data structures
pyvis>=0.3.0             # Network visualization
rapidfuzz>=3.0.0         # Fuzzy string matching

# -----------------------------------------------------------------------------
# Advanced RAG (Re-ranking & Semantic Chunking)
# -----------------------------------------------------------------------------

sentence-transformers>=2.2.0  # Cross-encoder re-ranking

# -----------------------------------------------------------------------------
# Terminal User Interface
# -----------------------------------------------------------------------------

textual>=0.47.0          # Modern TUI framework

# -----------------------------------------------------------------------------
# Web Search Integration
# -----------------------------------------------------------------------------

tavily-python>=0.3.0     # Tavily API for research gap filling
# Get API key from: https://tavily.com/

# -----------------------------------------------------------------------------
# Development & Testing
# -----------------------------------------------------------------------------

pytest>=7.4.0
pytest-cov>=4.1.0

# -----------------------------------------------------------------------------
# Optional Dependencies (uncomment to enable)
# -----------------------------------------------------------------------------

# OCR support (for scanned PDFs)
# Requires external binaries: tesseract (apt/brew install tesseract) and
# poppler for pdf2image (Ubuntu/Debian: poppler-utils, macOS: brew install poppler)
# pytesseract>=0.3.0
# pdf2image>=1.16.0

# EPUB support
# ebooklib>=0.18.0

# =============================================================================
# INTELLIGENCE TIERS
# =============================================================================
#
# STATISTICAL (Offline Mode):
#   No API keys required. Uses TF-IDF, RAKE, extractive summarization.
#
# LOCAL (Ollama):
#   Install Ollama: https://ollama.ai/download
#   Pull a model: ollama pull llama3
#   Start server: ollama serve
#
# CLOUD (OpenAI):
#   Get API key: https://platform.openai.com/api-keys
#   Add to .env: OPENAI_API_KEY=sk-your-key-here
#
# =============================================================================
