"""
Risk Policy - Classification of files and commands by risk level.

This module provides:
- File path risk classification
- Command risk classification
- CRITICAL_BLOCKLIST for auto-rejected actions
- Pattern matching for dangerous operations

Risk Levels:
- LOW: Safe operations (read files, run tests, format code)
- MEDIUM: Edits OK but need care (source files, configs)
- HIGH: Suggest-only, always ask (production configs, secrets)
- CRITICAL: Auto-reject, NEVER allow (rm -rf, force push, etc.)

Usage:
    from agent_orchestrator.risk import RiskPolicy, RiskLevel, RiskClassification

    policy = RiskPolicy()
    risk = policy.classify_file("/etc/passwd")  # CRITICAL
    risk = policy.classify_command("npm test")  # LOW
"""

import re
import logging
from dataclasses import dataclass
from enum import Enum
from typing import Optional, List, Tuple


logger = logging.getLogger(__name__)


class RiskLevel(Enum):
    """The four risk tiers a file path or command can be assigned."""

    # Auto-allowed.
    LOW = "low"

    # Edits are OK; commands need review.
    MEDIUM = "medium"

    # Suggest-only: always ask first.
    HIGH = "high"

    # Auto-rejected: never allowed.
    CRITICAL = "critical"


@dataclass
class RiskClassification:
    """Outcome of classifying one file path or command.

    Attributes:
        level: The assigned risk tier.
        reason: Human-readable explanation for the tier.
        pattern_matched: Source text of the regex that decided the tier,
            or None when no pattern was involved.
        auto_allowed: Forced to True when ``level`` is LOW.
        requires_approval: Forced to True for HIGH and to False for MEDIUM.
        auto_rejected: Forced to True when ``level`` is CRITICAL.

    Only the flag tied to ``level`` is overwritten after construction; the
    other flags keep whatever value the caller passed in.
    """

    level: RiskLevel
    reason: str
    pattern_matched: Optional[str] = None
    auto_allowed: bool = False
    requires_approval: bool = False
    auto_rejected: bool = False

    def __post_init__(self):
        """Derive the flag that corresponds to ``level``."""
        tier = self.level

        if tier == RiskLevel.LOW:
            self.auto_allowed = True
            return

        if tier == RiskLevel.CRITICAL:
            self.auto_rejected = True
            return

        if tier in (RiskLevel.MEDIUM, RiskLevel.HIGH):
            # MEDIUM edits go ahead without approval; HIGH always asks.
            self.requires_approval = tier == RiskLevel.HIGH


# Alias for backwards compatibility: RiskDecision is bound to the very same
# class object as RiskClassification (it is not a subclass or a copy).
RiskDecision = RiskClassification


# =============================================================================
# CRITICAL BLOCKLIST - Auto-rejected patterns
# =============================================================================

# Shared prefix of the recursive-delete rules: "rm", any options, an option
# that turns recursion on (-r, -R, -rf, -fr, -rfv, --recursive, ...), any
# further options (including "--"), then the whitespace before the first
# operand. Spelled with explicit upper/lower case so it does not rely on the
# caller compiling with re.IGNORECASE.
_RM_RECURSIVE = (
    r"rm\s+(?:-\S+\s+)*(?:-[a-zA-Z]*[rR][a-zA-Z]*|--recursive)(?:\s+-\S+)*\s+"
)

# Commands that are rejected outright. Each entry is (regex, reason); the
# regexes are searched anywhere in the command string and the first entry
# that matches supplies the reason.
CRITICAL_COMMAND_PATTERNS: List[Tuple[str, str]] = [
    # Destructive file operations
    (_RM_RECURSIVE + r"[/~]", "Recursive delete from root or home"),
    (_RM_RECURSIVE + r"\*", "Recursive delete with wildcard"),
    (_RM_RECURSIVE + r"\.\.", "Recursive delete parent directory"),
    (r">\s*/dev/sd[a-z]", "Write to disk device"),
    (r"dd\s+.*of=/dev/", "Direct disk write"),
    (r"mkfs\.", "Format filesystem"),
    (r"fdisk", "Disk partitioning"),

    # Network/Security attacks
    (r":\(\)\s*\{\s*:\s*\|\s*:&\s*\}\s*;\s*:", "Fork bomb"),  # :(){ :|:& };:
    (r"\|\s*base64\s+-d\s*\|\s*sh", "Encoded shell execution"),
    # The trailing \b keeps "| shasum", "| sha256sum", "| shellcheck" from
    # being mistaken for a shell; the optional sudo catches "| sudo sh".
    (r"curl\s+.*\|\s*(?:sudo\s+)?sh\b", "Pipe curl to shell"),
    (r"wget\s+.*\|\s*(?:sudo\s+)?sh\b", "Pipe wget to shell"),
    (r"curl\s+.*\|\s*(?:sudo\s+)?bash\b", "Pipe curl to bash"),
    (r"wget\s+.*\|\s*(?:sudo\s+)?bash\b", "Pipe wget to bash"),
    (r"nc\s+-e", "Netcat with execute"),
    (r"ncat\s+-e", "Ncat with execute"),

    # Git destructive operations
    (r"git\s+push\s+.*--force\s+.*main", "Force push to main"),
    (r"git\s+push\s+.*--force\s+.*master", "Force push to master"),
    (r"git\s+push\s+-f\s+.*main", "Force push to main"),
    (r"git\s+push\s+-f\s+.*master", "Force push to master"),
    # Same operation with the flag written after the branch, e.g.
    # "git push origin main --force". [^;&|]* stops the search at the end of
    # the git command so a later "rm -f" in a chain is not taken for the flag.
    (
        r"git\s+push\s+.*main\s+(?:[^;&|]*\s)?(?:--force|-f)(?:\s|$)",
        "Force push to main",
    ),
    (
        r"git\s+push\s+.*master\s+(?:[^;&|]*\s)?(?:--force|-f)(?:\s|$)",
        "Force push to master",
    ),
    (r"git\s+reset\s+--hard\s+origin", "Hard reset to origin"),
    (r"git\s+clean\s+-fdx", "Git clean all"),

    # System modification
    (r"chmod\s+777\s+/", "Chmod 777 on root"),
    (r"chown\s+-R\s+.*:.*\s+/", "Chown -R on root"),
    (r"sudo\s+rm", "Sudo rm"),
    (r"sudo\s+dd", "Sudo dd"),
    (r"sudo\s+mkfs", "Sudo mkfs"),

    # Environment/credentials
    (r"export\s+.*PASSWORD", "Export password variable"),
    (r"export\s+.*SECRET", "Export secret variable"),
    (r"export\s+.*TOKEN", "Export token variable"),
    (r"export\s+.*API_KEY", "Export API key variable"),
    (r"printenv\s*\|\s*curl", "Leak env to curl"),

    # Dangerous package operations
    (r"pip\s+install\s+--break-system-packages", "Pip break system"),
    (r"npm\s+publish", "NPM publish"),
    (r"pip\s+upload", "Pip upload"),

    # Database destruction
    (r"DROP\s+DATABASE", "Drop database"),
    (r"DROP\s+TABLE\s+\*", "Drop all tables"),
    (r"TRUNCATE\s+TABLE", "Truncate table"),
    (r"DELETE\s+FROM\s+\w+\s*;?\s*$", "Delete all from table"),

    # Infrastructure destruction
    (r"terraform\s+destroy", "Terraform destroy"),
    (r"kubectl\s+delete\s+.*--all", "Kubectl delete all"),

    # Container escape attempts
    (r"docker\s+run\s+.*--privileged", "Docker privileged"),
    (r"docker\s+run\s+.*-v\s+/:/", "Docker mount root"),
    (r"nsenter", "Namespace enter"),
]

# File paths that are rejected outright. Each entry is (regex, reason). The
# sections below are concatenated top to bottom, so the resulting list keeps
# the order in which the entries are written.
CRITICAL_FILE_PATTERNS: List[Tuple[str, str]] = (
    # System files
    [
        (r"^/etc/passwd$", "System password file"),
        (r"^/etc/shadow$", "System shadow file"),
        (r"^/etc/sudoers", "Sudoers file"),
        (r"^/etc/ssh/", "SSH configuration"),
        (r"^/root/", "Root home directory"),
        (r"^/boot/", "Boot directory"),
    ]
    # Private keys and secrets
    + [
        (r"\.pem$", "PEM private key"),
        (r"id_rsa$", "RSA private key"),
        (r"id_ed25519$", "ED25519 private key"),
        (r"\.key$", "Private key file"),
        (r"\.keystore$", "Java keystore"),
        (r"\.jks$", "Java keystore"),
    ]
    # Credentials files
    + [
        (r"\.env\.prod", "Production environment"),
        (r"\.env\.production", "Production environment"),
        (r"credentials\.json$", "Credentials file"),
        (r"secrets\.json$", "Secrets file"),
        (r"secrets\.ya?ml$", "Secrets YAML"),
        (r"\.aws/credentials$", "AWS credentials"),
        (r"\.kube/config$", "Kubernetes config"),
        (r"\.docker/config\.json$", "Docker config"),
        (r"\.netrc$", "Netrc credentials"),
        (r"\.npmrc$", "NPM credentials"),
        (r"\.pypirc$", "PyPI credentials"),
    ]
    # Database files
    + [
        (r"\.sqlite$", "SQLite database"),
        (r"\.db$", "Database file"),
        (r"dump\.sql$", "SQL dump"),
    ]
)

# =============================================================================
# HIGH RISK - Requires approval
# =============================================================================

# Commands that always require explicit approval. Each entry is
# (regex, reason); the regexes are searched anywhere in the command string.
HIGH_RISK_COMMAND_PATTERNS: List[Tuple[str, str]] = [
    # Git operations
    # Matches the force flag wherever it appears among the push arguments:
    # "git push --force", "git push -f origin x", "git push origin x --force"
    # and "--force-with-lease". [^;&|]* keeps the search inside the git
    # command so a "-f" belonging to a later chained command is ignored.
    (r"git\s+push\s+(?:[^;&|]*\s)?(?:--force\S*|-f)(?:\s|$)", "Force push"),
    (r"git\s+reset\s+--hard", "Hard reset"),
    (r"git\s+rebase", "Git rebase"),
    (r"git\s+merge\s+.*--no-ff", "Non-fast-forward merge"),
    (r"git\s+push\s+.*main", "Push to main"),
    (r"git\s+push\s+.*master", "Push to master"),
    (r"git\s+push\s+.*prod", "Push to prod"),

    # System modifications
    (r"chmod\s+[0-7]{3}", "Change permissions"),
    (r"chown", "Change ownership"),
    (r"sudo", "Sudo command"),

    # Package publishing
    (r"npm\s+version", "NPM version bump"),
    (r"yarn\s+publish", "Yarn publish"),

    # Docker operations
    (r"docker\s+build", "Docker build"),
    (r"docker\s+push", "Docker push"),
    # Accepts both the hyphenated binary and the "docker compose" subcommand.
    (r"docker[-\s]+compose\s+up", "Docker compose up"),

    # Database operations
    (r"migrate", "Database migration"),
    (r"prisma\s+push", "Prisma push"),
    (r"alembic\s+upgrade", "Alembic upgrade"),

    # Deployment
    (r"deploy", "Deployment command"),
    (r"kubectl\s+apply", "Kubernetes apply"),
    (r"terraform\s+apply", "Terraform apply"),
    (r"pulumi\s+up", "Pulumi up"),

    # Cloud operations
    (r"aws\s+s3\s+rm", "AWS S3 delete"),
    (r"gcloud\s+.*delete", "GCloud delete"),
]

# File paths whose changes always require explicit approval. Each entry is
# (regex, reason); the list is grown one section at a time, in this order.
HIGH_RISK_FILE_PATTERNS: List[Tuple[str, str]] = []

# Environment files
HIGH_RISK_FILE_PATTERNS += [
    (r"\.env$", "Environment file"),
    (r"\.env\.local$", "Local environment"),
    (r"\.env\..+$", "Environment variant"),
]

# Configuration files
HIGH_RISK_FILE_PATTERNS += [
    (r"config/production", "Production config"),
    (r"config/prod", "Production config"),
    (r"\.github/workflows/", "GitHub workflows"),
    (r"\.gitlab-ci\.yml$", "GitLab CI"),
    (r"Jenkinsfile$", "Jenkins pipeline"),
]

# Infrastructure
HIGH_RISK_FILE_PATTERNS += [
    (r"\.tf$", "Terraform file"),
    (r"\.tfvars$", "Terraform variables"),
    (r"\.tfstate$", "Terraform state"),
    (r"kubernetes/", "Kubernetes manifests"),
    (r"k8s/", "Kubernetes manifests"),
    (r"docker-compose.*\.ya?ml$", "Docker compose"),
    (r"Dockerfile$", "Dockerfile"),
    (r"kubeconfig", "Kubernetes config"),
]

# =============================================================================
# MEDIUM RISK - Edits OK, monitor
# =============================================================================

# Commands rated MEDIUM. Written as a regex -> reason mapping and flattened
# into (regex, reason) pairs; dict insertion order gives the list its order.
MEDIUM_RISK_COMMAND_PATTERNS: List[Tuple[str, str]] = list(
    {
        # Git operations
        r"git\s+commit": "Git commit",
        r"git\s+push": "Git push",
        r"git\s+checkout": "Git checkout",
        r"git\s+branch\s+-[dD]": "Delete branch",
        r"git\s+merge": "Git merge",
        # Package management
        r"npm\s+install": "NPM install",
        r"pip\s+install": "Pip install",
        r"yarn\s+add": "Yarn add",
        r"cargo\s+add": "Cargo add",
        r"pip\s+.*--trusted-host": "Pip trusted host",
        # Build operations
        r"npm\s+run\s+build": "NPM build",
        r"yarn\s+build": "Yarn build",
        r"make\s+": "Make command",
        r"cargo\s+build": "Cargo build",
    }.items()
)

# File paths rated MEDIUM. Each entry is (regex, reason).
MEDIUM_RISK_FILE_PATTERNS: List[Tuple[str, str]] = [
    # Source code: every row becomes (r"\.<extension>$", "<language> source").
    *(
        (rf"\.{extension}$", f"{language} source")
        for extension, language in (
            ("(ts|tsx|js|jsx)", "JavaScript/TypeScript"),
            ("py", "Python"),
            ("go", "Go"),
            ("rs", "Rust"),
            ("java", "Java"),
            ("rb", "Ruby"),
            ("php", "PHP"),
            ("cs", "C#"),
            ("cpp", "C++"),
            ("c", "C"),
        )
    ),
    # Config files
    (r"package\.json$", "Package manifest"),
    (r"tsconfig\.json$", "TypeScript config"),
    (r"pyproject\.toml$", "Python project config"),
    (r"requirements\.txt$", "Python requirements"),
    (r"Cargo\.toml$", "Cargo config"),
    (r"go\.mod$", "Go module"),
    (r"Makefile$", "Makefile"),
]

# =============================================================================
# LOW RISK - Auto-allowed
# =============================================================================

# Commands that are auto-allowed. Each entry is (regex, reason). Every regex
# is anchored at the start of the command string.
LOW_RISK_COMMAND_PATTERNS: List[Tuple[str, str]] = [
    # Read operations
    (r"^cat\s+", "Cat file"),
    (r"^head\s+", "Head file"),
    (r"^tail\s+", "Tail file"),
    (r"^less\s+", "Less file"),
    (r"^ls\s+", "List directory"),
    # find is read-only unless one of its actions deletes files, runs another
    # program or writes output to a file; the negative lookahead refuses the
    # LOW rating whenever such an action appears anywhere in the command.
    (
        r"^find(?![\s\S]*\s-(?:delete|exec|execdir|ok|okdir|fprint\S*|fls)(?:\s|$))\s+",
        "Find files",
    ),
    (r"^grep\s+", "Grep search"),
    (r"^rg\s+", "Ripgrep search"),
    (r"^pwd$", "Print working directory"),
    (r"^echo\s+", "Echo"),
    (r"^which\s+", "Which command"),
    (r"^env$", "Show environment"),

    # Test operations
    (r"^npm\s+test", "NPM test"),
    (r"^yarn\s+test", "Yarn test"),
    (r"^pytest", "Pytest"),
    (r"^python\s+-m\s+pytest", "Pytest"),
    (r"^go\s+test", "Go test"),
    (r"^cargo\s+test", "Cargo test"),
    (r"^jest", "Jest test"),
    (r"^vitest", "Vitest"),
    (r"^mocha", "Mocha test"),

    # Lint/format
    (r"^eslint", "ESLint"),
    (r"^prettier", "Prettier"),
    (r"^black\s+", "Black formatter"),
    (r"^ruff\s+", "Ruff linter"),
    (r"^mypy\s+", "Mypy type check"),
    (r"^tsc\s+", "TypeScript check"),
    (r"^rustfmt", "Rust formatter"),
    (r"^gofmt", "Go formatter"),
    (r"^clippy", "Rust clippy"),

    # Git read operations
    (r"^git\s+status", "Git status"),
    (r"^git\s+log", "Git log"),
    (r"^git\s+diff", "Git diff"),
    (r"^git\s+show", "Git show"),
    (r"^git\s+branch$", "Git branch list"),
    (r"^git\s+remote\s+-v", "Git remotes"),
]

# File paths that are auto-allowed. Each entry is (regex, reason).
#
# The name-based rules are tied to the final path component: they must start
# right after a separator (or at the start of the path) and may not run
# across another separator. Without that, any path that merely contains the
# word - "src/license_server.py", "tools/changelog/build.py",
# "src/contest_results.py" - would be auto-allowed.
LOW_RISK_FILE_PATTERNS: List[Tuple[str, str]] = [
    # Documentation
    (r"\.md$", "Markdown file"),
    (r"\.txt$", "Text file"),
    (r"\.rst$", "RST file"),
    # Bare name, or the name followed by "." / "-" and a suffix
    # (README.md, LICENSE-MIT, CHANGELOG-1.x.md).
    (r"(?:^|[\\/])README(?:[-.][^\\/]*)?$", "README file"),
    (r"(?:^|[\\/])LICENSE(?:[-.][^\\/]*)?$", "License file"),
    (r"(?:^|[\\/])CHANGELOG(?:[-.][^\\/]*)?$", "Changelog file"),

    # Tests
    (r"(?:^|[\\/])test_[^\\/]*\.py$", "Python test file"),
    (r".*_test\.py$", "Python test file"),
    (r".*\.test\.(ts|js)$", "JS/TS test file"),
    (r".*\.spec\.(ts|js)$", "JS/TS spec file"),
    (r"__tests__/", "Tests directory"),

    # Generated/build output
    (r"\.d\.ts$", "TypeScript declarations"),
    (r"\.map$", "Source map"),
    (r"\.min\.(js|css)$", "Minified file"),
]


class RiskPolicy:
    """
    Classifies risk levels for files and commands.

    Every pattern table is compiled once, case-insensitively, in
    ``__init__``. Tiers are consulted in the order CRITICAL, HIGH, LOW,
    MEDIUM and, within a tier, the first pattern that matches decides the
    result. Input that matches nothing is rated MEDIUM.

    Two rules stop a harmless-looking prefix from hiding something riskier:

    - A command is LOW only when every part of it is LOW. The LOW command
      patterns anchor only the start of the string, so without this rule
      ``echo ok && ./anything`` would inherit the rating of ``echo``.
    - File patterns are tried against the path as given and against a
      normalised spelling of it, so ``/etc//passwd`` or
      ``/tmp/../etc/passwd`` still hit the anchored ``^/etc/passwd$`` rule.
    """

    # Rank of each level, used when a file and a command are judged together.
    _SEVERITY = {
        RiskLevel.LOW: 0,
        RiskLevel.MEDIUM: 1,
        RiskLevel.HIGH: 2,
        RiskLevel.CRITICAL: 3,
    }

    # Shell operators after which another command starts: "&&", "||", ";",
    # "|", a newline, and a lone "&". The lookarounds leave the "&" of
    # redirections such as "2>&1" and "&>file" alone.
    _COMMAND_SEPARATOR = re.compile(r"&&|\|\||[;|\n]|(?<![<>])&(?!>)")

    # Openers of command / process substitution. The nested command cannot be
    # judged by a pattern anchored at the start of the string, so a command
    # containing any of these is never auto-allowed.
    _SUBSTITUTION_MARKERS = ("`", "$(", "<(", ">(")

    def __init__(self):
        """Initialize with compiled regex patterns."""

        def compile_table(table):
            # (regex source, reason) -> (compiled regex, reason)
            return [
                (re.compile(source, re.IGNORECASE), reason)
                for source, reason in table
            ]

        self._critical_commands = compile_table(CRITICAL_COMMAND_PATTERNS)
        self._critical_files = compile_table(CRITICAL_FILE_PATTERNS)

        self._high_commands = compile_table(HIGH_RISK_COMMAND_PATTERNS)
        self._high_files = compile_table(HIGH_RISK_FILE_PATTERNS)

        self._medium_commands = compile_table(MEDIUM_RISK_COMMAND_PATTERNS)
        self._medium_files = compile_table(MEDIUM_RISK_FILE_PATTERNS)

        self._low_commands = compile_table(LOW_RISK_COMMAND_PATTERNS)
        self._low_files = compile_table(LOW_RISK_FILE_PATTERNS)

    @staticmethod
    def _first_match(patterns, *texts):
        """Return the first (compiled pattern, reason) found in any of *texts*, or None.

        Patterns are tried in table order, so an earlier pattern wins even if
        it only matches a later text.
        """
        for pattern, reason in patterns:
            if any(pattern.search(text) for text in texts):
                return pattern, reason
        return None

    @staticmethod
    def _classified(level, hit):
        """Build the RiskClassification for a (compiled pattern, reason) hit."""
        pattern, reason = hit
        return RiskClassification(
            level=level,
            reason=reason,
            pattern_matched=pattern.pattern,
        )

    @staticmethod
    def _path_spellings(file_path):
        """Return the path as given, plus its normalised form when that differs.

        Normalising turns backslashes into forward slashes, collapses
        repeated separators and resolves "." and ".." components. The raw
        spelling is kept as well because some patterns depend on a trailing
        slash that normalisation removes.
        """
        import posixpath  # Local import: needed only by this helper.

        normalized = posixpath.normpath(file_path.replace("\\", "/"))
        if normalized.startswith("//"):
            # normpath deliberately preserves exactly two leading slashes.
            normalized = normalized[1:]
        if normalized == file_path:
            return (file_path,)
        return (file_path, normalized)

    def _is_entirely_low(self, command):
        """Return True when every chained part of *command* matches a LOW pattern.

        The split is purely textual and ignores quoting, so an operator
        inside a quoted argument also splits. That can only make the answer
        stricter: a fragment that is not itself LOW yields False.
        """
        if any(marker in command for marker in self._SUBSTITUTION_MARKERS):
            return False

        for segment in self._COMMAND_SEPARATOR.split(command):
            if not segment.strip():
                # Nothing between two operators, or after a trailing one.
                continue
            # Some LOW patterns need whitespace after the command name
            # ("^ls\s+"), others need the string to end right there
            # ("^pwd$"), so try the segment with and without its tail.
            spellings = (segment.lstrip(), segment.strip())
            if self._first_match(self._low_commands, *spellings) is None:
                return False
        return True

    def classify_file(self, file_path: str) -> RiskClassification:
        """
        Classify risk level of a file path.

        CRITICAL, HIGH and MEDIUM apply when either spelling of the path
        (raw or normalised) matches. LOW applies only when the raw path
        matches and every spelling is LOW, so a path cannot talk its way into
        auto-allow through "..", doubled slashes or backslashes.

        Args:
            file_path: Path to file

        Returns:
            RiskClassification with level and reason
        """
        spellings = self._path_spellings(file_path)

        for level, patterns in (
            (RiskLevel.CRITICAL, self._critical_files),
            (RiskLevel.HIGH, self._high_files),
        ):
            hit = self._first_match(patterns, *spellings)
            if hit is not None:
                return self._classified(level, hit)

        # LOW is checked before MEDIUM so that test files (which also match
        # the generic source-file patterns) come out LOW.
        low_hit = self._first_match(self._low_files, file_path)
        if low_hit is not None and all(
            self._first_match(self._low_files, spelling) is not None
            for spelling in spellings
        ):
            return self._classified(RiskLevel.LOW, low_hit)

        hit = self._first_match(self._medium_files, *spellings)
        if hit is not None:
            return self._classified(RiskLevel.MEDIUM, hit)

        # Default to MEDIUM for unknown files
        return RiskClassification(
            level=RiskLevel.MEDIUM,
            reason="Unknown file type, defaulting to medium risk",
        )

    def classify_command(self, command: str) -> RiskClassification:
        """
        Classify risk level of a command.

        CRITICAL, HIGH and MEDIUM patterns are searched in the whole command
        string. LOW additionally requires every chained part of the command
        to be LOW and the command to contain no command substitution.

        Args:
            command: Command string

        Returns:
            RiskClassification with level and reason
        """
        for level, patterns in (
            (RiskLevel.CRITICAL, self._critical_commands),
            (RiskLevel.HIGH, self._high_commands),
        ):
            hit = self._first_match(patterns, command)
            if hit is not None:
                return self._classified(level, hit)

        # LOW is checked before MEDIUM; the LOW patterns are anchored at the
        # start, so the rest of the command has to be vetted separately.
        low_hit = self._first_match(self._low_commands, command)
        if low_hit is not None and self._is_entirely_low(command):
            return self._classified(RiskLevel.LOW, low_hit)

        hit = self._first_match(self._medium_commands, command)
        if hit is not None:
            return self._classified(RiskLevel.MEDIUM, hit)

        if low_hit is not None:
            # Started like a LOW command but carries something else along.
            return RiskClassification(
                level=RiskLevel.MEDIUM,
                reason="Compound command is not entirely low risk, defaulting to medium risk",
            )

        # Default to MEDIUM for unknown commands
        return RiskClassification(
            level=RiskLevel.MEDIUM,
            reason="Unknown command, defaulting to medium risk",
        )

    def get_decision(
        self,
        file_path: Optional[str] = None,
        command: Optional[str] = None,
    ) -> RiskClassification:
        """
        Get full risk decision for a file or command.

        When both are supplied, each is classified and the more severe
        result is returned; on a tie the command's classification wins.
        None and the empty string both count as "not supplied".

        Args:
            file_path: Optional file path to classify
            command: Optional command to classify

        Returns:
            RiskClassification with full details; LOW with the reason
            "No file or command provided" when neither is supplied
        """
        decisions = []
        if command:
            decisions.append(self.classify_command(command))
        if file_path:
            decisions.append(self.classify_file(file_path))

        if not decisions:
            return RiskClassification(
                level=RiskLevel.LOW,
                reason="No file or command provided",
            )

        # max() keeps the first of several equal maxima, i.e. the command.
        return max(decisions, key=lambda decision: self._SEVERITY[decision.level])

    def is_auto_allowed(
        self,
        file_path: Optional[str] = None,
        command: Optional[str] = None,
    ) -> bool:
        """Check if action is auto-allowed (LOW risk)."""
        decision = self.get_decision(file_path, command)
        return decision.auto_allowed

    def is_auto_rejected(
        self,
        file_path: Optional[str] = None,
        command: Optional[str] = None,
    ) -> bool:
        """Check if action should be auto-rejected (CRITICAL risk)."""
        decision = self.get_decision(file_path, command)
        return decision.auto_rejected

    def requires_approval(
        self,
        file_path: Optional[str] = None,
        command: Optional[str] = None,
    ) -> bool:
        """Check if action requires explicit approval (HIGH risk)."""
        decision = self.get_decision(file_path, command)
        return decision.requires_approval

    def is_blocked(self, command: str) -> Tuple[bool, str]:
        """
        Check if a command should be blocked (CRITICAL risk).

        Args:
            command: Command string to check

        Returns:
            Tuple of (is_blocked: bool, reason: str); the reason is the
            empty string when the command is not blocked
        """
        result = self.classify_command(command)
        if result.level == RiskLevel.CRITICAL:
            return (True, result.reason)
        return (False, "")

    def classify_action(
        self,
        action_type: str,
        target: str,
    ) -> RiskClassification:
        """
        Classify risk level of an action.

        Args:
            action_type: Type of action ("command", "file_read", "file_write", "file_edit", etc.),
                compared case-insensitively
            target: The command or file path

        Returns:
            RiskClassification with level and reason
        """
        kind = action_type.lower()
        if kind.startswith("file") or kind in ("edit", "read", "write", "delete"):
            return self.classify_file(target)
        # "command" and every unrecognised action type are treated as commands.
        return self.classify_command(target)


# =============================================================================
# Module-level singleton and helper functions
# =============================================================================

# Shared instance; stays None until get_risk_policy() is first called.
_risk_policy: Optional[RiskPolicy] = None


def get_risk_policy() -> RiskPolicy:
    """Return the module-wide RiskPolicy, constructing it on first use."""
    global _risk_policy
    policy = _risk_policy
    if policy is None:
        policy = RiskPolicy()
        _risk_policy = policy
    return policy


def classify_command(command: str) -> RiskClassification:
    """Return the shared RiskPolicy's classification of *command*."""
    policy = get_risk_policy()
    return policy.classify_command(command)


def classify_file(file_path: str) -> RiskClassification:
    """Return the shared RiskPolicy's classification of *file_path*."""
    policy = get_risk_policy()
    return policy.classify_file(file_path)


def is_command_blocked(command: str) -> bool:
    """Report whether the shared RiskPolicy auto-rejects *command* (CRITICAL)."""
    policy = get_risk_policy()
    rejected = policy.is_auto_rejected(command=command)
    return rejected


def is_file_blocked(file_path: str) -> bool:
    """Report whether the shared RiskPolicy auto-rejects *file_path* (CRITICAL)."""
    policy = get_risk_policy()
    rejected = policy.is_auto_rejected(file_path=file_path)
    return rejected
