"""
Token Usage Model - Track AI token consumption and costs.
"""
|
|
|
|
from django.db import models
|
|
from django.conf import settings
|
|
from django.utils.translation import gettext_lazy as _
|
|
from core.models import TimestampedModel
|
|
from decimal import Decimal
|
|
|
|
|
|
class TokenUsage(TimestampedModel):
    """
    Track token usage and costs for AI interactions.

    Each row records one AI API request: which user (and optionally which
    chat session) incurred it, which model served it, the token counts,
    and the resulting cost in USD.

    This helps with:
    - User billing and quotas
    - Cost analytics
    - Usage patterns
    - Budget management
    """

    # User and session association
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="token_usage",
        help_text=_("User who incurred this usage"),
    )

    # Nullable: usage not tied to a conversation (e.g. embeddings) has no session.
    chat_session = models.ForeignKey(
        "chatbot.ChatSession",
        on_delete=models.CASCADE,
        related_name="token_usage",
        null=True,
        blank=True,
        help_text=_("Chat session this usage belongs to"),
    )

    # Model information
    model_name = models.CharField(
        max_length=100, help_text=_("AI model used (e.g., gpt-4o, gpt-4o-mini)")
    )

    # Token counts. total_tokens is recomputed from the parts in save().
    prompt_tokens = models.IntegerField(
        default=0, help_text=_("Tokens in the prompt/input")
    )

    completion_tokens = models.IntegerField(
        default=0, help_text=_("Tokens in the completion/output")
    )

    total_tokens = models.IntegerField(
        default=0, help_text=_("Total tokens (prompt + completion)")
    )

    # Reasoning tokens (for o1/o3-style models); NULL when not applicable.
    reasoning_tokens = models.IntegerField(
        default=0,
        null=True,
        blank=True,
        help_text=_("Reasoning tokens (for models that support it)"),
    )

    # Cost tracking (USD, micro-dollar precision).
    # total_cost is recomputed from the parts in save().
    prompt_cost = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0.000000"),
        help_text=_("Cost for prompt tokens in USD"),
    )

    completion_cost = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0.000000"),
        help_text=_("Cost for completion tokens in USD"),
    )

    total_cost = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0.000000"),
        help_text=_("Total cost in USD"),
    )

    # Request metadata
    request_type = models.CharField(
        max_length=50,
        default="chat",
        choices=[
            ("chat", "Chat Completion"),
            ("summarization", "Conversation Summarization"),
            ("embedding", "Text Embedding"),
            ("tool_call", "Tool/Function Call"),
            ("vision", "Vision Analysis"),
        ],
        help_text=_("Type of API request"),
    )

    endpoint = models.CharField(
        max_length=255, blank=True, null=True, help_text=_("API endpoint used")
    )

    # Performance metrics
    response_time_ms = models.IntegerField(
        null=True, blank=True, help_text=_("Response time in milliseconds")
    )

    was_cached = models.BooleanField(
        default=False, help_text=_("Whether response was served from cache")
    )

    # Error tracking
    had_error = models.BooleanField(
        default=False, help_text=_("Whether this request had an error")
    )

    error_message = models.TextField(
        blank=True, null=True, help_text=_("Error message if request failed")
    )

    # Additional metadata
    metadata = models.JSONField(
        default=dict, blank=True, help_text=_("Additional usage metadata")
    )

    class Meta:
        verbose_name = _("Token Usage")
        verbose_name_plural = _("Token Usage")
        ordering = ["-created_at"]
        # Indexes cover the hot query paths: per-user and per-session
        # history, per-model/per-type analytics, and user+model rollups.
        indexes = [
            models.Index(
                fields=["user", "-created_at"], name="tokenusage_user_date_idx"
            ),
            models.Index(
                fields=["chat_session", "-created_at"],
                name="tokenusage_session_date_idx",
            ),
            models.Index(fields=["model_name"], name="tokenusage_model_idx"),
            models.Index(fields=["request_type"], name="tokenusage_type_idx"),
            models.Index(
                fields=["user", "model_name"], name="tokenusage_user_model_idx"
            ),
        ]

    def __str__(self):
        return f"{self.user.email} - {self.total_tokens} tokens - ${self.total_cost}"

    def save(self, *args, **kwargs):
        """Calculate total tokens and cost before saving.

        ``total_tokens`` and ``total_cost`` are always derived from their
        component fields, so callers only need to populate the parts.
        """
        # Total includes reasoning tokens when present (field is nullable).
        self.total_tokens = self.prompt_tokens + self.completion_tokens
        if self.reasoning_tokens:
            self.total_tokens += self.reasoning_tokens

        self.total_cost = self.prompt_cost + self.completion_cost

        super().save(*args, **kwargs)

    @classmethod
    def calculate_cost(
        cls, model_name, prompt_tokens, completion_tokens, reasoning_tokens=0
    ):
        """
        Calculate cost based on model pricing.

        Args:
            model_name: Name of the AI model
            prompt_tokens: Number of prompt tokens
            completion_tokens: Number of completion tokens
            reasoning_tokens: Number of reasoning tokens (for o1/o3 models)

        Returns:
            dict: {'prompt_cost': Decimal, 'completion_cost': Decimal, 'total_cost': Decimal}
        """
        # Pricing per 1M tokens (as of Oct 2025)
        PRICING = {
            "gpt-4o": {
                "prompt": Decimal("2.50"),  # $2.50 per 1M tokens
                "completion": Decimal("10.00"),  # $10.00 per 1M tokens
            },
            "gpt-4o-mini": {
                "prompt": Decimal("0.15"),  # $0.15 per 1M tokens
                "completion": Decimal("0.60"),  # $0.60 per 1M tokens
            },
            "gpt-4-turbo": {
                "prompt": Decimal("10.00"),
                "completion": Decimal("30.00"),
            },
            "gpt-3.5-turbo": {
                "prompt": Decimal("0.50"),
                "completion": Decimal("1.50"),
            },
            "o1-preview": {
                "prompt": Decimal("15.00"),
                "completion": Decimal("60.00"),
            },
            "o1-mini": {
                "prompt": Decimal("3.00"),
                "completion": Decimal("12.00"),
            },
        }

        # Unknown models fall back to the cheapest mainstream pricing so a
        # new model name never raises — it just under/over-estimates cost.
        model_pricing = PRICING.get(model_name, PRICING["gpt-4o-mini"])

        per_million = Decimal("1000000")

        # Convert counts through str() so ints/floats both become exact
        # Decimals before the per-token division.
        prompt_cost = (
            Decimal(str(prompt_tokens)) * model_pricing["prompt"]
        ) / per_million
        completion_cost = (
            Decimal(str(completion_tokens)) * model_pricing["completion"]
        ) / per_million

        # Reasoning tokens are billed at the completion rate.
        if reasoning_tokens:
            completion_cost += (
                Decimal(str(reasoning_tokens)) * model_pricing["completion"]
            ) / per_million

        return {
            "prompt_cost": prompt_cost,
            "completion_cost": completion_cost,
            "total_cost": prompt_cost + completion_cost,
        }

    @classmethod
    def get_user_usage_today(cls, user):
        """Get user's token usage for today.

        Returns:
            dict: 'total_tokens' (int), 'total_cost' (Decimal), and
            'message_count' (int), all zeroed when there is no usage today.
        """
        from django.utils import timezone

        # Midnight of the current timezone context.
        # NOTE(review): with USE_TZ=True and no active timezone this is UTC
        # midnight — confirm whether "today" should be the user's local day.
        today_start = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)

        usage = cls.objects.filter(user=user, created_at__gte=today_start).aggregate(
            total_tokens=models.Sum("total_tokens"),
            total_cost=models.Sum("total_cost"),
            message_count=models.Count("id"),
        )

        # Sum() yields None over an empty queryset, so coalesce to zero.
        return {
            "total_tokens": usage["total_tokens"] or 0,
            "total_cost": usage["total_cost"] or Decimal("0.00"),
            "message_count": usage["message_count"] or 0,
        }

    @classmethod
    def check_user_limits(cls, user, additional_tokens=0):
        """
        Check if user has exceeded daily limits.

        Args:
            user: User object
            additional_tokens: Tokens about to be used

        Returns:
            dict: {'allowed': bool, 'reason': str, 'usage': dict}
        """
        # Narrowed from a bare `except:`: a missing preferences row raises
        # RelatedObjectDoesNotExist (subclass of both AttributeError and
        # ObjectDoesNotExist); any other failure should propagate.
        try:
            preferences = user.ai_preferences
        except (AttributeError, models.ObjectDoesNotExist):
            # No preferences set, allow
            return {"allowed": True, "reason": "No limits set", "usage": {}}

        if not preferences.has_usage_limits:
            return {"allowed": True, "reason": "No limits set", "usage": {}}

        usage_today = cls.get_user_usage_today(user)

        # Check message limit (0 or negative means "no limit").
        if preferences.daily_message_limit > 0:
            if usage_today["message_count"] >= preferences.daily_message_limit:
                return {
                    "allowed": False,
                    "reason": f"Daily message limit reached ({preferences.daily_message_limit})",
                    "usage": usage_today,
                }

        # Check token limit, counting the tokens about to be spent.
        if preferences.daily_token_limit > 0:
            if (
                usage_today["total_tokens"] + additional_tokens
            ) > preferences.daily_token_limit:
                return {
                    "allowed": False,
                    "reason": f"Daily token limit reached ({preferences.daily_token_limit})",
                    "usage": usage_today,
                }

        return {"allowed": True, "reason": "Within limits", "usage": usage_today}