# uchill/chatnext/backend/apps/chatbot/models/token_usage.py
"""
Token Usage Model - Track AI token consumption and costs.
"""
from django.db import models
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from core.models import TimestampedModel
from decimal import Decimal
class TokenUsage(TimestampedModel):
    """
    Track token usage and costs for AI interactions.

    This helps with:
    - User billing and quotas
    - Cost analytics
    - Usage patterns
    - Budget management
    """

    # Pricing per 1M tokens in USD (as of Oct 2025).
    # Class-level constant so the table is built once, not on every
    # calculate_cost() call.
    PRICING = {
        "gpt-4o": {
            "prompt": Decimal("2.50"),  # $2.50 per 1M tokens
            "completion": Decimal("10.00"),  # $10.00 per 1M tokens
        },
        "gpt-4o-mini": {
            "prompt": Decimal("0.15"),  # $0.15 per 1M tokens
            "completion": Decimal("0.60"),  # $0.60 per 1M tokens
        },
        "gpt-4-turbo": {
            "prompt": Decimal("10.00"),
            "completion": Decimal("30.00"),
        },
        "gpt-3.5-turbo": {
            "prompt": Decimal("0.50"),
            "completion": Decimal("1.50"),
        },
        "o1-preview": {
            "prompt": Decimal("15.00"),
            "completion": Decimal("60.00"),
        },
        "o1-mini": {
            "prompt": Decimal("3.00"),
            "completion": Decimal("12.00"),
        },
    }
    # Pricing key used as a fallback for unrecognized model names.
    DEFAULT_PRICING_MODEL = "gpt-4o-mini"

    # --- User and session association ---
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="token_usage",
        help_text=_("User who incurred this usage"),
    )
    chat_session = models.ForeignKey(
        "chatbot.ChatSession",
        on_delete=models.CASCADE,
        related_name="token_usage",
        null=True,
        blank=True,
        help_text=_("Chat session this usage belongs to"),
    )

    # --- Model information ---
    model_name = models.CharField(
        max_length=100, help_text=_("AI model used (e.g., gpt-4o, gpt-4o-mini)")
    )

    # --- Token counts ---
    prompt_tokens = models.IntegerField(
        default=0, help_text=_("Tokens in the prompt/input")
    )
    completion_tokens = models.IntegerField(
        default=0, help_text=_("Tokens in the completion/output")
    )
    # Derived in save(); see save() below.
    total_tokens = models.IntegerField(
        default=0, help_text=_("Total tokens (prompt + completion)")
    )
    # Reasoning tokens (for o3/o4 models); nullable because older rows and
    # non-reasoning models have no value for it.
    reasoning_tokens = models.IntegerField(
        default=0,
        null=True,
        blank=True,
        help_text=_("Reasoning tokens (for models that support it)"),
    )

    # --- Cost tracking (USD, 6 decimal places for per-token precision) ---
    prompt_cost = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0.000000"),
        help_text=_("Cost for prompt tokens in USD"),
    )
    completion_cost = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0.000000"),
        help_text=_("Cost for completion tokens in USD"),
    )
    # Derived in save(); see save() below.
    total_cost = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0.000000"),
        help_text=_("Total cost in USD"),
    )

    # --- Request metadata ---
    request_type = models.CharField(
        max_length=50,
        default="chat",
        choices=[
            ("chat", "Chat Completion"),
            ("summarization", "Conversation Summarization"),
            ("embedding", "Text Embedding"),
            ("tool_call", "Tool/Function Call"),
            ("vision", "Vision Analysis"),
        ],
        help_text=_("Type of API request"),
    )
    endpoint = models.CharField(
        max_length=255, blank=True, null=True, help_text=_("API endpoint used")
    )

    # --- Performance metrics ---
    response_time_ms = models.IntegerField(
        null=True, blank=True, help_text=_("Response time in milliseconds")
    )
    was_cached = models.BooleanField(
        default=False, help_text=_("Whether response was served from cache")
    )

    # --- Error tracking ---
    had_error = models.BooleanField(
        default=False, help_text=_("Whether this request had an error")
    )
    error_message = models.TextField(
        blank=True, null=True, help_text=_("Error message if request failed")
    )

    # --- Additional metadata ---
    metadata = models.JSONField(
        default=dict, blank=True, help_text=_("Additional usage metadata")
    )

    class Meta:
        verbose_name = _("Token Usage")
        verbose_name_plural = _("Token Usage")
        ordering = ["-created_at"]
        indexes = [
            models.Index(
                fields=["user", "-created_at"], name="tokenusage_user_date_idx"
            ),
            models.Index(
                fields=["chat_session", "-created_at"],
                name="tokenusage_session_date_idx",
            ),
            models.Index(fields=["model_name"], name="tokenusage_model_idx"),
            models.Index(fields=["request_type"], name="tokenusage_type_idx"),
            models.Index(
                fields=["user", "model_name"], name="tokenusage_user_model_idx"
            ),
        ]

    def __str__(self):
        return f"{self.user.email} - {self.total_tokens} tokens - ${self.total_cost}"

    def save(self, *args, **kwargs):
        """Recalculate derived totals (tokens and cost) before saving."""
        self.total_tokens = self.prompt_tokens + self.completion_tokens
        # reasoning_tokens is nullable; only add a real, non-zero count.
        if self.reasoning_tokens:
            self.total_tokens += self.reasoning_tokens
        self.total_cost = self.prompt_cost + self.completion_cost
        super().save(*args, **kwargs)

    @classmethod
    def calculate_cost(
        cls, model_name, prompt_tokens, completion_tokens, reasoning_tokens=0
    ):
        """
        Calculate cost based on model pricing.

        Args:
            model_name: Name of the AI model. Unknown models fall back to
                ``DEFAULT_PRICING_MODEL`` pricing.
            prompt_tokens: Number of prompt tokens.
            completion_tokens: Number of completion tokens.
            reasoning_tokens: Number of reasoning tokens (for o1/o3 models);
                billed at the completion rate.

        Returns:
            dict: {'prompt_cost': Decimal, 'completion_cost': Decimal,
            'total_cost': Decimal} — all values in USD.
        """
        model_pricing = cls.PRICING.get(
            model_name, cls.PRICING[cls.DEFAULT_PRICING_MODEL]
        )
        # Pricing is quoted per 1M tokens; convert to a per-request amount.
        per_million = Decimal("1000000")
        prompt_cost = (
            Decimal(str(prompt_tokens)) * model_pricing["prompt"]
        ) / per_million
        completion_cost = (
            Decimal(str(completion_tokens)) * model_pricing["completion"]
        ) / per_million
        # Reasoning tokens cost the same as completion tokens; the truthiness
        # guard also skips None, which callers may pass for non-reasoning models.
        if reasoning_tokens:
            completion_cost += (
                Decimal(str(reasoning_tokens)) * model_pricing["completion"]
            ) / per_million
        return {
            "prompt_cost": prompt_cost,
            "completion_cost": completion_cost,
            "total_cost": prompt_cost + completion_cost,
        }

    @classmethod
    def get_user_usage_today(cls, user):
        """
        Get the user's aggregate token usage since local midnight today.

        Returns:
            dict: {'total_tokens': int, 'total_cost': Decimal,
            'message_count': int} — zeros when there is no usage yet.
        """
        from django.utils import timezone

        today_start = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
        totals = cls.objects.filter(user=user, created_at__gte=today_start).aggregate(
            total_tokens=models.Sum("total_tokens"),
            total_cost=models.Sum("total_cost"),
            message_count=models.Count("id"),
        )
        # Sum() yields None over an empty queryset; normalize to zero values.
        return {
            "total_tokens": totals["total_tokens"] or 0,
            "total_cost": totals["total_cost"] or Decimal("0.00"),
            "message_count": totals["message_count"] or 0,
        }

    @classmethod
    def check_user_limits(cls, user, additional_tokens=0):
        """
        Check if user has exceeded daily limits.

        Args:
            user: User object
            additional_tokens: Tokens about to be used

        Returns:
            dict: {'allowed': bool, 'reason': str, 'usage': dict}
        """
        try:
            preferences = user.ai_preferences
        except AttributeError:
            # Django's RelatedObjectDoesNotExist subclasses AttributeError:
            # no preferences record exists for this user, so no limits apply.
            return {"allowed": True, "reason": "No limits set", "usage": {}}
        if not preferences.has_usage_limits:
            return {"allowed": True, "reason": "No limits set", "usage": {}}

        usage_today = cls.get_user_usage_today(user)

        # Check message limit (0 means "no limit").
        if preferences.daily_message_limit > 0:
            if usage_today["message_count"] >= preferences.daily_message_limit:
                return {
                    "allowed": False,
                    "reason": f"Daily message limit reached ({preferences.daily_message_limit})",
                    "usage": usage_today,
                }

        # Check token limit, including the tokens about to be spent.
        if preferences.daily_token_limit > 0:
            if (
                usage_today["total_tokens"] + additional_tokens
            ) > preferences.daily_token_limit:
                return {
                    "allowed": False,
                    "reason": f"Daily token limit reached ({preferences.daily_token_limit})",
                    "usage": usage_today,
                }

        return {"allowed": True, "reason": "Within limits", "usage": usage_today}