"""
Token Usage Model - Track AI token consumption and costs.
"""
|
|
|
|
from django.db import models
|
|
from django.conf import settings
|
|
from django.utils.translation import gettext_lazy as _
|
|
from core.models import TimestampedModel
|
|
from decimal import Decimal
|
|
|
|
|
|
class TokenUsage(TimestampedModel):
    """
    Track token usage and costs for AI interactions.

    Each row records one AI API request: which user (and optionally which
    chat session) incurred it, which model served it, the token counts,
    and the resulting cost in USD.

    This helps with:
    - User billing and quotas
    - Cost analytics
    - Usage patterns
    - Budget management
    """

    # User and session association
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="token_usage",
        help_text=_("User who incurred this usage"),
    )

    # Nullable: usage not tied to a conversation (e.g. embeddings) has no session.
    chat_session = models.ForeignKey(
        "chatbot.ChatSession",
        on_delete=models.CASCADE,
        related_name="token_usage",
        null=True,
        blank=True,
        help_text=_("Chat session this usage belongs to"),
    )

    # Model information
    model_name = models.CharField(
        max_length=100, help_text=_("AI model used (e.g., gpt-4o, gpt-4o-mini)")
    )

    # Token counts. total_tokens is recomputed from the parts in save().
    prompt_tokens = models.IntegerField(
        default=0, help_text=_("Tokens in the prompt/input")
    )

    completion_tokens = models.IntegerField(
        default=0, help_text=_("Tokens in the completion/output")
    )

    total_tokens = models.IntegerField(
        default=0, help_text=_("Total tokens (prompt + completion)")
    )

    # Reasoning tokens (for o1/o3-style models); NULL when not applicable.
    reasoning_tokens = models.IntegerField(
        default=0,
        null=True,
        blank=True,
        help_text=_("Reasoning tokens (for models that support it)"),
    )

    # Cost tracking (USD, micro-dollar precision).
    # total_cost is recomputed from the parts in save().
    prompt_cost = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0.000000"),
        help_text=_("Cost for prompt tokens in USD"),
    )

    completion_cost = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0.000000"),
        help_text=_("Cost for completion tokens in USD"),
    )

    total_cost = models.DecimalField(
        max_digits=10,
        decimal_places=6,
        default=Decimal("0.000000"),
        help_text=_("Total cost in USD"),
    )

    # Request metadata
    request_type = models.CharField(
        max_length=50,
        default="chat",
        choices=[
            ("chat", "Chat Completion"),
            ("summarization", "Conversation Summarization"),
            ("embedding", "Text Embedding"),
            ("tool_call", "Tool/Function Call"),
            ("vision", "Vision Analysis"),
        ],
        help_text=_("Type of API request"),
    )

    endpoint = models.CharField(
        max_length=255, blank=True, null=True, help_text=_("API endpoint used")
    )

    # Performance metrics
    response_time_ms = models.IntegerField(
        null=True, blank=True, help_text=_("Response time in milliseconds")
    )

    was_cached = models.BooleanField(
        default=False, help_text=_("Whether response was served from cache")
    )

    # Error tracking
    had_error = models.BooleanField(
        default=False, help_text=_("Whether this request had an error")
    )

    error_message = models.TextField(
        blank=True, null=True, help_text=_("Error message if request failed")
    )

    # Additional metadata
    metadata = models.JSONField(
        default=dict, blank=True, help_text=_("Additional usage metadata")
    )

    class Meta:
        verbose_name = _("Token Usage")
        verbose_name_plural = _("Token Usage")
        ordering = ["-created_at"]
        # Indexes cover the hot query paths: per-user and per-session
        # history, per-model/per-type analytics, and user+model rollups.
        indexes = [
            models.Index(
                fields=["user", "-created_at"], name="tokenusage_user_date_idx"
            ),
            models.Index(
                fields=["chat_session", "-created_at"],
                name="tokenusage_session_date_idx",
            ),
            models.Index(fields=["model_name"], name="tokenusage_model_idx"),
            models.Index(fields=["request_type"], name="tokenusage_type_idx"),
            models.Index(
                fields=["user", "model_name"], name="tokenusage_user_model_idx"
            ),
        ]

    def __str__(self):
        return f"{self.user.email} - {self.total_tokens} tokens - ${self.total_cost}"

    def save(self, *args, **kwargs):
        """Calculate total tokens and cost before saving.

        ``total_tokens`` and ``total_cost`` are always derived from their
        component fields, so callers only need to populate the parts.
        """
        # Total includes reasoning tokens when present (field is nullable).
        self.total_tokens = self.prompt_tokens + self.completion_tokens
        if self.reasoning_tokens:
            self.total_tokens += self.reasoning_tokens

        self.total_cost = self.prompt_cost + self.completion_cost

        super().save(*args, **kwargs)

    @classmethod
    def calculate_cost(
        cls, model_name, prompt_tokens, completion_tokens, reasoning_tokens=0
    ):
        """
        Calculate cost based on model pricing.

        Args:
            model_name: Name of the AI model
            prompt_tokens: Number of prompt tokens
            completion_tokens: Number of completion tokens
            reasoning_tokens: Number of reasoning tokens (for o1/o3 models)

        Returns:
            dict: {'prompt_cost': Decimal, 'completion_cost': Decimal, 'total_cost': Decimal}
        """
        # Pricing per 1M tokens (as of Oct 2025)
        PRICING = {
            "gpt-4o": {
                "prompt": Decimal("2.50"),  # $2.50 per 1M tokens
                "completion": Decimal("10.00"),  # $10.00 per 1M tokens
            },
            "gpt-4o-mini": {
                "prompt": Decimal("0.15"),  # $0.15 per 1M tokens
                "completion": Decimal("0.60"),  # $0.60 per 1M tokens
            },
            "gpt-4-turbo": {
                "prompt": Decimal("10.00"),
                "completion": Decimal("30.00"),
            },
            "gpt-3.5-turbo": {
                "prompt": Decimal("0.50"),
                "completion": Decimal("1.50"),
            },
            "o1-preview": {
                "prompt": Decimal("15.00"),
                "completion": Decimal("60.00"),
            },
            "o1-mini": {
                "prompt": Decimal("3.00"),
                "completion": Decimal("12.00"),
            },
        }

        # Unknown models fall back to the cheapest mainstream pricing so a
        # new model name never raises — it just under/over-estimates cost.
        model_pricing = PRICING.get(model_name, PRICING["gpt-4o-mini"])

        per_million = Decimal("1000000")

        # Convert counts through str() so ints/floats both become exact
        # Decimals before the per-token division.
        prompt_cost = (
            Decimal(str(prompt_tokens)) * model_pricing["prompt"]
        ) / per_million
        completion_cost = (
            Decimal(str(completion_tokens)) * model_pricing["completion"]
        ) / per_million

        # Reasoning tokens are billed at the completion rate.
        if reasoning_tokens:
            completion_cost += (
                Decimal(str(reasoning_tokens)) * model_pricing["completion"]
            ) / per_million

        return {
            "prompt_cost": prompt_cost,
            "completion_cost": completion_cost,
            "total_cost": prompt_cost + completion_cost,
        }

    @classmethod
    def get_user_usage_today(cls, user):
        """Get user's token usage for today.

        Returns:
            dict: 'total_tokens' (int), 'total_cost' (Decimal), and
            'message_count' (int), all zeroed when there is no usage today.
        """
        from django.utils import timezone

        # Midnight of the current timezone context.
        # NOTE(review): with USE_TZ=True and no active timezone this is UTC
        # midnight — confirm whether "today" should be the user's local day.
        today_start = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)

        usage = cls.objects.filter(user=user, created_at__gte=today_start).aggregate(
            total_tokens=models.Sum("total_tokens"),
            total_cost=models.Sum("total_cost"),
            message_count=models.Count("id"),
        )

        # Sum() yields None over an empty queryset, so coalesce to zero.
        return {
            "total_tokens": usage["total_tokens"] or 0,
            "total_cost": usage["total_cost"] or Decimal("0.00"),
            "message_count": usage["message_count"] or 0,
        }

    @classmethod
    def check_user_limits(cls, user, additional_tokens=0):
        """
        Check if user has exceeded daily limits.

        Args:
            user: User object
            additional_tokens: Tokens about to be used

        Returns:
            dict: {'allowed': bool, 'reason': str, 'usage': dict}
        """
        # Narrowed from a bare `except:`: a missing preferences row raises
        # RelatedObjectDoesNotExist (subclass of both AttributeError and
        # ObjectDoesNotExist); any other failure should propagate.
        try:
            preferences = user.ai_preferences
        except (AttributeError, models.ObjectDoesNotExist):
            # No preferences set, allow
            return {"allowed": True, "reason": "No limits set", "usage": {}}

        if not preferences.has_usage_limits:
            return {"allowed": True, "reason": "No limits set", "usage": {}}

        usage_today = cls.get_user_usage_today(user)

        # Check message limit (0 or negative means "no limit").
        if preferences.daily_message_limit > 0:
            if usage_today["message_count"] >= preferences.daily_message_limit:
                return {
                    "allowed": False,
                    "reason": f"Daily message limit reached ({preferences.daily_message_limit})",
                    "usage": usage_today,
                }

        # Check token limit, counting the tokens about to be spent.
        if preferences.daily_token_limit > 0:
            if (
                usage_today["total_tokens"] + additional_tokens
            ) > preferences.daily_token_limit:
                return {
                    "allowed": False,
                    "reason": f"Daily token limit reached ({preferences.daily_token_limit})",
                    "usage": usage_today,
                }

        return {"allowed": True, "reason": "Within limits", "usage": usage_today}