# uchill/chatnext/backend/apps/chatbot/services/summarization_service.py
"""
Summarization Service
Handles automatic conversation summarization using LangGraph's SummarizationNode.
Implements October 2025 best practices for memory management.
Usage:
from apps.chatbot.services import SummarizationService
# Create summarization node
node = SummarizationService.create_summarization_node(
model_name="gpt-4o-mini",
max_tokens=384,
max_summary_tokens=128
)
# Check if summarization needed
if SummarizationService.should_summarize(messages):
# Summarization will happen automatically via pre_model_hook
pass
"""
from typing import List, Dict, Any, Optional, Callable
from uuid import UUID
from langchain_core.messages import BaseMessage, SystemMessage
from langchain_core.messages.utils import count_tokens_approximately
from langchain_openai import ChatOpenAI
from langmem.short_term import SummarizationNode
from apps.chatbot.models import ChatSession
class SummarizationService:
    """Service for managing conversation summarization.

    Thin, stateless facade over LangMem's ``SummarizationNode`` plus a few
    manual helpers (token counting, compression, savings estimation) and
    Django-side persistence of per-user summarization preferences.
    All methods are static; the class is a namespace, not an object.
    """

    @staticmethod
    def create_summarization_node(
        model_name: str = "gpt-4o-mini",
        max_tokens: int = 384,
        max_summary_tokens: int = 128,
        token_counter: Optional[Callable] = None,
        output_messages_key: str = "llm_input_messages",
        summarization_prompt: Optional[str] = None,
    ) -> SummarizationNode:
        """Create a SummarizationNode for use with LangGraph agents.

        Args:
            model_name: Model used for summarization (a cheaper model is
                recommended, since summaries do not need the main model).
            max_tokens: Start summarizing once history exceeds this count.
            max_summary_tokens: Maximum tokens allowed in the summary.
            token_counter: Custom token counting function (optional);
                defaults to ``count_tokens_approximately``.
            output_messages_key: State key where summarized messages are
                written (matches what ``create_react_agent`` expects).
            summarization_prompt: Custom summarization prompt (optional);
                omitted entirely when not given so the node uses its own
                default.

        Returns:
            A configured ``SummarizationNode`` instance, suitable as a
            ``pre_model_hook`` for ``langgraph.prebuilt.create_react_agent``.
        """
        # temperature=0 keeps summaries deterministic.
        summarizer = ChatOpenAI(model=model_name, temperature=0)
        node_kwargs: Dict[str, Any] = {
            "token_counter": token_counter or count_tokens_approximately,
            "model": summarizer,
            "max_tokens": max_tokens,
            "max_summary_tokens": max_summary_tokens,
            "output_messages_key": output_messages_key,
        }
        # Only forward the prompt when the caller supplied one, so the
        # node's built-in default prompt is used otherwise.
        if summarization_prompt:
            node_kwargs["summarization_prompt"] = summarization_prompt
        return SummarizationNode(**node_kwargs)

    @staticmethod
    def should_summarize(
        messages: List[BaseMessage],
        threshold: int = 384,
        token_counter: Optional[Callable] = None,
    ) -> bool:
        """Check whether a conversation should be summarized.

        Args:
            messages: Messages to measure.
            threshold: Token threshold above which summarization is
                recommended.
            token_counter: Custom token counting function (optional).

        Returns:
            True if the approximate token count exceeds ``threshold``.
        """
        counter = token_counter or count_tokens_approximately
        return counter(messages) > threshold

    @staticmethod
    def get_summarization_config(session: ChatSession) -> Dict[str, Any]:
        """Build a summarization configuration from session/user settings.

        Args:
            session: ChatSession whose owning user carries
                ``ai_preferences`` with summarization settings.

        Returns:
            Keyword-argument dict compatible with
            ``create_summarization_node``.
        """
        user_prefs = session.user.ai_preferences
        return {
            # Cheaper model is sufficient for summaries.
            "model_name": "gpt-4o-mini",
            # ``or`` falls back when the preference is unset (None/0).
            "max_tokens": user_prefs.summarization_threshold or 384,
            "max_summary_tokens": user_prefs.max_summary_tokens or 128,
            "output_messages_key": "llm_input_messages",
        }

    @staticmethod
    def manual_summarize(
        messages: List[BaseMessage],
        model_name: str = "gpt-4o-mini",
        max_summary_tokens: int = 128,
        custom_prompt: Optional[str] = None,
    ) -> str:
        """Manually summarize a conversation (without SummarizationNode).

        Args:
            messages: Messages to summarize.
            model_name: Model to use for the summary call.
            max_summary_tokens: Maximum tokens in the generated summary.
            custom_prompt: Full replacement prompt (optional). When given,
                it is used verbatim and the conversation text is NOT
                appended — the caller is responsible for including it.

        Returns:
            The summary text produced by the model.
        """
        summarizer = ChatOpenAI(
            model=model_name,
            temperature=0,
            max_tokens=max_summary_tokens,
        )
        # Flatten the transcript into "role: content" lines.
        conversation_text = "\n".join(
            f"{msg.type}: {msg.content}" for msg in messages
        )
        prompt = custom_prompt or (
            "Concisely summarize the key points of this conversation. "
            "Focus on important information and context:\n\n"
            f"{conversation_text}"
        )
        response = summarizer.invoke([SystemMessage(content=prompt)])
        return response.content

    @staticmethod
    def create_summary_message(summary_text: str) -> SystemMessage:
        """Wrap a summary string in a tagged SystemMessage.

        Args:
            summary_text: The summary text.

        Returns:
            SystemMessage carrying the summary; ``is_summary`` in
            ``additional_kwargs`` lets downstream code recognize it.
        """
        return SystemMessage(
            content=f"Previous conversation summary: {summary_text}",
            additional_kwargs={"is_summary": True},
        )

    @staticmethod
    def summarize_and_compress(
        messages: List[BaseMessage],
        keep_recent: int = 10,
        model_name: str = "gpt-4o-mini",
    ) -> List[BaseMessage]:
        """Summarize older messages and keep recent ones in full.

        Args:
            messages: All messages, oldest first.
            keep_recent: Number of most-recent messages kept verbatim.
                ``keep_recent <= 0`` summarizes the whole conversation.
            model_name: Model used for summarization.

        Returns:
            ``[summary_message] + recent_messages``, or ``messages``
            unchanged when there is nothing to compress.
        """
        # Nothing to do for an empty list or a short-enough history.
        # NOTE: the keep_recent > 0 guard avoids the slicing pitfall where
        # messages[:-0] == [] would have summarized nothing and kept all.
        if not messages or (0 < len(messages) <= keep_recent):
            return messages

        if keep_recent > 0:
            old_messages = messages[:-keep_recent]
            recent_messages = messages[-keep_recent:]
        else:
            old_messages = list(messages)
            recent_messages = []

        summary = SummarizationService.manual_summarize(
            messages=old_messages,
            model_name=model_name,
        )
        summary_msg = SummarizationService.create_summary_message(summary)
        return [summary_msg] + recent_messages

    @staticmethod
    def get_token_count(
        messages: List[BaseMessage],
        counter: Optional[Callable] = None,
    ) -> int:
        """Count tokens in a message list.

        Args:
            messages: Messages to count.
            counter: Custom token counter (optional); defaults to
                ``count_tokens_approximately``.

        Returns:
            Approximate total token count.
        """
        counter = counter or count_tokens_approximately
        return counter(messages)

    @staticmethod
    def estimate_summary_savings(
        messages: List[BaseMessage],
        keep_recent: int = 10,
        max_summary_tokens: int = 128,
    ) -> Dict[str, int]:
        """Estimate token savings from summarizing older messages.

        Args:
            messages: Messages to analyze.
            keep_recent: How many recent messages would be kept in full.
            max_summary_tokens: Expected size of the summary.

        Returns:
            Dict with ``original_tokens``, ``compressed_tokens``,
            ``saved_tokens`` and ``savings_percent`` (rounded to 2 places).
        """
        original_tokens = SummarizationService.get_token_count(messages)

        # Short conversations would not be compressed at all.
        if len(messages) <= keep_recent:
            return {
                'original_tokens': original_tokens,
                'compressed_tokens': original_tokens,
                'saved_tokens': 0,
                'savings_percent': 0.0,
            }

        # Guard keep_recent <= 0: messages[-0:] would (wrongly) select the
        # whole list; with no recent messages kept, only the summary remains.
        recent = messages[-keep_recent:] if keep_recent > 0 else []
        recent_tokens = SummarizationService.get_token_count(recent)

        compressed_tokens = max_summary_tokens + recent_tokens
        saved_tokens = original_tokens - compressed_tokens
        savings_percent = (
            saved_tokens / original_tokens * 100 if original_tokens > 0 else 0
        )
        return {
            'original_tokens': original_tokens,
            'compressed_tokens': compressed_tokens,
            'saved_tokens': saved_tokens,
            'savings_percent': round(savings_percent, 2),
        }

    @staticmethod
    def update_session_summarization_settings(
        session_id: UUID,
        enable: bool,
        threshold: Optional[int] = None,
        max_summary_tokens: Optional[int] = None,
    ) -> ChatSession:
        """Update summarization settings for a session.

        Args:
            session_id: Chat session ID.
            enable: Enable/disable summarization for the session.
            threshold: Token threshold stored on the user's preferences
                (optional; ``None`` leaves it unchanged).
            max_summary_tokens: Max summary size stored on the user's
                preferences (optional; ``None`` leaves it unchanged).

        Returns:
            The updated (and saved) ChatSession.

        Raises:
            ChatSession.DoesNotExist: If no session has ``session_id``.
        """
        session = ChatSession.objects.get(id=session_id)
        session.enable_summarization = enable

        # BUGFIX: previously ``prefs`` was only bound inside the threshold
        # branch, so passing only max_summary_tokens raised NameError.
        # ``is not None`` also allows an explicit 0 to be stored.
        if threshold is not None or max_summary_tokens is not None:
            prefs = session.user.ai_preferences
            if threshold is not None:
                prefs.summarization_threshold = threshold
            if max_summary_tokens is not None:
                prefs.max_summary_tokens = max_summary_tokens
            prefs.save()

        session.save()
        return session