"""
|
|
Summarization Service
|
|
|
|
Handles automatic conversation summarization using LangGraph's SummarizationNode.
|
|
Implements October 2025 best practices for memory management.
|
|
|
|
Usage:
|
|
from apps.chatbot.services import SummarizationService
|
|
|
|
# Create summarization node
|
|
node = SummarizationService.create_summarization_node(
|
|
model_name="gpt-4o-mini",
|
|
max_tokens=384,
|
|
max_summary_tokens=128
|
|
)
|
|
|
|
# Check if summarization needed
|
|
if SummarizationService.should_summarize(messages):
|
|
# Summarization will happen automatically via pre_model_hook
|
|
pass
|
|
"""
|
|
|
|
from typing import List, Dict, Any, Optional, Callable
|
|
from uuid import UUID
|
|
|
|
from langchain_core.messages import BaseMessage, SystemMessage
|
|
from langchain_core.messages.utils import count_tokens_approximately
|
|
from langchain_openai import ChatOpenAI
|
|
from langmem.short_term import SummarizationNode
|
|
|
|
from apps.chatbot.models import ChatSession
|
|
|
|
|
|
class SummarizationService:
|
|
"""Service for managing conversation summarization."""
|
|
|
|
    @staticmethod
    def create_summarization_node(
        model_name: str = "gpt-4o-mini",
        max_tokens: int = 384,
        max_summary_tokens: int = 128,
        token_counter: Optional[Callable] = None,
        output_messages_key: str = "llm_input_messages",
        summarization_prompt: Optional[str] = None
    ) -> SummarizationNode:
        """
        Create a SummarizationNode for use with LangGraph agents.

        Args:
            model_name: Model to use for summarization (cheaper model recommended)
            max_tokens: Start summarizing when message history exceeds this
            max_summary_tokens: Maximum tokens in the summary
            token_counter: Custom token counting function (optional)
            output_messages_key: Key for summarized messages in state
            summarization_prompt: Custom summarization prompt (optional)

        Returns:
            Configured SummarizationNode instance

        Example:
            # Create summarization node
            summarization_node = SummarizationService.create_summarization_node(
                model_name="gpt-4o-mini",
                max_tokens=500,
                max_summary_tokens=150
            )

            # Use with ReAct agent
            from langgraph.prebuilt import create_react_agent

            agent = create_react_agent(
                model=ChatOpenAI(model="gpt-4o"),
                tools=[...],
                pre_model_hook=summarization_node,  # Auto-summarization!
                checkpointer=checkpointer
            )
        """
        model = ChatOpenAI(model=model_name, temperature=0)

        kwargs = {
            "token_counter": token_counter or count_tokens_approximately,
            "model": model,
            "max_tokens": max_tokens,
            "max_summary_tokens": max_summary_tokens,
            "output_messages_key": output_messages_key,
        }

        if summarization_prompt:
            kwargs["summarization_prompt"] = summarization_prompt

        return SummarizationNode(**kwargs)

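    # Note on `output_messages_key`: writing the compressed history to
    # "llm_input_messages" (instead of overwriting "messages") lets the model
    # read the summarized view on each turn while the checkpointer still
    # persists the full, unmodified history under "messages".
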
    @staticmethod
    def should_summarize(
        messages: List[BaseMessage],
        threshold: int = 384,
        token_counter: Optional[Callable] = None
    ) -> bool:
        """
        Check if conversation should be summarized.

        Args:
            messages: List of messages to check
            threshold: Token threshold for summarization
            token_counter: Custom token counting function

        Returns:
            True if summarization recommended

        Example:
            if SummarizationService.should_summarize(messages):
                print("Conversation will be summarized on next turn")
        """
        counter = token_counter or count_tokens_approximately
        token_count = counter(messages)
        return token_count > threshold

    @staticmethod
    def get_summarization_config(
        session: ChatSession
    ) -> Dict[str, Any]:
        """
        Get summarization configuration from chat session settings.

        Args:
            session: ChatSession with summarization preferences

        Returns:
            Configuration dict for SummarizationNode

        Example:
            config = SummarizationService.get_summarization_config(session)
            node = SummarizationService.create_summarization_node(**config)
        """
        # Get user preferences
        user_prefs = session.user.ai_preferences

        config = {
            "model_name": "gpt-4o-mini",  # Use cheaper model for summarization
            "max_tokens": user_prefs.summarization_threshold or 384,
            "max_summary_tokens": user_prefs.max_summary_tokens or 128,
            "output_messages_key": "llm_input_messages"
        }

        return config

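    # Note: `or` treats unset preference values (None or 0) as "use the
    # default", so a zero threshold cannot be configured via preferences.
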
    @staticmethod
    def manual_summarize(
        messages: List[BaseMessage],
        model_name: str = "gpt-4o-mini",
        max_summary_tokens: int = 128,
        custom_prompt: Optional[str] = None
    ) -> str:
        """
        Manually summarize a conversation (without SummarizationNode).

        Args:
            messages: Messages to summarize
            model_name: Model to use
            max_summary_tokens: Max tokens in summary
            custom_prompt: Custom summarization prompt

        Returns:
            Summary text

        Example:
            summary = SummarizationService.manual_summarize(
                messages=old_messages,
                max_summary_tokens=200
            )
        """
        model = ChatOpenAI(
            model=model_name,
            temperature=0,
            max_tokens=max_summary_tokens
        )

        # Build conversation text
        conversation_text = "\n".join([
            f"{msg.type}: {msg.content}"
            for msg in messages
        ])

        # Default or custom instructions; the conversation text is appended
        # either way so a custom prompt does not silently drop the messages
        instructions = custom_prompt or (
            "Concisely summarize the key points of this conversation. "
            "Focus on important information and context:"
        )
        prompt = f"{instructions}\n\n{conversation_text}"

        response = model.invoke([SystemMessage(content=prompt)])
        return response.content

    @staticmethod
    def create_summary_message(summary_text: str) -> SystemMessage:
        """
        Create a system message containing the summary.

        Args:
            summary_text: The summary text

        Returns:
            SystemMessage with summary

        Example:
            summary_msg = SummarizationService.create_summary_message(
                "Previous conversation covered Python basics and loops."
            )
            messages = [summary_msg] + recent_messages
        """
        return SystemMessage(
            content=f"Previous conversation summary: {summary_text}",
            additional_kwargs={"is_summary": True}
        )

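    # The "is_summary" flag in additional_kwargs lets downstream code detect
    # messages that are already summaries (e.g., to avoid re-summarizing them).
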
    @staticmethod
    def summarize_and_compress(
        messages: List[BaseMessage],
        keep_recent: int = 10,
        model_name: str = "gpt-4o-mini"
    ) -> List[BaseMessage]:
        """
        Summarize older messages and keep recent ones in full.

        Args:
            messages: All messages
            keep_recent: Number of recent messages to keep in full
            model_name: Model for summarization

        Returns:
            Compressed message list with summary + recent messages

        Example:
            compressed = SummarizationService.summarize_and_compress(
                messages=all_messages,
                keep_recent=10
            )
        """
        if len(messages) <= keep_recent:
            return messages

        # Split into old and recent
        old_messages = messages[:-keep_recent]
        recent_messages = messages[-keep_recent:]

        # Summarize old messages
        summary = SummarizationService.manual_summarize(
            messages=old_messages,
            model_name=model_name
        )

        # Create summary message
        summary_msg = SummarizationService.create_summary_message(summary)

        # Combine summary + recent
        return [summary_msg] + recent_messages

    @staticmethod
    def get_token_count(
        messages: List[BaseMessage],
        counter: Optional[Callable] = None
    ) -> int:
        """
        Count tokens in message list.

        Args:
            messages: Messages to count
            counter: Custom token counter (optional)

        Returns:
            Total token count

        Example:
            tokens = SummarizationService.get_token_count(messages)
            print(f"Conversation uses {tokens} tokens")
        """
        counter = counter or count_tokens_approximately
        return counter(messages)

    @staticmethod
    def estimate_summary_savings(
        messages: List[BaseMessage],
        keep_recent: int = 10,
        max_summary_tokens: int = 128
    ) -> Dict[str, Any]:
        """
        Estimate token savings from summarization.

        Args:
            messages: Messages to analyze
            keep_recent: How many recent to keep
            max_summary_tokens: Expected summary size

        Returns:
            Dict with original, compressed, and saved tokens

        Example:
            savings = SummarizationService.estimate_summary_savings(messages)
            print(f"Would save {savings['saved_tokens']} tokens")
        """
        original_tokens = SummarizationService.get_token_count(messages)

        if len(messages) <= keep_recent:
            return {
                'original_tokens': original_tokens,
                'compressed_tokens': original_tokens,
                'saved_tokens': 0,
                'savings_percent': 0.0
            }

        recent_tokens = SummarizationService.get_token_count(
            messages[-keep_recent:]
        )

        # Compressed size = summary token budget + untouched recent messages
        compressed_tokens = max_summary_tokens + recent_tokens
        saved_tokens = original_tokens - compressed_tokens
        savings_percent = (saved_tokens / original_tokens * 100) if original_tokens > 0 else 0

        return {
            'original_tokens': original_tokens,
            'compressed_tokens': compressed_tokens,
            'saved_tokens': saved_tokens,
            'savings_percent': round(savings_percent, 2)
        }

    @staticmethod
    def update_session_summarization_settings(
        session_id: UUID,
        enable: bool,
        threshold: Optional[int] = None,
        max_summary_tokens: Optional[int] = None
    ) -> ChatSession:
        """
        Update summarization settings for a session.

        Args:
            session_id: Chat session ID
            enable: Enable/disable summarization
            threshold: Token threshold (optional)
            max_summary_tokens: Max summary size (optional)

        Returns:
            Updated ChatSession

        Example:
            session = SummarizationService.update_session_summarization_settings(
                session_id=session.id,
                enable=True,
                threshold=500
            )
        """
        session = ChatSession.objects.get(id=session_id)
        session.enable_summarization = enable

        # Store thresholds in user preferences; handle each value independently
        # so max_summary_tokens is not dropped when threshold is omitted
        if threshold is not None or max_summary_tokens is not None:
            prefs = session.user.ai_preferences
            if threshold is not None:
                prefs.summarization_threshold = threshold
            if max_summary_tokens is not None:
                prefs.max_summary_tokens = max_summary_tokens
            prefs.save()

        session.save()
        return session
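

# A minimal sketch of wiring per-session summarization into an agent, mirroring
# the docstring examples above (assumes `tools` and `checkpointer` are defined
# elsewhere in the application):
#
#     from langgraph.prebuilt import create_react_agent
#
#     config = SummarizationService.get_summarization_config(session)
#     node = SummarizationService.create_summarization_node(**config)
#     agent = create_react_agent(
#         model=ChatOpenAI(model="gpt-4o"),
#         tools=tools,
#         pre_model_hook=node,
#         checkpointer=checkpointer,
#     )


if __name__ == "__main__":
    # Offline smoke test for the pure token-math helpers. No OpenAI key is
    # needed (only the approximate token counter runs), but Django settings
    # must be configured because this module imports ChatSession.
    from langchain_core.messages import AIMessage, HumanMessage

    demo_messages = [
        HumanMessage(content="What is a Python list comprehension?"),
        AIMessage(content="A compact way to build a list from any iterable."),
    ] * 20  # repeated so the history likely exceeds the 384-token default

    print("tokens:", SummarizationService.get_token_count(demo_messages))
    print("should summarize:", SummarizationService.should_summarize(demo_messages))
    print("estimated savings:", SummarizationService.estimate_summary_savings(demo_messages))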