Fix reply-context speaker confusion with structured metadata pipeline
Previously, when a user replied to Miku's message via Discord's reply
feature, Miku's quoted words were embedded directly into the user's
message text using the format:
[Replying to your message: "Miku's words"] User's response
This caused two problems:
1. The LLM had to infer from the phrase "your message" that the quoted text
was Miku's words — a fragile inference that frequently led it to attribute them to the user
2. When stored in episodic memory as [User]: ..., Miku's quoted words
were permanently mislabeled under the user's speaker prefix
Now reply context flows through as structured metadata:
- bot/bot.py captures the replied-to text WITHOUT embedding it in the prompt
- cat_client.py passes it as discord_reply_context in the WebSocket payload
- discord_bridge.py injects it as agent_input['reply_context'] — a
CLEARLY LABELED note: [The user is replying to what you (Miku) said — ...]
- miku_personality.py + evil_miku_personality.py render it via the
{reply_context} placeholder in the prompt suffix, after the memory context
and tools output and before the conversation history
This keeps Miku's words as a separate context note, never mixed into
the user's HumanMessage. Episodic memory now stores only the user's actual
words. The fallback path (direct LLM query when Cat does not respond) also uses
a cleaner format with explicit speaker labels: [Context: you (Miku) said: ...] followed by [User says:] ...
This commit is contained in:
15
bot/bot.py
15
bot/bot.py
@@ -284,8 +284,12 @@ async def on_message(message):
|
|||||||
|
|
||||||
prompt = text # No cleanup — keep it raw
|
prompt = text # No cleanup — keep it raw
|
||||||
user_id = str(message.author.id)
|
user_id = str(message.author.id)
|
||||||
|
reply_context = None # Will be passed as structured metadata to Cat pipeline
|
||||||
|
|
||||||
# If user is replying to a specific message, add context marker
|
# If user is replying to a specific message, capture the context
|
||||||
|
# WITHOUT embedding it in the prompt text (that caused speaker confusion).
|
||||||
|
# Instead, it's passed as structured metadata — the Cat plugin injects it
|
||||||
|
# into the prompt as a clearly labeled context note, preserving speaker boundaries.
|
||||||
if message.reference:
|
if message.reference:
|
||||||
try:
|
try:
|
||||||
replied_msg = await message.channel.fetch_message(message.reference.message_id)
|
replied_msg = await message.channel.fetch_message(message.reference.message_id)
|
||||||
@@ -293,8 +297,7 @@ async def on_message(message):
|
|||||||
if replied_msg.author == globals.client.user:
|
if replied_msg.author == globals.client.user:
|
||||||
# Truncate the replied message to keep prompt manageable
|
# Truncate the replied message to keep prompt manageable
|
||||||
replied_content = replied_msg.content[:200] + "..." if len(replied_msg.content) > 200 else replied_msg.content
|
replied_content = replied_msg.content[:200] + "..." if len(replied_msg.content) > 200 else replied_msg.content
|
||||||
# Add reply context marker to the prompt
|
reply_context = replied_content
|
||||||
prompt = f'[Replying to your message: "{replied_content}"] {prompt}'
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to fetch replied message for context: {e}")
|
logger.error(f"Failed to fetch replied message for context: {e}")
|
||||||
|
|
||||||
@@ -364,6 +367,7 @@ async def on_message(message):
|
|||||||
author_name=author_name,
|
author_name=author_name,
|
||||||
mood=current_mood,
|
mood=current_mood,
|
||||||
response_type=response_type,
|
response_type=response_type,
|
||||||
|
reply_context=reply_context,
|
||||||
)
|
)
|
||||||
if cat_result:
|
if cat_result:
|
||||||
response, cat_full_prompt = cat_result
|
response, cat_full_prompt = cat_result
|
||||||
@@ -395,8 +399,11 @@ async def on_message(message):
|
|||||||
|
|
||||||
# Fallback to direct LLM query if Cat didn't respond
|
# Fallback to direct LLM query if Cat didn't respond
|
||||||
if not response:
|
if not response:
|
||||||
|
fallback_prompt = prompt
|
||||||
|
if reply_context:
|
||||||
|
fallback_prompt = f'[Context: you (Miku) said: {reply_context}]\n[User says:] {prompt}'
|
||||||
response = await query_llama(
|
response = await query_llama(
|
||||||
prompt,
|
fallback_prompt,
|
||||||
user_id=str(message.author.id),
|
user_id=str(message.author.id),
|
||||||
guild_id=guild_id,
|
guild_id=guild_id,
|
||||||
response_type=response_type,
|
response_type=response_type,
|
||||||
|
|||||||
@@ -109,6 +109,7 @@ class CatAdapter:
|
|||||||
mood: Optional[str] = None,
|
mood: Optional[str] = None,
|
||||||
response_type: str = "dm_response",
|
response_type: str = "dm_response",
|
||||||
media_type: Optional[str] = None,
|
media_type: Optional[str] = None,
|
||||||
|
reply_context: Optional[str] = None,
|
||||||
) -> Optional[tuple]:
|
) -> Optional[tuple]:
|
||||||
"""
|
"""
|
||||||
Send a message through the Cat pipeline via WebSocket and get a response.
|
Send a message through the Cat pipeline via WebSocket and get a response.
|
||||||
@@ -162,6 +163,12 @@ class CatAdapter:
|
|||||||
# Pass media type so discord_bridge can add MEDIA NOTE to the prompt
|
# Pass media type so discord_bridge can add MEDIA NOTE to the prompt
|
||||||
if media_type:
|
if media_type:
|
||||||
payload["discord_media_type"] = media_type
|
payload["discord_media_type"] = media_type
|
||||||
|
# Pass the message the user is replying to (if any) as structured metadata.
|
||||||
|
# The discord_bridge plugin injects this into the prompt as a clearly-labeled
|
||||||
|
# context note — keeping Miku's words separate from the user's message text
|
||||||
|
# and preventing the speaker confusion that the old embed-in-prompt format caused.
|
||||||
|
if reply_context:
|
||||||
|
payload["discord_reply_context"] = reply_context
|
||||||
# Pass current Discord activity if it changed recently (30-min decay window)
|
# Pass current Discord activity if it changed recently (30-min decay window)
|
||||||
activity_label = get_current_activity_fresh()
|
activity_label = get_current_activity_fresh()
|
||||||
if activity_label:
|
if activity_label:
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ def before_cat_reads_message(user_message_json: dict, cat) -> dict:
|
|||||||
evil_mode = user_message_json.get('discord_evil_mode', False)
|
evil_mode = user_message_json.get('discord_evil_mode', False)
|
||||||
media_type = user_message_json.get('discord_media_type', None)
|
media_type = user_message_json.get('discord_media_type', None)
|
||||||
activity = user_message_json.get('discord_activity', None)
|
activity = user_message_json.get('discord_activity', None)
|
||||||
|
reply_context = user_message_json.get('discord_reply_context', None)
|
||||||
|
|
||||||
# Also check working memory for backward compatibility
|
# Also check working memory for backward compatibility
|
||||||
if not guild_id:
|
if not guild_id:
|
||||||
@@ -57,6 +58,7 @@ def before_cat_reads_message(user_message_json: dict, cat) -> dict:
|
|||||||
cat.working_memory['evil_mode'] = evil_mode
|
cat.working_memory['evil_mode'] = evil_mode
|
||||||
cat.working_memory['media_type'] = media_type
|
cat.working_memory['media_type'] = media_type
|
||||||
cat.working_memory['activity'] = activity
|
cat.working_memory['activity'] = activity
|
||||||
|
cat.working_memory['reply_context'] = reply_context
|
||||||
|
|
||||||
return user_message_json
|
return user_message_json
|
||||||
|
|
||||||
@@ -375,7 +377,21 @@ Please respond in a way that reflects this emotional tone."""
|
|||||||
print(f" [Discord Bridge] Error building system prefix: {e}")
|
print(f" [Discord Bridge] Error building system prefix: {e}")
|
||||||
system_prefix = cat.working_memory.get('full_system_prefix', '[system prefix not available]')
|
system_prefix = cat.working_memory.get('full_system_prefix', '[system prefix not available]')
|
||||||
|
|
||||||
full_prompt = f"{system_prefix}\n\n# Context\n\n{episodic_mem}\n\n{declarative_mem}\n\n{tools_output}\n\n# Conversation until now:\nHuman: {user_input}"
|
# Build reply context note if the user is replying to Miku's message.
|
||||||
|
# This injects Miku's quoted words as a SEPARATE clearly-labeled context note
|
||||||
|
# (not embedded in the user's message text). Keeps speaker boundaries intact
|
||||||
|
# and prevents the LLM from misattributing Miku's words to the user.
|
||||||
|
# Uses a colon+space delimiter (no nested quotes) to avoid formatting issues
|
||||||
|
# when the replied message itself contains double-quote characters.
|
||||||
|
reply_context = cat.working_memory.get('reply_context')
|
||||||
|
if reply_context:
|
||||||
|
reply_context_note = f'[The user is replying to what you (Miku) said — you said: {reply_context}]'
|
||||||
|
agent_input['reply_context'] = reply_context_note
|
||||||
|
else:
|
||||||
|
reply_context_note = ''
|
||||||
|
agent_input['reply_context'] = ''
|
||||||
|
|
||||||
|
full_prompt = f"{system_prefix}\n\n# Context\n\n{episodic_mem}\n\n{declarative_mem}\n\n{tools_output}\n\n{reply_context_note}\n\n# Conversation until now:\nHuman: {user_input}"
|
||||||
cat.working_memory['last_full_prompt'] = full_prompt
|
cat.working_memory['last_full_prompt'] = full_prompt
|
||||||
|
|
||||||
return agent_input
|
return agent_input
|
||||||
|
|||||||
@@ -119,6 +119,8 @@ def agent_prompt_suffix(suffix, cat):
|
|||||||
|
|
||||||
{{tools_output}}
|
{{tools_output}}
|
||||||
|
|
||||||
|
{{reply_context}}
|
||||||
|
|
||||||
[Current mood: {mood_name.upper()} — respond accordingly]
|
[Current mood: {mood_name.upper()} — respond accordingly]
|
||||||
|
|
||||||
# Conversation until now:
|
# Conversation until now:
|
||||||
|
|||||||
@@ -91,6 +91,8 @@ def agent_prompt_suffix(suffix, cat):
|
|||||||
|
|
||||||
{tools_output}
|
{tools_output}
|
||||||
|
|
||||||
|
{reply_context}
|
||||||
|
|
||||||
# Conversation until now:
|
# Conversation until now:
|
||||||
(Note: In the conversation below, "Human" = the person you're talking to, "AI" = you, Miku. Pay attention to who said what.)"""
|
(Note: In the conversation below, "Human" = the person you're talking to, "AI" = you, Miku. Pay attention to who said what.)"""
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user