fix(memory): prevent stale name facts from overriding Discord display name

Two bugs were causing Miku to call users by wrong names: BUG 1 - No authoritative source: Declarative name facts ('The user's name is Lily') were injected into the prompt without any counterweight. If an old consolidation run extracted a wrong name, Miku would believe it forever. Fix: agent_prompt_prefix now appends the user's Discord display name as AUTHORITATIVE context, with explicit instruction to prefer it over any contradictory name facts. BUG 2 - Dedup prevented name updates: _is_duplicate_fact() used vector similarity to detect duplicates. 'The user's name is Lily' and 'The user's name is koko210Serve' are ~80% identical text, giving high cosine similarity (>0.85 threshold). New correct name facts were silently rejected as 'duplicates'. Fix: name facts now use _find_existing_fact() to compare fact_value directly. If the name changed, old fact is deleted and new one stored. Also: the extraction prompt now includes the user's Discord display name as a hint, so the LLM knows the authoritative name when extracting facts during consolidation.
2026-05-17 11:20:49 +03:00
parent 5f06758c3e
commit 46ea4f2c53
1 changed files with 71 additions and 5 deletions
--- a/cat-plugins/memory_consolidation/memory_consolidation.py
+++ b/cat-plugins/memory_consolidation/memory_consolidation.py
@@ -266,6 +266,9 @@ def agent_prompt_prefix(prefix, cat):
                    current_evil = cat.working_memory.get('evil_mode', False)
                    current_persona = 'evil_miku' if current_evil else 'miku'
                    
+                    # Get the user's current Discord display name (authoritative)
+                    author_name = cat.working_memory.get('author_name', '')
+                    
                    # Build the facts section with persona annotations
                    facts_text = "\n\n## Personal Facts About the User:\n"
                    for fact, fact_persona in high_confidence_facts:
@@ -275,6 +278,12 @@ def agent_prompt_prefix(prefix, cat):
                            facts_text += f"- {fact} (learned as {source_label})\n"
                        else:
                            facts_text += f"- {fact}\n"
+                    
+                    # Add authoritative Discord display name — this OVERRIDES any stale name facts
+                    if author_name:
+                        facts_text += f"\n**AUTHORITATIVE: The user's current Discord display name is \"{author_name}\".**\n"
+                        facts_text += "Use THIS name when addressing them. If any name fact above contradicts this, the display name is the truth.\n"
+                    
                    facts_text += "\n(Use these facts when answering the user's question)\n"
                    prefix += facts_text
                    print(f"[Declarative] Injected {len(high_confidence_facts)} facts into prompt (personas: {seen_personas}, current: {current_persona})")
@@ -549,8 +558,16 @@ def extract_and_store_facts(client, memory_ids, cat, user_id, persona='miku'):
        else:
            persona_context = "\nNOTE: These messages were exchanged with Normal Miku (the cheerful virtual idol).\n"

+        # Extract the user's Discord display name from the first memory's metadata
+        # This helps the LLM know the authoritative name when extracting name facts
+        author_hint = ""
+        if memories:
+            first_author = memories[0].payload.get('metadata', {}).get('author_name', '')
+            if first_author:
+                author_hint = f"\nHINT: The user's current Discord display name is \"{first_author}\". Use this when determining their name.\n"
+
        extraction_prompt = f"""Analyze these user messages and extract ONLY factual personal information.
-{persona_context}
+{persona_context}{author_hint}
 User messages:
 {conversation_context}

@@ -623,10 +640,26 @@ IMPORTANT:
                        fact_type = 'education'
                        fact_value = fact_text.split("graduated from")[-1].strip()

-                    # Duplicate detection
-                    if _is_duplicate_fact(client, cat, fact_text, fact_type, user_id):
-                        print(f"[Fact Skip] Duplicate: {fact_text}")
-                        continue
+                    # Duplicate detection — with special handling for name facts
+                    # Name facts with different values replace old ones (don't skip)
+                    if fact_type == 'name':
+                        existing_name = _find_existing_fact(client, cat, fact_type, user_id)
+                        if existing_name:
+                            old_value = existing_name['payload']['metadata'].get('fact_value', '')
+                            if old_value.lower() != fact_value.lower():
+                                # Different name — delete old, store new
+                                client.delete(
+                                    collection_name='declarative',
+                                    points_selector=[existing_name['id']]
+                                )
+                                print(f"[Fact Update] Name changed: '{old_value}' → '{fact_value}'")
+                            else:
+                                print(f"[Fact Skip] Name unchanged: '{fact_value}'")
+                                continue
+                    else:
+                        if _is_duplicate_fact(client, cat, fact_text, fact_type, user_id):
+                            print(f"[Fact Skip] Duplicate: {fact_text}")
+                            continue

                    # Store fact using Cat's embedder
                    fact_embedding = cat.embedder.embed_query(fact_text)
@@ -661,6 +694,39 @@ IMPORTANT:
    return facts_stored


+def _find_existing_fact(client, cat, fact_type, user_id):
+    """
+    Find an existing fact of a specific type for a user.
+    Returns a dict with 'id' and 'payload' keys, or None.
+    Used by name-fact update logic to replace old names with new ones.
+    """
+    try:
+        dummy_embedding = cat.embedder.embed_query("find fact")
+        
+        results = client.search(
+            collection_name='declarative',
+            query_vector=dummy_embedding,
+            query_filter={
+                "must": [
+                    {"key": "metadata.source", "match": {"value": user_id}},
+                    {"key": "metadata.fact_type", "match": {"value": fact_type}},
+                ]
+            },
+            limit=1,
+            score_threshold=0.0
+        )
+        
+        if results:
+            point = results[0]
+            return {'id': point.id, 'payload': {'metadata': point.payload.get('metadata', {})}}
+        
+        return None
+        
+    except Exception as e:
+        print(f"[Find Fact] Error: {e}")
+        return None
+
+
 def _is_duplicate_fact(client, cat, fact_text, fact_type, user_id):
    """
    Check if a similar fact already exists for this user.