fix(memory): prevent stale name facts from overriding Discord display name

Two bugs were causing Miku to call users by the wrong names. BUG 1 - No authoritative source: Declarative name facts ('The user's name is Lily') were injected into the prompt without any counterweight. If an old consolidation run extracted a wrong name, Miku would believe it forever. Fix: agent_prompt_prefix now appends the user's Discord display name as AUTHORITATIVE context, with an explicit instruction to prefer it over any contradictory name facts. BUG 2 - Dedup prevented name updates: _is_duplicate_fact() used vector similarity to detect duplicates. 'The user's name is Lily' and 'The user's name is koko210Serve' are ~80% identical text, giving a cosine similarity above the 0.85 duplicate threshold. New, correct name facts were silently rejected as 'duplicates'. Fix: name facts now use _find_existing_fact() to compare fact_value directly. If the name changed, the old fact is deleted and the new one is stored. Also: the extraction prompt now includes the user's Discord display name as a hint, so the LLM knows the authoritative name when extracting facts during consolidation.
2026-05-17 11:20:49 +03:00
parent 5f06758c3e
commit 46ea4f2c53
1 changed files with 71 additions and 5 deletions
--- a/cat-plugins/memory_consolidation/memory_consolidation.py
+++ b/cat-plugins/memory_consolidation/memory_consolidation.py
@@ -266,6 +266,9 @@ def agent_prompt_prefix(prefix, cat):
                    current_evil = cat.working_memory.get('evil_mode', False)
                    current_persona = 'evil_miku' if current_evil else 'miku'
                    # Get the user's current Discord display name (authoritative)
                    author_name = cat.working_memory.get('author_name', '')
                    # Build the facts section with persona annotations
                    facts_text = "\n\n## Personal Facts About the User:\n"
                    for fact, fact_persona in high_confidence_facts:
@@ -275,6 +278,12 @@ def agent_prompt_prefix(prefix, cat):
                            facts_text += f"- {fact} (learned as {source_label})\n"
                        else:
                            facts_text += f"- {fact}\n"
                    # Add authoritative Discord display name — this OVERRIDES any stale name facts
                    if author_name:
                        facts_text += f"\n**AUTHORITATIVE: The user's current Discord display name is \"{author_name}\".**\n"
                        facts_text += "Use THIS name when addressing them. If any name fact above contradicts this, the display name is the truth.\n"
                    facts_text += "\n(Use these facts when answering the user's question)\n"
                    prefix += facts_text
                    print(f"[Declarative] Injected {len(high_confidence_facts)} facts into prompt (personas: {seen_personas}, current: {current_persona})")
@@ -549,8 +558,16 @@ def extract_and_store_facts(client, memory_ids, cat, user_id, persona='miku'):
        else:
            persona_context = "\nNOTE: These messages were exchanged with Normal Miku (the cheerful virtual idol).\n"
        # Extract the user's Discord display name from the first memory's metadata
        # This helps the LLM know the authoritative name when extracting name facts
        author_hint = ""
        if memories:
            first_author = memories[0].payload.get('metadata', {}).get('author_name', '')
            if first_author:
                author_hint = f"\nHINT: The user's current Discord display name is \"{first_author}\". Use this when determining their name.\n"
        extraction_prompt = f"""Analyze these user messages and extract ONLY factual personal information.
-{persona_context}
+{persona_context}{author_hint}
 User messages:
 {conversation_context}
@@ -623,7 +640,23 @@ IMPORTANT:
                        fact_type = 'education'
                        fact_value = fact_text.split("graduated from")[-1].strip()
-                    # Duplicate detection
+                    # Duplicate detection — with special handling for name facts
                    # Name facts with different values replace old ones (don't skip)
                    if fact_type == 'name':
                        existing_name = _find_existing_fact(client, cat, fact_type, user_id)
                        if existing_name:
                            old_value = existing_name['payload']['metadata'].get('fact_value', '')
                            if old_value.lower() != fact_value.lower():
                                # Different name — delete old, store new
                                client.delete(
                                    collection_name='declarative',
                                    points_selector=[existing_name['id']]
                                )
                                print(f"[Fact Update] Name changed: '{old_value}' → '{fact_value}'")
                            else:
                                print(f"[Fact Skip] Name unchanged: '{fact_value}'")
                                continue
                    else:
                        if _is_duplicate_fact(client, cat, fact_text, fact_type, user_id):
                            print(f"[Fact Skip] Duplicate: {fact_text}")
                            continue
@@ -661,6 +694,39 @@ IMPORTANT:
    return facts_stored
 def _find_existing_fact(client, cat, fact_type, user_id):
    """
    Find an existing fact of a specific type for a user.
    Returns a dict with 'id' and 'payload' keys, or None.
    Used by name-fact update logic to replace old names with new ones.
    """
    try:
        dummy_embedding = cat.embedder.embed_query("find fact")
        results = client.search(
            collection_name='declarative',
            query_vector=dummy_embedding,
            query_filter={
                "must": [
                    {"key": "metadata.source", "match": {"value": user_id}},
                    {"key": "metadata.fact_type", "match": {"value": fact_type}},
                ]
            },
            limit=1,
            score_threshold=0.0
        )
        if results:
            point = results[0]
            return {'id': point.id, 'payload': {'metadata': point.payload.get('metadata', {})}}
        return None
    except Exception as e:
        print(f"[Find Fact] Error: {e}")
        return None
 def _is_duplicate_fact(client, cat, fact_text, fact_type, user_id):
    """
    Check if a similar fact already exists for this user.