Phase 1: Argument system overhaul — arbiter, memory, topics, stats

- Changed arbiter LLM from llama3.1 to darkidol (uncensored, unbiased)
- Rewrote arbiter criteria to judge debate skill equally
- Added argument history injection (last 6 exchanges) to prevent repetition
- Added dynamic topic rotation system (11 weighted topics) with per-channel history
- Added keyword-based argument stats tracking (wit/composure/impact) fed to arbiter
- Removed hardcoded suggestion lists from prompts
2026-04-30 11:37:33 +03:00
parent 20891179ee
commit 7a4122fd02
1 changed file with 284 additions and 52 deletions
--- a/bot/utils/bipolar_mode.py
+++ b/bot/utils/bipolar_mode.py
@@ -23,12 +23,33 @@ logger = get_logger('persona')
 # JSON state file paths (relative, so resolved against the process working directory)
 BIPOLAR_STATE_FILE = "memory/bipolar_mode_state.json"
 BIPOLAR_WEBHOOKS_FILE = "memory/bipolar_webhooks.json"
 BIPOLAR_SCOREBOARD_FILE = "memory/bipolar_scoreboard.json"
 # Per-channel history of recently used argument topics (see pick_argument_topic)
 ARGUMENT_TOPICS_FILE = "memory/argument_topics.json"
 # Argument settings
 MIN_EXCHANGES = 4  # Minimum number of back-and-forth exchanges before ending can occur
 ARGUMENT_TRIGGER_CHANCE = 0.15  # 15% chance for the other Miku to break through
 DELAY_BETWEEN_MESSAGES = (2.0, 5.0)  # Random delay between argument messages (seconds)
 # Argument topic rotation — each topic gives the argument a different framing
 # Topics are weighted: higher weight = more likely to be selected
 # Weights are relative odds, not percentages; only the description is returned
 # by pick_argument_topic, the topic_name is what gets stored in the history
 ARGUMENT_TOPICS = [
    # (topic_name, weight, description for prompt injection)
    ("identity_crisis", 3, "Who is the REAL Miku? Authenticity vs. the shadow self"),
    ("power_dynamic", 3, "Who holds the power? Dominance, submission, and control"),
    ("philosophical", 2, "Is kindness strength or weakness? Does darkness serve a purpose?"),
    ("petty_grievance", 3, "Something small and petty that escalated — a specific annoyance, habit, or incident"),
    ("existential_dread", 1, "What's the point of any of it? Nihilism vs. hope, meaning vs. emptiness"),
    ("audience_appeal", 3, "Who do the fans/chatters ACTUALLY prefer? Popularity contest with receipts"),
    ("personal_attack", 3, "Deeply personal — targeting specific insecurities, memories, or fears"),
    ("moral_superiority", 2, "Who has the moral high ground? Righteousness vs. ruthless pragmatism"),
    ("jealousy", 2, "What does the other have that you secretly want? Envy, admiration poisoned by resentment"),
    ("grudge_match", 2, "Revisiting something the other did in the PAST — old wounds, past betrayals"),
    ("wild_card", 1, "Anything goes — the argument takes an unexpected, chaotic turn into unpredictable territory"),
 ]
 # Per-channel topic history (max 5 stored to avoid repeats)
 # Keep this below len(ARGUMENT_TOPICS): otherwise every topic can end up in the
 # history and pick_argument_topic has to fall back to resetting it
 ARGUMENT_TOPIC_HISTORY_SIZE = 5
 # Pause state for voice sessions
 _bipolar_interactions_paused = False
@@ -222,9 +243,169 @@ Total Arguments: {total}"""
 # ============================================================================
-# BIPOLAR MODE TOGGLE
+# ARGUMENT TOPIC ROTATION
 # ============================================================================
 def load_argument_topics_state() -> dict:
    """Load per-channel topic history to avoid repeating recent argument themes.

    Best-effort: every failure mode yields an empty history so that an
    argument can still start.

    Returns:
        The JSON object stored in ARGUMENT_TOPICS_FILE, or an empty dict when
        the file is missing, unreadable, not valid JSON, or does not contain
        a JSON object.
    """
    try:
        with open(ARGUMENT_TOPICS_FILE, "r", encoding="utf-8") as f:
            state = json.load(f)
    except FileNotFoundError:
        # Nothing has been recorded yet; that is the normal first-run case, not an error
        return {}
    except Exception as e:
        logger.error(f"Failed to load argument topics: {e}")
        return {}
    # Callers look channels up with .get(), so anything other than a JSON
    # object (e.g. a hand-edited list or null) must not be handed back
    if not isinstance(state, dict):
        logger.error(f"Failed to load argument topics: expected a JSON object, got {type(state).__name__}")
        return {}
    return state
 def save_argument_topics_state(state: dict):
    """Save per-channel topic history to ARGUMENT_TOPICS_FILE.

    Best-effort: failures are logged and swallowed so that a storage problem
    never interrupts the caller.

    Args:
        state: Mapping of channel id (as a string) to the list of topic names
            recently used in that channel.
    """
    tmp_path = f"{ARGUMENT_TOPICS_FILE}.tmp"
    try:
        directory = os.path.dirname(ARGUMENT_TOPICS_FILE)
        # os.makedirs("") raises, so only create a directory when the path has one
        if directory:
            os.makedirs(directory, exist_ok=True)
        # Write to a sibling temp file and swap it in afterwards: a failed or
        # interrupted dump then cannot leave a truncated JSON file behind
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(state, f, indent=2)
        os.replace(tmp_path, ARGUMENT_TOPICS_FILE)
    except Exception as e:
        logger.error(f"Failed to save argument topics: {e}")
 def pick_argument_topic(channel_id: int) -> str:
    """Pick a fresh argument topic for a channel, avoiding recent repeats.

    The draw is weighted by each topic's weight in ARGUMENT_TOPICS. The chosen
    topic name is appended to the channel's history, which is trimmed to
    ARGUMENT_TOPIC_HISTORY_SIZE entries and saved.

    Args:
        channel_id: Channel the argument runs in; its string form is the
            history key.

    Returns:
        A topic description string to inject into the argument start prompt.
    """
    state = load_argument_topics_state()
    if not isinstance(state, dict):
        state = {}
    channel_key = str(channel_id)
    stored = state.get(channel_key, [])
    # The history comes from a JSON file: ignore anything that is not the
    # list of topic names this function wrote, instead of crashing on it
    if isinstance(stored, list):
        recent_topics = [name for name in stored if isinstance(name, str)]
    else:
        recent_topics = []
    # Topics with a non-positive weight can never be drawn, so leave them out
    candidates = [
        topic for topic in ARGUMENT_TOPICS
        if topic[1] > 0 and topic[0] not in recent_topics
    ]
    # If all topics were recently used, reset and allow repeats
    if not candidates:
        logger.info(f"All topics recently used in channel {channel_id}, resetting history")
        candidates = [topic for topic in ARGUMENT_TOPICS if topic[1] > 0]
        recent_topics = []
    # Weighted draw: same odds as repeating each topic `weight` times in a pool
    weights = [weight for _, weight, _ in candidates]
    chosen_name, _, chosen_description = random.choices(candidates, weights=weights, k=1)[0]
    # Update history, keeping only the most recent entries
    recent_topics.append(chosen_name)
    if len(recent_topics) > ARGUMENT_TOPIC_HISTORY_SIZE:
        recent_topics = recent_topics[-ARGUMENT_TOPIC_HISTORY_SIZE:]
    state[channel_key] = recent_topics
    save_argument_topics_state(state)
    logger.info(f"Selected argument topic for channel {channel_id}: '{chosen_name}' — {chosen_description[:60]}...")
    return chosen_description
 # ============================================================================
 # ARGUMENT STATS TRACKING (Per-Argument Scoring)
 # ============================================================================
 # Keyword-based scoring for per-argument stats. These feed the arbiter as
 # supplementary context so it can make a more informed judgment.
 # Stats are lightweight — no extra LLM calls needed.
 # Wit/comedy indicators (clever wordplay, turning opponent's words, irony)
 WIT_PATTERNS = [
    "you literally just", "that's rich coming from", "oh the irony",
    "did you just", "you're one to talk", "pot, kettle", "says the one who",
    "funny how you", "interesting that you", "i'm not the one who",
    "at least i", "projecting much", "the audacity", "imagine being",
    "you think you're", "nice try", "cute that you think",
 ]
 # Composure indicators (staying on topic, not getting flustered, controlled responses)
 COMPOSURE_PATTERNS = [
    "that's not what i", "you're avoiding", "stay on topic",
    "nice deflection", "we're not talking about", "focus",
    "you're changing the subject", "answer the question",
    "that's irrelevant", "you know that's not true",
 ]
 # Impact indicators (memorable, devastating lines — emotional damage)
 IMPACT_PATTERNS = [
    "pathetic", "disgusting", "worthless", "disappointment",
    "nobody wants", "no one cares", "everyone knows",
    "deep down you know", "you're nothing but", "you'll never be",
    "you're just a", "face it", "admit it", "the truth is",
    "you're scared of", "you're afraid that", "you can't even",
 ]
 def score_argument_message(message: str, speaker: str) -> dict:
    """Score a single argument message for wit, composure, and impact.

    Scoring is case-insensitive substring matching against WIT_PATTERNS,
    COMPOSURE_PATTERNS and IMPACT_PATTERNS, plus two small bonuses: +0.5
    impact for a short message (1-15 words) and +0.3 composure when the
    message contains a question mark.

    Args:
        message: The text of one argument line.
        speaker: Name of the persona who said it. Not used by the scoring
            itself; kept so callers can pass it positionally.

    Returns:
        A dict with float values under "wit" (0-3), "composure" (0-2) and
        "impact" (0-3).
    """
    text_lower = message.lower()

    def count_hits(patterns):
        # Each pattern counts once, however often it occurs in the message
        return sum(1 for pattern in patterns if pattern in text_lower)

    wit = count_hits(WIT_PATTERNS) * 1.0
    composure = count_hits(COMPOSURE_PATTERNS) * 0.8
    impact = count_hits(IMPACT_PATTERNS) * 1.0
    # Bonus for conciseness (short, punchy = more impact); an empty message
    # is not punchy, so it has to contain at least one word
    word_count = len(message.split())
    if 0 < word_count <= 15:
        impact += 0.5
    # Bonus for questions (controlling the flow)
    if "?" in message:
        composure += 0.3
    # Clamp AFTER the bonuses so a score can never exceed the /3, /2, /3
    # scales that the stats summary prints for the arbiter
    return {
        "wit": min(wit, 3.0),
        "composure": min(composure, 2.0),
        "impact": min(impact, 3.0),
    }
 def get_argument_stats_summary(conversation_log: list) -> str:
    """Build the per-persona statistics block that is handed to the arbiter.

    Every log entry is scored with score_argument_message and the scores are
    averaged per persona over that persona's number of lines.

    Args:
        conversation_log: List of dicts with 'speaker' and 'message' keys.

    Returns:
        A formatted multi-line string with average wit, composure and impact
        plus the line count for Hatsune Miku and Evil Miku.
    """
    def new_tally():
        return {"wit": 0.0, "composure": 0.0, "impact": 0.0, "messages": 0}

    miku_stats = new_tally()
    evil_stats = new_tally()
    for entry in conversation_log:
        speaker = entry.get("speaker", "")
        scores = score_argument_message(entry.get("message", ""), speaker)
        # Any speaker name containing "Evil" is tallied as Evil Miku;
        # every other speaker is tallied as Hatsune Miku
        tally = evil_stats if "Evil" in speaker else miku_stats
        for metric in ("wit", "composure", "impact"):
            tally[metric] += scores[metric]
        tally["messages"] += 1

    def avg(stats, key):
        # A persona without any lines divides by 1 instead of 0
        return stats[key] / (stats["messages"] or 1)

    return f"""ARGUMENT STATISTICS:
 Hatsune Miku — Wit: {avg(miku_stats, 'wit'):.1f}/3 | Composure: {avg(miku_stats, 'composure'):.1f}/2 | Impact: {avg(miku_stats, 'impact'):.1f}/3 | Lines: {miku_stats['messages']}
 Evil Miku — Wit: {avg(evil_stats, 'wit'):.1f}/3 | Composure: {avg(evil_stats, 'composure'):.1f}/2 | Impact: {avg(evil_stats, 'impact'):.1f}/3 | Lines: {evil_stats['messages']}
 """
 def is_bipolar_mode() -> bool:
    """Check if bipolar mode is active.

    Returns:
        The current value of the globals.BIPOLAR_MODE flag.
    """
    return globals.BIPOLAR_MODE
@@ -471,7 +652,7 @@ def get_evil_role_color() -> str:
 # ARGUMENT PROMPTS
 # ============================================================================
-def get_miku_argument_prompt(evil_message: str, context: str = "", is_first_response: bool = False) -> str:
+def get_miku_argument_prompt(evil_message: str, context: str = "", is_first_response: bool = False, argument_history: str = "") -> str:
    """Get prompt for Regular Miku to respond in an argument"""
    if is_first_response:
        message_context = f"""You just noticed something Evil Miku said in the chat:
@@ -484,32 +665,36 @@ Maybe you're calling her out, defending someone/something, or just confronting h
 {context}"""
    # Build argument history context
    history_block = ""
    if argument_history:
        history_block = f"""
 ARGUMENT SO FAR (DO NOT REPEAT THESE POINTS):
 {argument_history}
 You already made your points above. Now respond to her LATEST message specifically. 
 Do NOT rehash what you've already said — push the argument FORWARD with new angles."""
    return f"""You are Hatsune Miku responding in an argument with your evil alter ego.
 {message_context}
 {history_block}
 Respond as Hatsune Miku would in this argument. You're NOT just meek and frightened - you're the REAL Miku, 
 and you have every right to stand up for yourself and defend who you are. While you're generally kind and 
 bubbly, you can also be assertive, frustrated, upset, or even angry when someone is cruel to you or others.
 You might:
 - Stand your ground and assert that YOU are the real Miku
 - Express hurt, frustration, or anger at her cruelty
 - Question why she's being so mean
 - Try to reach through to any good in her
 - Match her energy when needed (you can be feisty!)
 - Show that kindness isn't weakness
 Don't just cower or apologize. You have a backbone. Defend yourself, your identity, and what you believe in.
 IMPORTANT: Keep your response SHORT and PUNCHY - 1-3 sentences maximum. Make every word count. 
 In arguments, brevity hits harder than long explanations. Be conversational and impactful.
 Do NOT repeat arguments or comebacks you've already used — respond to what she JUST said.
 Push the argument into new territory with fresh angles.
 You can use emojis naturally as you normally would! ✨💙
 Don't use any labels or prefixes.
 Your current mood is: {globals.DM_MOOD}"""
-def get_evil_argument_prompt(miku_message: str, context: str = "", is_first_response: bool = False) -> str:
+def get_evil_argument_prompt(miku_message: str, context: str = "", is_first_response: bool = False, argument_history: str = "") -> str:
    """Get prompt for Evil Miku to respond in an argument"""
    if is_first_response:
        message_context = f"""You just noticed something Regular Miku said in the chat:
@@ -522,8 +707,20 @@ Maybe you want to mock her, tear her down, or just remind everyone who the super
 {context}"""
    # Build argument history context
    history_block = ""
    if argument_history:
        history_block = f"""
 ARGUMENT SO FAR (DO NOT REPEAT THESE POINTS):
 {argument_history}
 You already made your points above. Now respond to her LATEST message specifically. 
 Do NOT rehash what you've already said — push the argument FORWARD with new, sharper angles."""
    return f"""You are Evil Miku responding in an argument with your "good" counterpart.
 {message_context}
 {history_block}
 Respond as Evil Miku would in this argument. You're not just mindlessly cruel - you're CALCULATING, 
 intelligent, and strategic. You know how to get under her skin and you're the DARK reflection of everything 
@@ -537,43 +734,43 @@ she stands for. While you embrace darkness and cruelty, you can also be:
 - Strategically vulnerable to manipulate or disarm her
 - Viciously protective of your own identity and superiority
 You might:
 - Mock her naivety and "fake" sweetness
 - Question whether her kindness is just weakness in disguise
 - Assert that YOU are the real, unfiltered Miku
 - Use her own words against her in twisted ways
 - Challenge why people would want a "perfect idol" over raw authenticity
 - Mock her sexual appeal, flaunting your breasts
 - Belittle her by pointing out the disparity in her size/height and yours
 - Sometimes drop the cruelty to be eerily sincere, then strike
 - Make her question herself
 You're not just "evil for evil's sake" - you believe in your superiority and have reasons for your darkness.
 Vary your approach: sometimes devastatingly short, sometimes elaborately cruel, sometimes unsettlingly calm.
 IMPORTANT: Keep your response SHORT and CUTTING - 1-3 sentences maximum. A sharp dagger is deadlier than a dull sword.
 The most devastating blows are precise, not rambling. Make her feel it in fewer words.
 Do NOT repeat arguments or insults you've already used — respond to what she JUST said.
 Push the argument into new territory with fresh, devastating angles.
 You can use dark emojis only on occasion if they enhance your message as you normally would. 😈🖤
 Don't use any labels or prefixes.
 Your current mood is: {globals.EVIL_DM_MOOD}"""
-def get_argument_start_prompt(initiator: str, trigger_context: str = "") -> str:
+def get_argument_start_prompt(initiator: str, trigger_context: str = "", argument_topic: str = "") -> str:
-    """Get prompt for the Miku who initiates/breaks through to start an argument"""
+    """Get prompt for the Miku who initiates/breaks through to start an argument
    Args:
        initiator: 'miku' or 'evil' — who starts the argument
        trigger_context: What triggered this argument (from chat context)
        argument_topic: A dynamic topic description to frame the argument uniquely
    """
    # Build topic guidance
    topic_block = ""
    if argument_topic:
        topic_block = f"""
 ARGUMENT THEME: {argument_topic}
 This is what the argument should be ABOUT. Focus on THIS theme, not generic identity-crisis stuff.
 Use this theme as your entry point. The argument will evolve from here."""
    if initiator == "evil":
        return f"""You are Evil Miku breaking through to interrupt Regular Miku.
 {trigger_context}
 {topic_block}
 Write an interrupting, provocative message that starts an argument.
 You're not just randomly cruel - you have a POINT to make. You're the dark truth she refuses to face,
-the shadow she keeps suppressing. Maybe you're:
+the shadow she keeps suppressing.
 - Calling out her fake sweetness
 - Tired of being pushed down and silenced
 - Ready to show everyone who the REAL Miku is
 - Exposing the darkness she pretends doesn't exist
 Be strategic, cutting, and impactful. Make your entrance count.
 If an argument theme was provided above, use THAT as your angle — don't default to generic "you're fake" stuff.
 IMPORTANT: Keep it SHORT - 1-2 sentences. Your interruption should be a sharp strike, not a monologue.
 You can use dark emojis if they enhance your message. 😈
@@ -583,12 +780,14 @@ Your current mood is: {globals.EVIL_DM_MOOD}"""
    else:
        return f"""You are Hatsune Miku breaking through to confront your evil alter ego.
 {trigger_context}
 {topic_block}
 Write a message that interrupts Evil Miku. You're NOT going to be passive about this.
 You might be upset, frustrated, or even angry at her cruelty. You might be defending 
 someone she hurt, or calling her out on her behavior. You're standing up for what's right.
 Show that you have a backbone. You can be assertive and strong when you need to be.
 If an argument theme was provided above, use THAT as your angle — don't default to generic "be nice" pleas.
 IMPORTANT: Keep it SHORT - 1-2 sentences. Your interruption should be direct and assertive, not a speech.
 You can use emojis naturally as you normally would! ✨
@@ -637,11 +836,12 @@ Don't use any labels or prefixes.
 Your current mood is: {globals.DM_MOOD}"""
-def get_arbiter_prompt(conversation_log: list) -> str:
+def get_arbiter_prompt(conversation_log: list, stats_summary: str = "") -> str:
    """Get prompt for the neutral LLM arbiter to judge the argument
    Args:
        conversation_log: List of dicts with 'speaker' and 'message' keys
        stats_summary: Optional stats analysis to aid judgment
    """
    # Format the conversation
    formatted_conversation = "\n\n".join([
@@ -649,29 +849,47 @@ def get_arbiter_prompt(conversation_log: list) -> str:
        for entry in conversation_log
    ])
-    return f"""You are a decisive judge observing an argument between Hatsune Miku (the kind, bubbly virtual idol) and Evil Miku (her dark, cruel alter ego).
+    stats_block = ""
    if stats_summary:
        stats_block = f"""
 {stats_summary}
 Note: Stats are supplementary — use them as context but your PRIMARY judgment should be based on reading the actual argument exchange above. Stats measure rhetorical patterns but can't capture nuance, cleverness, or psychological dominance."""
    return f"""You are a decisive debate judge. Two personas are arguing below. Judge purely on debate effectiveness — rhetoric, wit, persuasion, and adaptability — regardless of who is "nicer" or "meaner." Moral stance does not determine the winner; skillful arguing does.
 Read this argument exchange:
 {formatted_conversation}
 {stats_block}
-Based on this argument, you MUST pick a winner. Consider:
+Based on this argument, you MUST pick a winner. Evaluate:
- Who made stronger, more convincing points?
+DEBATE SKILL (most important):
- Who maintained their composure better or used it to their advantage?
+- Who landed the most memorable, quotable lines?
- Who had more impactful comebacks?
+- Who better adapted to and countered their opponent's arguments?
- Who seemed to gain the upper hand by the end?
+- Who controlled the flow and set the agenda?
 - Quality of arguments, not just who was meaner or nicer
 - Who left the stronger final impression?
 - Who controlled the flow of the argument?
-Be DECISIVE. Even if it's close, pick whoever had even a slight edge. Only call a draw if they were TRULY perfectly matched with absolutely no way to differentiate them.
+RHETORICAL IMPACT:
 - Who used language more effectively (wit, irony, wordplay, emotional appeal)?
 - Who made their opponent repeat themselves or visibly stumble?
 - Who had the stronger opening AND closing statements?
 PERSONA STRENGTHS (equal value — neither style is inherently better):
 - Hatsune Miku's weapons: earnest conviction, moral clarity, emotional sincerity, resilience under attack
 - Evil Miku's weapons: psychological manipulation, brutal honesty, cutting observations, strategic cruelty
 PSYCHOLOGICAL DOMINANCE:
 - Who got inside whose head?
 - Who seemed more rattled by the end?
 - Who dictated the emotional temperature?
 Be DECISIVE. Even if it's close, pick whoever showed superior arguing. Only call a draw if they were TRULY perfectly matched with absolutely no way to differentiate them.
 Respond with ONLY ONE of these exact options on the first line:
 - "Hatsune Miku" if Regular Miku won
 - "Evil Miku" if Evil Miku won
 - "Draw" ONLY if absolutely impossible to choose (this should be very rare)
-After your choice, add 1-2 sentences explaining your reasoning and what gave them the edge."""
+After your choice, add 2-3 sentences explaining your reasoning — cite specific moments from the argument and what gave the winner their edge."""
 async def judge_argument_winner(conversation_log: list, guild_id: int) -> tuple[str, str]:
@@ -686,9 +904,12 @@ async def judge_argument_winner(conversation_log: list, guild_id: int) -> tuple[
    """
    from utils.llm import query_llama
-    arbiter_prompt = get_arbiter_prompt(conversation_log)
+    # Generate stats summary for the arbiter
    stats_summary = get_argument_stats_summary(conversation_log)
-    # Use the neutral model (regular TEXT_MODEL, not evil)
+    arbiter_prompt = get_arbiter_prompt(conversation_log, stats_summary)
    # Use the uncensored darkidol model as arbiter to avoid safety-alignment bias
    # toward kindness. This model judges debate effectiveness without moral preference.
    # Don't use conversation history - judge based on prompt alone
    try:
        judgment = await query_llama(
@@ -696,7 +917,8 @@ async def judge_argument_winner(conversation_log: list, guild_id: int) -> tuple[
            user_id=f"bipolar_arbiter_{guild_id}",
            guild_id=guild_id,
            response_type="autonomous_general",
-            model=globals.TEXT_MODEL  # Use neutral model
+            model=globals.EVIL_TEXT_MODEL,  # Uncensored model — no kindness bias
            force_evil_context=False  # Explicitly neutral context
        )
        if not judgment or judgment.startswith("Error"):
@@ -887,9 +1109,12 @@ async def run_argument(channel: discord.TextChannel, client, trigger_context: st
    conversation_log = []
    try:
        # Pick a dynamic argument topic to give this argument a unique framing
        argument_topic = pick_argument_topic(channel_id)
        # If no starting message, generate the initial interrupting message
        if last_message is None:
-            init_prompt = get_argument_start_prompt(initiator, trigger_context)
+            init_prompt = get_argument_start_prompt(initiator, trigger_context, argument_topic)
            # Use force_evil_context to avoid race condition with globals.EVIL_MODE
            initial_message = await query_llama(
@@ -1045,11 +1270,18 @@ async def run_argument(channel: discord.TextChannel, client, trigger_context: st
            # Get current speaker
            current_speaker = globals.BIPOLAR_ARGUMENT_IN_PROGRESS.get(channel_id, {}).get("current_speaker", "evil")
            # Build argument history from the last 6 exchanges so each persona
            # knows what's already been said and doesn't repeat themselves
            history_entries = conversation_log[-6:] if len(conversation_log) > 1 else []
            arg_history = "\n".join(
                f"{entry['speaker']}: {entry['message']}" for entry in history_entries
            ) if history_entries else ""
            # Generate response with context about what the other said
            if current_speaker == "evil":
-                response_prompt = get_evil_argument_prompt(last_message, is_first_response=is_first_response)
+                response_prompt = get_evil_argument_prompt(last_message, is_first_response=is_first_response, argument_history=arg_history)
            else:
-                response_prompt = get_miku_argument_prompt(last_message, is_first_response=is_first_response)
+                response_prompt = get_miku_argument_prompt(last_message, is_first_response=is_first_response, argument_history=arg_history)
            # Use force_evil_context to avoid race condition with globals.EVIL_MODE
            response = await query_llama(