Fix vision pipeline: restore ffmpeg removed by apt-get autoremove, increase vision timeout, reduce frame count, add Discord activity awareness

- bot/Dockerfile: Add ffmpeg to the reinstall line after apt-get autoremove (autoremove was sweeping up ffmpeg as 'no longer needed' after the playwright install)
- bot/utils/image_handling.py: Increase video analysis timeout 120s→300s; reduce frame count 6→3 for Tenor GIFs (GTX 1660 VRAM constraint); raise the extract_video_frames() default num_frames 4→6
- bot/utils/activities.py: Add _activity_changed_at timestamp tracking, plus get_current_activity_label() and get_current_activity_fresh() with 30-min decay
- bot/utils/cat_client.py: Pass the current Discord activity to the Cheshire Cat pipeline
- bot/utils/llm.py: Inject the current Discord activity into the system prompt
- cat-plugins/*: Forward the Discord activity through working_memory to the personality plugins
- bot/persona/*/preamble.txt: Add Discord status usage guidelines to the character prompts
- llama-swap-rocm-config.yaml: Add qwen3.5 model entry for ComfyUI prompt generation
- AGENTS.md: New project documentation file
2026-05-27 01:18:12 +03:00
parent d333c61c8f
commit 9d2c14fa0b
12 changed files with 168 additions and 6 deletions
--- a/bot/utils/activities.py
+++ b/bot/utils/activities.py
@@ -71,6 +71,7 @@ MANUAL_OVERRIDE_DURATION = 1800  # 30 minutes

 # ── Current activity tracking ──
 _current_activity = None  # dict: {type, name, state, url} or None
+_activity_changed_at = 0.0  # Unix timestamp of the last time the activity was set to a non-None value; 0 = never set

 # Cache: (data_dict, file_mtime)
 _activities_cache = None
@@ -307,10 +308,48 @@ def get_current_activity():


 def _set_current_activity(activity_dict):
-    """Update the tracked current activity. Thread-safe."""
-    global _current_activity
+    """Update the tracked current activity. Thread-safe.
+    
+    Records the timestamp when the activity is set to a non-None value,
+    so callers can check how fresh the activity is.
+    """
+    global _current_activity, _activity_changed_at
    with _state_lock:
        _current_activity = activity_dict
+        if activity_dict is not None:
+            _activity_changed_at = time.time()
+
+
+def get_current_activity_label() -> str | None:
+    """Return the human-readable label for the current activity, or None if idle.
+    
+    Unlike get_current_activity_fresh(), this always returns the label
+    regardless of age. Useful for the Web UI and API endpoints.
+    """
+    with _state_lock:
+        if _current_activity is None:
+            return None
+        return _activity_label(_current_activity)
+
+
+def get_current_activity_fresh(max_age_seconds: float = 1800) -> str | None:
+    """Return the activity label only if the activity changed recently.
+    
+    Args:
+        max_age_seconds: Maximum age in seconds (default 30 minutes).
+        
+    Returns:
+        Human-readable activity label (e.g. "Playing osu!") if the activity
+        was set within max_age_seconds, or None if idle or too old.
+    """
+    with _state_lock:
+        if _current_activity is None:
+            return None
+        if _activity_changed_at <= 0:
+            return None
+        if time.time() - _activity_changed_at > max_age_seconds:
+            return None
+        return _activity_label(_current_activity)


 # ══════════════════════════════════════════════════════════════════════════════
--- a/bot/utils/cat_client.py
+++ b/bot/utils/cat_client.py
@@ -20,6 +20,7 @@ from typing import Optional, Dict, Any, List

 import globals
 from utils.logger import get_logger
+from utils.activities import get_current_activity_fresh

 logger = get_logger('llm')  # Use existing 'llm' logger component

@@ -161,6 +162,10 @@ class CatAdapter:
        # Pass media type so discord_bridge can add MEDIA NOTE to the prompt
        if media_type:
            payload["discord_media_type"] = media_type
+        # Pass current Discord activity if it changed recently (30-min decay window)
+        activity_label = get_current_activity_fresh()
+        if activity_label:
+            payload["discord_activity"] = activity_label

        try:
            # Build WebSocket URL from HTTP base URL
--- a/bot/utils/image_handling.py
+++ b/bot/utils/image_handling.py
@@ -158,7 +158,7 @@ async def convert_gif_to_mp4(gif_bytes):
        return None


-async def extract_video_frames(video_bytes, num_frames=4):
+async def extract_video_frames(video_bytes, num_frames=6):
    """
    Extract frames from a video or GIF for analysis.
    Returns a list of base64-encoded frames.
@@ -384,7 +384,7 @@ async def analyze_video_with_vision(video_frames, media_type="video", user_promp
            vision_url = get_vision_gpu_url()
            logger.info(f"Sending video analysis request to {vision_url} using model: {globals.VISION_MODEL} (media_type: {media_type}, frames: {len(video_frames)})")
            
-            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) as response:
+            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=300)) as response:
                if response.status == 200:
                    data = await response.json()
                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
--- a/bot/utils/llm.py
+++ b/bot/utils/llm.py
@@ -13,6 +13,7 @@ from utils.moods import load_mood_description
 from utils.conversation_history import conversation_history
 from utils.logger import get_logger
 from utils.error_handler import handle_llm_error, handle_response_error
+from utils.activities import get_current_activity_fresh

 logger = get_logger('llm')

@@ -374,6 +375,10 @@ VARIATION RULES (必須のバリエーションルール):
 {character_name} is currently feeling: {current_mood}
 Please respond in a way that reflects this emotional tone.{pfp_context}"""

+    # Inject current Discord activity if it changed recently (30-min decay window)
+    activity_label = get_current_activity_fresh()
+    if activity_label:
+        full_system_prompt += f"\nHer Discord status: {activity_label}"

    # Add media type awareness if provided
    if media_type: