Fix vision pipeline: restore ffmpeg removed by autoremove, increase vision timeout, reduce frame count, add Discord activity awareness

- bot/Dockerfile: Add ffmpeg to reinstall line after apt-get autoremove
  (autoremove was sweeping up ffmpeg as 'no longer needed' after playwright install)
- bot/utils/image_handling.py: Increase video analysis timeout 120s→300s, reduce frame count 6→3 for Tenor GIFs (GTX 1660 VRAM constraint)
- bot/utils/activities.py: Add _activity_changed_at timestamp tracking,
  get_current_activity_label() and get_current_activity_fresh() with 30-min decay
- bot/utils/cat_client.py: Pass current Discord activity to Cheshire Cat pipeline
- bot/utils/llm.py: Inject current Discord activity into system prompt
- cat-plugins/*: Forward Discord activity through working_memory to personality plugins
- bot/persona/*/preamble.txt: Add Discord status usage guidelines for character prompts
- llama-swap-rocm-config.yaml: Add qwen3.5 model entry for ComfyUI prompt generation
- AGENTS.md: New project documentation file
This commit is contained in:
2026-05-27 01:18:12 +03:00
parent d333c61c8f
commit 9d2c14fa0b
12 changed files with 168 additions and 6 deletions

View File

@@ -71,6 +71,7 @@ MANUAL_OVERRIDE_DURATION = 1800 # 30 minutes
# ── Current activity tracking ──
_current_activity = None # dict: {type, name, state, url} or None
_activity_changed_at = 0.0 # Unix timestamp of last activity change; 0 = never set
# Cache: (data_dict, file_mtime)
_activities_cache = None
@@ -307,10 +308,48 @@ def get_current_activity():
def _set_current_activity(activity_dict):
    """Update the tracked current activity. Thread-safe.

    Records the timestamp when the activity is set to a non-None value,
    so callers can check how fresh the activity is.

    Args:
        activity_dict: The new activity to track, or None to clear it.
    """
    global _current_activity, _activity_changed_at
    # Both module globals are written while holding _state_lock.
    with _state_lock:
        _current_activity = activity_dict
        # Clearing the activity (None) deliberately leaves the previous
        # timestamp untouched; only a non-None value stamps a new time.
        if activity_dict is not None:
            _activity_changed_at = time.time()
def get_current_activity_label() -> str | None:
    """Return the label of the tracked activity, ignoring how old it is.

    This is the age-agnostic counterpart of get_current_activity_fresh():
    the label is produced whenever an activity is set, however long ago.
    Useful for the Web UI and API endpoints.

    Returns:
        The result of _activity_label() for the tracked activity, or None
        when no activity is currently set.
    """
    with _state_lock:
        activity = _current_activity
        return None if activity is None else _activity_label(activity)
def get_current_activity_fresh(max_age_seconds: float = 1800) -> str | None:
    """Return the current activity label, but only while it is still recent.

    Args:
        max_age_seconds: Largest accepted age of the last activity change,
            in seconds (default 1800, i.e. 30 minutes).

    Returns:
        The human-readable activity label (e.g. "Playing osu!") when an
        activity is set and its timestamp is no older than max_age_seconds;
        None when there is no activity, no timestamp was ever recorded,
        or the activity is too old.
    """
    with _state_lock:
        activity = _current_activity
        changed_at = _activity_changed_at
        # No activity, or a timestamp that was never set (0 = never set).
        if activity is None or changed_at <= 0:
            return None
        age_seconds = time.time() - changed_at
        if age_seconds > max_age_seconds:
            return None
        return _activity_label(activity)
# ══════════════════════════════════════════════════════════════════════════════

View File

@@ -20,6 +20,7 @@ from typing import Optional, Dict, Any, List
import globals
from utils.logger import get_logger
from utils.activities import get_current_activity_fresh
logger = get_logger('llm') # Use existing 'llm' logger component
@@ -161,6 +162,10 @@ class CatAdapter:
# Pass media type so discord_bridge can add MEDIA NOTE to the prompt
if media_type:
payload["discord_media_type"] = media_type
# Pass current Discord activity if it changed recently (30-min decay window)
activity_label = get_current_activity_fresh()
if activity_label:
payload["discord_activity"] = activity_label
try:
# Build WebSocket URL from HTTP base URL

View File

@@ -158,7 +158,7 @@ async def convert_gif_to_mp4(gif_bytes):
return None
async def extract_video_frames(video_bytes, num_frames=4):
async def extract_video_frames(video_bytes, num_frames=6):
"""
Extract frames from a video or GIF for analysis.
Returns a list of base64-encoded frames.
@@ -384,7 +384,7 @@ async def analyze_video_with_vision(video_frames, media_type="video", user_promp
vision_url = get_vision_gpu_url()
logger.info(f"Sending video analysis request to {vision_url} using model: {globals.VISION_MODEL} (media_type: {media_type}, frames: {len(video_frames)})")
async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) as response:
async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=300)) as response:
if response.status == 200:
data = await response.json()
result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")

View File

@@ -13,6 +13,7 @@ from utils.moods import load_mood_description
from utils.conversation_history import conversation_history
from utils.logger import get_logger
from utils.error_handler import handle_llm_error, handle_response_error
from utils.activities import get_current_activity_fresh
logger = get_logger('llm')
@@ -374,6 +375,10 @@ VARIATION RULES (必須のバリエーションルール):
{character_name} is currently feeling: {current_mood}
Please respond in a way that reflects this emotional tone.{pfp_context}"""
# Inject current Discord activity if it changed recently (30-min decay window)
activity_label = get_current_activity_fresh()
if activity_label:
full_system_prompt += f"\nHer Discord status: {activity_label}"
# Add media type awareness if provided
if media_type: