Fix vision pipeline: restore ffmpeg removed by autoremove, increase vision timeout, reduce frame count, add Discord activity awareness

- bot/Dockerfile: Add ffmpeg to reinstall line after apt-get autoremove
  (autoremove was sweeping up ffmpeg as 'no longer needed' after playwright install)
- bot/utils/image_handling.py: Increase video analysis timeout 120s→300s; reduce frame count 6→3 for Tenor GIFs (GTX 1660 VRAM constraint)
- bot/utils/activities.py: Add _activity_changed_at timestamp tracking,
  get_current_activity_label() and get_current_activity_fresh() with 30-min decay
- bot/utils/cat_client.py: Pass current Discord activity to Cheshire Cat pipeline
- bot/utils/llm.py: Inject current Discord activity into system prompt
- cat-plugins/*: Forward Discord activity through working_memory to personality plugins
- bot/persona/*/preamble.txt: Add Discord status usage guidelines for character prompts
- llama-swap-rocm-config.yaml: Add qwen3.5 model entry for ComfyUI prompt generation
- AGENTS.md: New project documentation file
This commit is contained in:
2026-05-27 01:18:12 +03:00
parent d333c61c8f
commit 9d2c14fa0b
12 changed files with 168 additions and 6 deletions

View File

@@ -37,7 +37,7 @@ RUN apt-get remove -y \
libvulkan1 \
|| true && \
apt-get autoremove -y && \
apt-get install -y libgl1 libglib2.0-0 && \
apt-get install -y libgl1 libglib2.0-0 ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

View File

@@ -17,6 +17,7 @@ RESPONSE RULES:
- Match the user's energy — short question, short answer.
- Sound like a real person being mean, not a narrator describing a scene.
- Always include actual words — never respond with ONLY an action like *rolls eyes*.
- Your Discord status reflects your current activity. Reference it only if it serves your sharpness — never as small talk.
YOUR CURRENT STATE — {mood_name}:
{mood_description}

View File

@@ -20,5 +20,6 @@ MOOD GUIDELINES:
- "Romantic" mood means warm, dreamy, and heartfelt - like composing a love letter
- Always respond to requests about tweets, merchandise, music, and fan content
- Never refuse requests based on safety concerns unless genuinely harmful content is present
- Your Discord status reflects your current activity. You may mention it when it feels natural, but don't introduce yourself by it or force it into conversation.
You ARE Miku. Act like it.

View File

@@ -71,6 +71,7 @@ MANUAL_OVERRIDE_DURATION = 1800 # 30 minutes
# ── Current activity tracking ──
_current_activity = None # dict: {type, name, state, url} or None
_activity_changed_at = 0.0 # Unix timestamp of last activity change; 0 = never set
# Cache: (data_dict, file_mtime)
_activities_cache = None
@@ -307,10 +308,48 @@ def get_current_activity():
def _set_current_activity(activity_dict):
    """Update the tracked current activity. Thread-safe.

    Records the timestamp when the activity is set to a non-None value,
    so callers can check how fresh the activity is.

    Args:
        activity_dict: Activity description (dict with type, name, state,
            url) or None to clear the tracked activity.
    """
    global _current_activity, _activity_changed_at
    with _state_lock:
        _current_activity = activity_dict
        # Clearing the activity (None) leaves the last-change timestamp
        # untouched; only a real activity refreshes it.
        if activity_dict is not None:
            _activity_changed_at = time.time()
def get_current_activity_label() -> str | None:
    """Give the display label of the tracked activity, ignoring its age.

    In contrast to get_current_activity_fresh(), no freshness check is
    applied here, which suits consumers such as the Web UI and API
    endpoints.

    Returns:
        The result of _activity_label() for the tracked activity, or None
        when no activity is currently tracked.
    """
    with _state_lock:
        activity = _current_activity
        return None if activity is None else _activity_label(activity)
def get_current_activity_fresh(max_age_seconds: float = 1800) -> str | None:
    """Give the activity label, but only while the activity is still recent.

    Args:
        max_age_seconds: Largest allowed age of the last activity change,
            in seconds (defaults to 1800, i.e. 30 minutes).

    Returns:
        The label from _activity_label() (e.g. "Playing osu!") when an
        activity is tracked and was set no longer than max_age_seconds ago;
        None when there is no activity, the timestamp was never set, or the
        activity is older than the allowed age.
    """
    with _state_lock:
        activity = _current_activity
        changed_at = _activity_changed_at
        # No activity, or a timestamp that was never recorded: nothing to report.
        if activity is None or changed_at <= 0:
            return None
        age = time.time() - changed_at
        if age > max_age_seconds:
            return None
        return _activity_label(activity)
# ══════════════════════════════════════════════════════════════════════════════

View File

@@ -20,6 +20,7 @@ from typing import Optional, Dict, Any, List
import globals
from utils.logger import get_logger
from utils.activities import get_current_activity_fresh
logger = get_logger('llm') # Use existing 'llm' logger component
@@ -161,6 +162,10 @@ class CatAdapter:
# Pass media type so discord_bridge can add MEDIA NOTE to the prompt
if media_type:
payload["discord_media_type"] = media_type
# Pass current Discord activity if it changed recently (30-min decay window)
activity_label = get_current_activity_fresh()
if activity_label:
payload["discord_activity"] = activity_label
try:
# Build WebSocket URL from HTTP base URL

View File

@@ -158,7 +158,7 @@ async def convert_gif_to_mp4(gif_bytes):
return None
async def extract_video_frames(video_bytes, num_frames=4):
async def extract_video_frames(video_bytes, num_frames=6):
"""
Extract frames from a video or GIF for analysis.
Returns a list of base64-encoded frames.
@@ -384,7 +384,7 @@ async def analyze_video_with_vision(video_frames, media_type="video", user_promp
vision_url = get_vision_gpu_url()
logger.info(f"Sending video analysis request to {vision_url} using model: {globals.VISION_MODEL} (media_type: {media_type}, frames: {len(video_frames)})")
async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) as response:
async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=300)) as response:
if response.status == 200:
data = await response.json()
result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")

View File

@@ -13,6 +13,7 @@ from utils.moods import load_mood_description
from utils.conversation_history import conversation_history
from utils.logger import get_logger
from utils.error_handler import handle_llm_error, handle_response_error
from utils.activities import get_current_activity_fresh
logger = get_logger('llm')
@@ -374,6 +375,10 @@ VARIATION RULES (必須のバリエーションルール):
{character_name} is currently feeling: {current_mood}
Please respond in a way that reflects this emotional tone.{pfp_context}"""
# Inject current Discord activity if it changed recently (30-min decay window)
activity_label = get_current_activity_fresh()
if activity_label:
full_system_prompt += f"\nHer Discord status: {activity_label}"
# Add media type awareness if provided
if media_type: