From fc4674bb13138196cfcb9e50fd40a42dfc2ab870 Mon Sep 17 00:00:00 2001 From: koko210Serve Date: Wed, 15 Apr 2026 12:19:37 +0300 Subject: [PATCH] refactor: extract media processing from bot.py into image_handling.py (Phase D Step 19) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create process_media_in_message() in utils/image_handling.py that handles all 4 media types: image attachments, video/GIF attachments, Tenor GIF embeds, and rich embeds - DRY the send→log→bipolar tail pattern (5x repeated) into _send_log_bipolar() helper - Unify rich/article/link embed handling to use rephrase_as_miku() instead of inline Cat→LLM routing, fixing a mood-resolution bug (was using globals.DM_MOOD for servers) - Add 'rich_embed' media_type to rephrase_as_miku() prefix switch - Remove 3 inline 'import base64' from bot.py (already module-level in image_handling.py) - bot.py: 986 → 623 lines (-363) - image_handling.py: 559 → 881 lines (+322) - All 170 tests pass (21 config/state + 149 route split) --- bot/bot.py | 379 +----------------------------------- bot/utils/image_handling.py | 339 +++++++++++++++++++++++++++++++- 2 files changed, 339 insertions(+), 379 deletions(-) diff --git a/bot/bot.py b/bot/bot.py index c3459bd..341a77e 100644 --- a/bot/bot.py +++ b/bot/bot.py @@ -19,15 +19,7 @@ from utils.scheduled import ( send_monday_video ) from utils.image_handling import ( - download_and_encode_image, - download_and_encode_media, - extract_video_frames, - analyze_image_with_qwen, - analyze_video_with_vision, - rephrase_as_miku, - extract_tenor_gif_url, - convert_gif_to_mp4, - extract_embed_content + process_media_in_message, ) from utils.core import ( is_miku_addressed, @@ -266,344 +258,11 @@ async def on_message(message): ) return - # If message has an image, video, or GIF attachment - if message.attachments: - for attachment in message.attachments: - # Handle images - if any(attachment.filename.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png", ".webp"]): - base64_img = await download_and_encode_image(attachment.url) - if not base64_img: - await message.channel.send("I couldn't load the image, sorry!") - return - - # Analyze image (objective description) - qwen_description = await analyze_image_with_qwen(base64_img, user_prompt=prompt) - if not qwen_description or not qwen_description.strip(): - await message.channel.send("I couldn't see that image clearly, sorry! Try sending it again.") - return - # For DMs, pass None as guild_id to use DM mood - guild_id = message.guild.id if message.guild else None - miku_reply = await rephrase_as_miku( - qwen_description, - prompt, - guild_id=guild_id, - user_id=str(message.author.id), - author_name=message.author.display_name, - media_type="image" - ) - - if is_dm: - logger.info(f"💌 DM image response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})") - else: - logger.info(f"💬 Server image response to {message.author.display_name} in {message.guild.name} (using server mood)") - - response_message = await message.channel.send(miku_reply) - - # Log the bot's DM response - if is_dm: - dm_logger.log_user_message(message.author, response_message, is_bot_message=True) - - # For server messages, check if opposite persona should interject - if not is_dm and globals.BIPOLAR_MODE: - try: - from utils.persona_dialogue import check_for_interjection - current_persona = "evil" if globals.EVIL_MODE else "miku" - create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check") - except Exception as e: - logger.error(f"Error checking for persona interjection: {e}") - - return - - # Handle videos and GIFs - elif any(attachment.filename.lower().endswith(ext) for ext in [".gif", ".mp4", ".webm", ".mov"]): - # Determine media type - is_gif = attachment.filename.lower().endswith('.gif') - media_type = "gif" if is_gif else "video" - - logger.debug(f"🎬 Processing {media_type}: {attachment.filename}") - - # Download the media - media_bytes_b64 = await download_and_encode_media(attachment.url) - if not media_bytes_b64: - await message.channel.send(f"I couldn't load the {media_type}, sorry!") - return - - # Decode back to bytes for frame extraction - import base64 - media_bytes = base64.b64decode(media_bytes_b64) - - # If it's a GIF, convert to MP4 for better processing - if is_gif: - logger.debug(f"🔄 Converting GIF to MP4 for processing...") - mp4_bytes = await convert_gif_to_mp4(media_bytes) - if mp4_bytes: - media_bytes = mp4_bytes - logger.info(f"✅ GIF converted to MP4") - else: - logger.warning(f"GIF conversion failed, trying direct processing") - - # Extract frames - frames = await extract_video_frames(media_bytes, num_frames=6) - - if not frames: - await message.channel.send(f"I couldn't extract frames from that {media_type}, sorry!") - return - - logger.debug(f"📹 Extracted {len(frames)} frames from {attachment.filename}") - - # Analyze the video/GIF with appropriate media type - video_description = await analyze_video_with_vision(frames, media_type=media_type, user_prompt=prompt) - if not video_description or not video_description.strip(): - await message.channel.send(f"I couldn't analyze that {media_type} clearly, sorry! Try sending it again.") - return - # For DMs, pass None as guild_id to use DM mood - guild_id = message.guild.id if message.guild else None - miku_reply = await rephrase_as_miku( - video_description, - prompt, - guild_id=guild_id, - user_id=str(message.author.id), - author_name=message.author.display_name, - media_type=media_type - ) - - if is_dm: - logger.info(f"💌 DM {media_type} response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})") - else: - logger.info(f"💬 Server video response to {message.author.display_name} in {message.guild.name} (using server mood)") - - response_message = await message.channel.send(miku_reply) - - # Log the bot's DM response - if is_dm: - dm_logger.log_user_message(message.author, response_message, is_bot_message=True) - - # For server messages, check if opposite persona should interject - if not is_dm and globals.BIPOLAR_MODE: - try: - from utils.persona_dialogue import check_for_interjection - current_persona = "evil" if globals.EVIL_MODE else "miku" - create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check") - except Exception as e: - logger.error(f"Error checking for persona interjection: {e}") - - return - - # Check for embeds (articles, images, videos, GIFs, etc.) - if message.embeds: - for embed in message.embeds: - # Handle Tenor GIF embeds specially (Discord uses these for /gif command) - if embed.type == 'gifv' and embed.url and 'tenor.com' in embed.url: - logger.info(f"🎭 Processing Tenor GIF from embed: {embed.url}") - - # Extract the actual GIF URL from Tenor - gif_url = await extract_tenor_gif_url(embed.url) - if not gif_url: - # Try using the embed's video or image URL as fallback - if hasattr(embed, 'video') and embed.video: - gif_url = embed.video.url - elif hasattr(embed, 'thumbnail') and embed.thumbnail: - gif_url = embed.thumbnail.url - - if not gif_url: - logger.warning(f"Could not extract GIF URL from Tenor embed") - continue - - # Download the GIF - media_bytes_b64 = await download_and_encode_media(gif_url) - if not media_bytes_b64: - await message.channel.send("I couldn't load that Tenor GIF, sorry!") - return - - # Decode to bytes - import base64 - media_bytes = base64.b64decode(media_bytes_b64) - - # Convert GIF to MP4 - logger.debug(f"Converting Tenor GIF to MP4 for processing...") - mp4_bytes = await convert_gif_to_mp4(media_bytes) - if not mp4_bytes: - logger.warning(f"GIF conversion failed, trying direct frame extraction") - mp4_bytes = media_bytes - else: - logger.debug(f"Tenor GIF converted to MP4") - - # Extract frames - frames = await extract_video_frames(mp4_bytes, num_frames=6) - - if not frames: - await message.channel.send("I couldn't extract frames from that GIF, sorry!") - return - - logger.info(f"📹 Extracted {len(frames)} frames from Tenor GIF") - - # Analyze the GIF with tenor_gif media type - video_description = await analyze_video_with_vision(frames, media_type="tenor_gif", user_prompt=prompt) - if not video_description or not video_description.strip(): - await message.channel.send("I couldn't analyze that GIF clearly, sorry! Try sending it again.") - return - guild_id = message.guild.id if message.guild else None - miku_reply = await rephrase_as_miku( - video_description, - prompt, - guild_id=guild_id, - user_id=str(message.author.id), - author_name=message.author.display_name, - media_type="tenor_gif" - ) - - if is_dm: - logger.info(f"💌 DM Tenor GIF response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})") - else: - logger.info(f"💬 Server Tenor GIF response to {message.author.display_name} in {message.guild.name} (using server mood)") - - response_message = await message.channel.send(miku_reply) - - # Log the bot's DM response - if is_dm: - dm_logger.log_user_message(message.author, response_message, is_bot_message=True) - - # For server messages, check if opposite persona should interject - if not is_dm and globals.BIPOLAR_MODE: - try: - from utils.persona_dialogue import check_for_interjection - current_persona = "evil" if globals.EVIL_MODE else "miku" - create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check") - except Exception as e: - logger.error(f"Error checking for persona interjection: {e}") - - return - - # Handle other types of embeds (rich, article, image, video, link) - elif embed.type in ['rich', 'article', 'image', 'video', 'link']: - logger.error(f"Processing {embed.type} embed") - - # Extract content from embed - embed_content = await extract_embed_content(embed) - - if not embed_content['has_content']: - logger.warning(f"Embed has no extractable content, skipping") - continue - - # Build context string with embed text - embed_context_parts = [] - if embed_content['text']: - embed_context_parts.append(f"[Embedded content: {embed_content['text'][:500]}{'...' if len(embed_content['text']) > 500 else ''}]") - - # Process images from embed - if embed_content['images']: - for img_url in embed_content['images']: - logger.error(f"Processing image from embed: {img_url}") - try: - base64_img = await download_and_encode_image(img_url) - if base64_img: - logger.info(f"Image downloaded, analyzing with vision model...") - # Analyze image - qwen_description = await analyze_image_with_qwen(base64_img, user_prompt=prompt) - truncated = (qwen_description[:50] + "...") if len(qwen_description) > 50 else qwen_description - logger.error(f"Vision analysis result: {truncated}") - if qwen_description and qwen_description.strip(): - embed_context_parts.append(f"[Embedded image shows: {qwen_description}]") - else: - logger.error(f"Failed to download image from embed") - except Exception as e: - logger.error(f"Error processing embedded image: {e}") - import traceback - traceback.print_exc() - - # Process videos from embed - if embed_content['videos']: - for video_url in embed_content['videos']: - logger.info(f"🎬 Processing video from embed: {video_url}") - try: - media_bytes_b64 = await download_and_encode_media(video_url) - if media_bytes_b64: - import base64 - media_bytes = base64.b64decode(media_bytes_b64) - frames = await extract_video_frames(media_bytes, num_frames=6) - if frames: - logger.info(f"📹 Extracted {len(frames)} frames, analyzing with vision model...") - video_description = await analyze_video_with_vision(frames, media_type="video", user_prompt=prompt) - logger.info(f"Video analysis result: {video_description[:100]}...") - if video_description and video_description.strip(): - embed_context_parts.append(f"[Embedded video shows: {video_description}]") - else: - logger.error(f"Failed to extract frames from video") - else: - logger.error(f"Failed to download video from embed") - except Exception as e: - logger.error(f"Error processing embedded video: {e}") - import traceback - traceback.print_exc() - - # Combine embed context with user prompt - if embed_context_parts: - full_context = '\n'.join(embed_context_parts) - enhanced_prompt = f"{full_context}\n\nUser message: {prompt}" if prompt else full_context - - # Get Miku's response - guild_id = message.guild.id if message.guild else None - response_type = "dm_response" if is_dm else "server_response" - author_name = message.author.display_name - - # Phase 3: Try Cat pipeline first for embed responses too - response = None - if globals.USE_CHESHIRE_CAT: - try: - from utils.cat_client import cat_adapter - cat_result = await cat_adapter.query( - text=enhanced_prompt, - user_id=str(message.author.id), - guild_id=str(guild_id) if guild_id else None, - author_name=author_name, - mood=globals.DM_MOOD, - response_type=response_type, - ) - if cat_result: - response, cat_full_prompt = cat_result - logger.info(f"🐱 Cat embed response for {author_name}") - import datetime - globals.LAST_CAT_INTERACTION = { - "full_prompt": cat_full_prompt, - "response": response[:500] if response else "", - "user": author_name, - "mood": globals.DM_MOOD, - "timestamp": datetime.datetime.now().isoformat(), - } - except Exception as e: - logger.warning(f"🐱 Cat embed error, fallback: {e}") - response = None - - if not response: - response = await query_llama( - enhanced_prompt, - user_id=str(message.author.id), - guild_id=guild_id, - response_type=response_type, - author_name=author_name - ) - - if is_dm: - logger.info(f"💌 DM embed response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})") - else: - logger.info(f"💬 Server embed response to {message.author.display_name} in {message.guild.name}") - - response_message = await message.channel.send(response) - - # Log the bot's DM response - if is_dm: - dm_logger.log_user_message(message.author, response_message, is_bot_message=True) - - # For server messages, check if opposite persona should interject - if not is_dm and globals.BIPOLAR_MODE: - try: - from utils.persona_dialogue import check_for_interjection - current_persona = "evil" if globals.EVIL_MODE else "miku" - create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check") - except Exception as e: - logger.error(f"Error checking for persona interjection: {e}") - - return + # Dispatch media processing (images, videos, GIFs, embeds) + # to utils/image_handling.process_media_in_message() + guild_id = message.guild.id if message.guild else None + if await process_media_in_message(message, prompt, is_dm, guild_id): + return # Check if this is an image generation request from utils.image_generation import detect_image_request, handle_image_generation_request @@ -686,30 +345,8 @@ async def on_message(message): author_name=author_name ) - if is_dm: - logger.info(f"💌 DM response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})") - else: - logger.info(f"💬 Server response to {message.author.display_name} in {message.guild.name} (using server mood)") - - response_message = await message.channel.send(response) - - # Log the bot's DM response - if is_dm: - dm_logger.log_user_message(message.author, response_message, is_bot_message=True) - - # For server messages, check if opposite persona should interject (persona dialogue system) - if not is_dm and globals.BIPOLAR_MODE: - logger.debug(f"Attempting to check for interjection (is_dm={is_dm}, BIPOLAR_MODE={globals.BIPOLAR_MODE})") - try: - from utils.persona_dialogue import check_for_interjection - current_persona = "evil" if globals.EVIL_MODE else "miku" - logger.debug(f"Creating interjection check task for persona: {current_persona}") - # Pass the bot's response message for analysis - create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check") - except Exception as e: - logger.error(f"Error checking for persona interjection: {e}") - import traceback - traceback.print_exc() + from utils.image_handling import _send_log_bipolar + response_message = await _send_log_bipolar(message, response, is_dm) # For server messages, do server-specific mood detection if not is_dm and message.guild: diff --git a/bot/utils/image_handling.py b/bot/utils/image_handling.py index cde61dc..ba4754b 100644 --- a/bot/utils/image_handling.py +++ b/bot/utils/image_handling.py @@ -418,14 +418,13 @@ async def rephrase_as_miku(vision_output, user_prompt, guild_id=None, user_id=No # Format the user's message to include vision context with media type # This will be saved to history automatically by query_llama - if media_type == "gif": - media_prefix = "Looking at a GIF" - elif media_type == "tenor_gif": - media_prefix = "Looking at a Tenor GIF" - elif media_type == "video": - media_prefix = "Looking at a video" - else: # image - media_prefix = "Looking at an image" + _MEDIA_PREFIXES = { + "gif": "Looking at a GIF", + "tenor_gif": "Looking at a Tenor GIF", + "video": "Looking at a video", + "rich_embed": "Looking at embedded content", + } + media_prefix = _MEDIA_PREFIXES.get(media_type, "Looking at an image") if user_prompt: # Include media type, vision description, and user's text @@ -503,6 +502,330 @@ async def rephrase_as_miku(vision_output, user_prompt, guild_id=None, user_id=No analyze_image_with_qwen = analyze_image_with_vision +# --------------------------------------------------------------------------- +# Shared tail helper — send response, log DM, check bipolar interjection +# --------------------------------------------------------------------------- + +async def _send_log_bipolar(message, reply_text, is_dm, *, media_label=""): + """ + Common tail shared by every media handler *and* the text-fallback path in + bot.py. Sends *reply_text* to the channel, logs the reply in the DM + ledger when appropriate, and fires a bipolar-interjection check for server + messages. + + Returns the sent ``discord.Message`` so callers can use it if needed. + """ + from utils.dm_logger import dm_logger + from utils.task_tracker import create_tracked_task + + label = f" {media_label}" if media_label else "" + if is_dm: + logger.info( + f"💌 DM{label} response to {message.author.display_name} " + f"(using DM mood: {globals.DM_MOOD})" + ) + else: + guild_name = message.guild.name if message.guild else "unknown" + logger.info( + f"💬 Server{label} response to {message.author.display_name} " + f"in {guild_name} (using server mood)" + ) + + response_message = await message.channel.send(reply_text) + + # Log bot's reply in the DM ledger + if is_dm: + dm_logger.log_user_message(message.author, response_message, is_bot_message=True) + + # Bipolar-mode interjection check (server messages only) + if not is_dm and globals.BIPOLAR_MODE: + try: + from utils.persona_dialogue import check_for_interjection + current_persona = "evil" if globals.EVIL_MODE else "miku" + create_tracked_task( + check_for_interjection(response_message, current_persona), + task_name="interjection_check", + ) + except Exception as e: + logger.error(f"Error checking for persona interjection: {e}") + + return response_message + + +# --------------------------------------------------------------------------- +# High-level media dispatcher — called from bot.py on_message() +# --------------------------------------------------------------------------- + +async def process_media_in_message(message, prompt, is_dm, guild_id) -> bool: + """ + Inspect *message* for image/video/GIF attachments and embeds. + + If any media is found and successfully processed, a reply is sent to the + channel and this function returns ``True``. Otherwise it returns + ``False`` so the caller can fall through to text-only handling. + """ + author_id = str(message.author.id) + author_name = message.author.display_name + + # ---- 1. Image attachments (.jpg, .jpeg, .png, .webp) ----------------- + if message.attachments: + for attachment in message.attachments: + lower = attachment.filename.lower() + + if any(lower.endswith(ext) for ext in (".jpg", ".jpeg", ".png", ".webp")): + base64_img = await download_and_encode_image(attachment.url) + if not base64_img: + await message.channel.send("I couldn't load the image, sorry!") + return True + + qwen_description = await analyze_image_with_vision(base64_img, user_prompt=prompt) + if not qwen_description or not qwen_description.strip(): + await message.channel.send( + "I couldn't see that image clearly, sorry! Try sending it again." + ) + return True + + miku_reply = await rephrase_as_miku( + qwen_description, prompt, + guild_id=guild_id, + user_id=author_id, + author_name=author_name, + media_type="image", + ) + await _send_log_bipolar(message, miku_reply, is_dm, media_label="image") + return True + + # ---- 2. Video / GIF attachments (.gif, .mp4, .webm, .mov) ---- + elif any(lower.endswith(ext) for ext in (".gif", ".mp4", ".webm", ".mov")): + is_gif = lower.endswith(".gif") + media_type = "gif" if is_gif else "video" + + logger.debug(f"🎬 Processing {media_type}: {attachment.filename}") + + media_bytes_b64 = await download_and_encode_media(attachment.url) + if not media_bytes_b64: + await message.channel.send(f"I couldn't load the {media_type}, sorry!") + return True + + media_bytes = base64.b64decode(media_bytes_b64) + + if is_gif: + logger.debug("🔄 Converting GIF to MP4 for processing...") + mp4_bytes = await convert_gif_to_mp4(media_bytes) + if mp4_bytes: + media_bytes = mp4_bytes + logger.info("✅ GIF converted to MP4") + else: + logger.warning("GIF conversion failed, trying direct processing") + + frames = await extract_video_frames(media_bytes, num_frames=6) + if not frames: + await message.channel.send( + f"I couldn't extract frames from that {media_type}, sorry!" + ) + return True + + logger.debug( + f"📹 Extracted {len(frames)} frames from {attachment.filename}" + ) + + video_description = await analyze_video_with_vision( + frames, media_type=media_type, user_prompt=prompt, + ) + if not video_description or not video_description.strip(): + await message.channel.send( + f"I couldn't analyze that {media_type} clearly, sorry! " + "Try sending it again." + ) + return True + + miku_reply = await rephrase_as_miku( + video_description, prompt, + guild_id=guild_id, + user_id=author_id, + author_name=author_name, + media_type=media_type, + ) + await _send_log_bipolar(message, miku_reply, is_dm, media_label=media_type) + return True + + # ---- 3. Tenor GIF embeds (gifv from tenor.com) ----------------------- + if message.embeds: + for embed in message.embeds: + if embed.type == "gifv" and embed.url and "tenor.com" in embed.url: + logger.info(f"🎭 Processing Tenor GIF from embed: {embed.url}") + + gif_url = await extract_tenor_gif_url(embed.url) + if not gif_url: + if hasattr(embed, "video") and embed.video: + gif_url = embed.video.url + elif hasattr(embed, "thumbnail") and embed.thumbnail: + gif_url = embed.thumbnail.url + + if not gif_url: + logger.warning("Could not extract GIF URL from Tenor embed") + continue + + media_bytes_b64 = await download_and_encode_media(gif_url) + if not media_bytes_b64: + await message.channel.send( + "I couldn't load that Tenor GIF, sorry!" + ) + return True + + media_bytes = base64.b64decode(media_bytes_b64) + + logger.debug("Converting Tenor GIF to MP4 for processing...") + mp4_bytes = await convert_gif_to_mp4(media_bytes) + if not mp4_bytes: + logger.warning( + "GIF conversion failed, trying direct frame extraction" + ) + mp4_bytes = media_bytes + else: + logger.debug("Tenor GIF converted to MP4") + + frames = await extract_video_frames(mp4_bytes, num_frames=6) + if not frames: + await message.channel.send( + "I couldn't extract frames from that GIF, sorry!" + ) + return True + + logger.info( + f"📹 Extracted {len(frames)} frames from Tenor GIF" + ) + + video_description = await analyze_video_with_vision( + frames, media_type="tenor_gif", user_prompt=prompt, + ) + if not video_description or not video_description.strip(): + await message.channel.send( + "I couldn't analyze that GIF clearly, sorry! " + "Try sending it again." + ) + return True + + miku_reply = await rephrase_as_miku( + video_description, prompt, + guild_id=guild_id, + user_id=author_id, + author_name=author_name, + media_type="tenor_gif", + ) + await _send_log_bipolar( + message, miku_reply, is_dm, media_label="Tenor GIF", + ) + return True + + # ---- 4. Rich / article / image / video / link embeds --------- + elif embed.type in ("rich", "article", "image", "video", "link"): + logger.info(f"Processing {embed.type} embed") + + embed_content = await extract_embed_content(embed) + if not embed_content["has_content"]: + logger.warning("Embed has no extractable content, skipping") + continue + + embed_context_parts = [] + if embed_content["text"]: + truncated = embed_content["text"][:500] + if len(embed_content["text"]) > 500: + truncated += "..." + embed_context_parts.append( + f"[Embedded content: {truncated}]" + ) + + # Analyze images found inside the embed + for img_url in embed_content["images"]: + logger.info(f"Processing image from embed: {img_url}") + try: + base64_img = await download_and_encode_image(img_url) + if base64_img: + logger.info( + "Image downloaded, analyzing with vision model..." + ) + qwen_description = await analyze_image_with_vision( + base64_img, user_prompt=prompt, + ) + if qwen_description and qwen_description.strip(): + embed_context_parts.append( + f"[Embedded image shows: {qwen_description}]" + ) + else: + logger.error("Failed to download image from embed") + except Exception as e: + logger.error(f"Error processing embedded image: {e}") + + # Analyze videos found inside the embed + for video_url in embed_content["videos"]: + logger.info( + f"🎬 Processing video from embed: {video_url}" + ) + try: + media_bytes_b64 = await download_and_encode_media( + video_url, + ) + if media_bytes_b64: + media_bytes = base64.b64decode(media_bytes_b64) + frames = await extract_video_frames( + media_bytes, num_frames=6, + ) + if frames: + logger.info( + f"📹 Extracted {len(frames)} frames, " + "analyzing with vision model..." + ) + video_description = ( + await analyze_video_with_vision( + frames, + media_type="video", + user_prompt=prompt, + ) + ) + if ( + video_description + and video_description.strip() + ): + embed_context_parts.append( + f"[Embedded video shows: " + f"{video_description}]" + ) + else: + logger.error( + "Failed to extract frames from video" + ) + else: + logger.error( + "Failed to download video from embed" + ) + except Exception as e: + logger.error( + f"Error processing embedded video: {e}" + ) + + if not embed_context_parts: + continue + + # Build a combined vision description and route through + # rephrase_as_miku (which handles Cat → LLM fallback, + # mood resolution, and LAST_CAT_INTERACTION tracking). + combined_description = "\n".join(embed_context_parts) + miku_reply = await rephrase_as_miku( + combined_description, prompt, + guild_id=guild_id, + user_id=author_id, + author_name=author_name, + media_type="rich_embed", + ) + await _send_log_bipolar( + message, miku_reply, is_dm, media_label="embed", + ) + return True + + return False + + async def extract_embed_content(embed): """ Extract text and media content from a Discord embed.