refactor: extract media processing from bot.py into image_handling.py (Phase D Step 19)

- Create process_media_in_message() in utils/image_handling.py that handles all 4 media types: image attachments, video/GIF attachments, Tenor GIF embeds, and rich embeds
- DRY up the send→log→bipolar tail pattern (repeated 5x) into a _send_log_bipolar() helper
- Unify rich/article/link embed handling to use rephrase_as_miku() instead of inline Cat→LLM routing, fixing a mood-resolution bug (it was using globals.DM_MOOD for servers)
- Add 'rich_embed' media_type to the rephrase_as_miku() prefix switch
- Remove 3 inline 'import base64' statements from bot.py (base64 is already imported at module level in image_handling.py)
- bot.py: 986 → 623 lines (-363)
- image_handling.py: 559 → 881 lines (+322)
- All 170 tests pass (21 config/state + 149 route split)
2026-04-15 12:19:37 +03:00
parent 979217e7cc
commit fc4674bb13
2 changed files with 339 additions and 379 deletions
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -19,15 +19,7 @@ from utils.scheduled import (
    send_monday_video
 )
 from utils.image_handling import (
-    download_and_encode_image,
+    process_media_in_message,
    download_and_encode_media,
    extract_video_frames,
    analyze_image_with_qwen,
    analyze_video_with_vision,
    rephrase_as_miku,
    extract_tenor_gif_url,
    convert_gif_to_mp4,
    extract_embed_content
 )
 from utils.core import (
    is_miku_addressed,
@@ -266,343 +258,10 @@ async def on_message(message):
                )
                return
-            # If message has an image, video, or GIF attachment
+            # Dispatch media processing (images, videos, GIFs, embeds)
-            if message.attachments:
+            # to utils/image_handling.process_media_in_message()
                for attachment in message.attachments:
                    # Handle images
                    if any(attachment.filename.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png", ".webp"]):
                        base64_img = await download_and_encode_image(attachment.url)
                        if not base64_img:
                            await message.channel.send("I couldn't load the image, sorry!")
                            return
                        # Analyze image (objective description)
                        qwen_description = await analyze_image_with_qwen(base64_img, user_prompt=prompt)
                        if not qwen_description or not qwen_description.strip():
                            await message.channel.send("I couldn't see that image clearly, sorry! Try sending it again.")
                            return
                        # For DMs, pass None as guild_id to use DM mood
            guild_id = message.guild.id if message.guild else None
-                        miku_reply = await rephrase_as_miku(
+            if await process_media_in_message(message, prompt, is_dm, guild_id):
                            qwen_description, 
                            prompt, 
                            guild_id=guild_id,
                            user_id=str(message.author.id),
                            author_name=message.author.display_name,
                            media_type="image"
                        )
                        if is_dm:
                            logger.info(f"💌 DM image response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
                        else:
                            logger.info(f"💬 Server image response to {message.author.display_name} in {message.guild.name} (using server mood)")
                        response_message = await message.channel.send(miku_reply)
                        # Log the bot's DM response
                        if is_dm:
                            dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
                        # For server messages, check if opposite persona should interject
                        if not is_dm and globals.BIPOLAR_MODE:
                            try:
                                from utils.persona_dialogue import check_for_interjection
                                current_persona = "evil" if globals.EVIL_MODE else "miku"
                                create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
                            except Exception as e:
                                logger.error(f"Error checking for persona interjection: {e}")
                        return
                    # Handle videos and GIFs
                    elif any(attachment.filename.lower().endswith(ext) for ext in [".gif", ".mp4", ".webm", ".mov"]):
                        # Determine media type
                        is_gif = attachment.filename.lower().endswith('.gif')
                        media_type = "gif" if is_gif else "video"
                        logger.debug(f"🎬 Processing {media_type}: {attachment.filename}")
                        # Download the media
                        media_bytes_b64 = await download_and_encode_media(attachment.url)
                        if not media_bytes_b64:
                            await message.channel.send(f"I couldn't load the {media_type}, sorry!")
                            return
                        # Decode back to bytes for frame extraction
                        import base64
                        media_bytes = base64.b64decode(media_bytes_b64)
                        # If it's a GIF, convert to MP4 for better processing
                        if is_gif:
                            logger.debug(f"🔄 Converting GIF to MP4 for processing...")
                            mp4_bytes = await convert_gif_to_mp4(media_bytes)
                            if mp4_bytes:
                                media_bytes = mp4_bytes
                                logger.info(f"✅ GIF converted to MP4")
                            else:
                                logger.warning(f"GIF conversion failed, trying direct processing")
                        # Extract frames
                        frames = await extract_video_frames(media_bytes, num_frames=6)
                        if not frames:
                            await message.channel.send(f"I couldn't extract frames from that {media_type}, sorry!")
                            return
                        logger.debug(f"📹 Extracted {len(frames)} frames from {attachment.filename}")
                        # Analyze the video/GIF with appropriate media type
                        video_description = await analyze_video_with_vision(frames, media_type=media_type, user_prompt=prompt)
                        if not video_description or not video_description.strip():
                            await message.channel.send(f"I couldn't analyze that {media_type} clearly, sorry! Try sending it again.")
                            return
                        # For DMs, pass None as guild_id to use DM mood
                        guild_id = message.guild.id if message.guild else None
                        miku_reply = await rephrase_as_miku(
                            video_description, 
                            prompt, 
                            guild_id=guild_id,
                            user_id=str(message.author.id),
                            author_name=message.author.display_name,
                            media_type=media_type
                        )
                        if is_dm:
                            logger.info(f"💌 DM {media_type} response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
                        else:
                            logger.info(f"💬 Server video response to {message.author.display_name} in {message.guild.name} (using server mood)")
                        response_message = await message.channel.send(miku_reply)
                        # Log the bot's DM response
                        if is_dm:
                            dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
                        # For server messages, check if opposite persona should interject
                        if not is_dm and globals.BIPOLAR_MODE:
                            try:
                                from utils.persona_dialogue import check_for_interjection
                                current_persona = "evil" if globals.EVIL_MODE else "miku"
                                create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
                            except Exception as e:
                                logger.error(f"Error checking for persona interjection: {e}")
                        return
            # Check for embeds (articles, images, videos, GIFs, etc.)
            if message.embeds:
                for embed in message.embeds:
                    # Handle Tenor GIF embeds specially (Discord uses these for /gif command)
                    if embed.type == 'gifv' and embed.url and 'tenor.com' in embed.url:
                        logger.info(f"🎭 Processing Tenor GIF from embed: {embed.url}")
                        # Extract the actual GIF URL from Tenor
                        gif_url = await extract_tenor_gif_url(embed.url)
                        if not gif_url:
                            # Try using the embed's video or image URL as fallback
                            if hasattr(embed, 'video') and embed.video:
                                gif_url = embed.video.url
                            elif hasattr(embed, 'thumbnail') and embed.thumbnail:
                                gif_url = embed.thumbnail.url
                        if not gif_url:
                            logger.warning(f"Could not extract GIF URL from Tenor embed")
                            continue
                        # Download the GIF
                        media_bytes_b64 = await download_and_encode_media(gif_url)
                        if not media_bytes_b64:
                            await message.channel.send("I couldn't load that Tenor GIF, sorry!")
                            return
                        # Decode to bytes
                        import base64
                        media_bytes = base64.b64decode(media_bytes_b64)
                        # Convert GIF to MP4
                        logger.debug(f"Converting Tenor GIF to MP4 for processing...")
                        mp4_bytes = await convert_gif_to_mp4(media_bytes)
                        if not mp4_bytes:
                            logger.warning(f"GIF conversion failed, trying direct frame extraction")
                            mp4_bytes = media_bytes
                        else:
                            logger.debug(f"Tenor GIF converted to MP4")
                        # Extract frames
                        frames = await extract_video_frames(mp4_bytes, num_frames=6)
                        if not frames:
                            await message.channel.send("I couldn't extract frames from that GIF, sorry!")
                            return
                        logger.info(f"📹 Extracted {len(frames)} frames from Tenor GIF")
                        # Analyze the GIF with tenor_gif media type
                        video_description = await analyze_video_with_vision(frames, media_type="tenor_gif", user_prompt=prompt)
                        if not video_description or not video_description.strip():
                            await message.channel.send("I couldn't analyze that GIF clearly, sorry! Try sending it again.")
                            return
                        guild_id = message.guild.id if message.guild else None
                        miku_reply = await rephrase_as_miku(
                            video_description, 
                            prompt, 
                            guild_id=guild_id,
                            user_id=str(message.author.id),
                            author_name=message.author.display_name,
                            media_type="tenor_gif"
                        )
                        if is_dm:
                            logger.info(f"💌 DM Tenor GIF response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
                        else:
                            logger.info(f"💬 Server Tenor GIF response to {message.author.display_name} in {message.guild.name} (using server mood)")
                        response_message = await message.channel.send(miku_reply)
                        # Log the bot's DM response
                        if is_dm:
                            dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
                        # For server messages, check if opposite persona should interject
                        if not is_dm and globals.BIPOLAR_MODE:
                            try:
                                from utils.persona_dialogue import check_for_interjection
                                current_persona = "evil" if globals.EVIL_MODE else "miku"
                                create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
                            except Exception as e:
                                logger.error(f"Error checking for persona interjection: {e}")
                        return
                    # Handle other types of embeds (rich, article, image, video, link)
                    elif embed.type in ['rich', 'article', 'image', 'video', 'link']:
                        logger.error(f"Processing {embed.type} embed")
                        # Extract content from embed
                        embed_content = await extract_embed_content(embed)
                        if not embed_content['has_content']:
                            logger.warning(f"Embed has no extractable content, skipping")
                            continue
                        # Build context string with embed text
                        embed_context_parts = []
                        if embed_content['text']:
                            embed_context_parts.append(f"[Embedded content: {embed_content['text'][:500]}{'...' if len(embed_content['text']) > 500 else ''}]")
                        # Process images from embed
                        if embed_content['images']:
                            for img_url in embed_content['images']:
                                logger.error(f"Processing image from embed: {img_url}")
                                try:
                                    base64_img = await download_and_encode_image(img_url)
                                    if base64_img:
                                        logger.info(f"Image downloaded, analyzing with vision model...")
                                        # Analyze image
                                        qwen_description = await analyze_image_with_qwen(base64_img, user_prompt=prompt)
                                        truncated = (qwen_description[:50] + "...") if len(qwen_description) > 50 else qwen_description
                                        logger.error(f"Vision analysis result: {truncated}")
                                        if qwen_description and qwen_description.strip():
                                            embed_context_parts.append(f"[Embedded image shows: {qwen_description}]")
                                    else:
                                        logger.error(f"Failed to download image from embed")
                                except Exception as e:
                                    logger.error(f"Error processing embedded image: {e}")
                                    import traceback
                                    traceback.print_exc()
                        # Process videos from embed
                        if embed_content['videos']:
                            for video_url in embed_content['videos']:
                                logger.info(f"🎬 Processing video from embed: {video_url}")
                                try:
                                    media_bytes_b64 = await download_and_encode_media(video_url)
                                    if media_bytes_b64:
                                        import base64
                                        media_bytes = base64.b64decode(media_bytes_b64)
                                        frames = await extract_video_frames(media_bytes, num_frames=6)
                                        if frames:
                                            logger.info(f"📹 Extracted {len(frames)} frames, analyzing with vision model...")
                                            video_description = await analyze_video_with_vision(frames, media_type="video", user_prompt=prompt)
                                            logger.info(f"Video analysis result: {video_description[:100]}...")
                                            if video_description and video_description.strip():
                                                embed_context_parts.append(f"[Embedded video shows: {video_description}]")
                                        else:
                                            logger.error(f"Failed to extract frames from video")
                                    else:
                                        logger.error(f"Failed to download video from embed")
                                except Exception as e:
                                    logger.error(f"Error processing embedded video: {e}")
                                    import traceback
                                    traceback.print_exc()
                        # Combine embed context with user prompt
                        if embed_context_parts:
                            full_context = '\n'.join(embed_context_parts)
                            enhanced_prompt = f"{full_context}\n\nUser message: {prompt}" if prompt else full_context
                            # Get Miku's response
                            guild_id = message.guild.id if message.guild else None
                            response_type = "dm_response" if is_dm else "server_response"
                            author_name = message.author.display_name
                            # Phase 3: Try Cat pipeline first for embed responses too
                            response = None
                            if globals.USE_CHESHIRE_CAT:
                                try:
                                    from utils.cat_client import cat_adapter
                                    cat_result = await cat_adapter.query(
                                        text=enhanced_prompt,
                                        user_id=str(message.author.id),
                                        guild_id=str(guild_id) if guild_id else None,
                                        author_name=author_name,
                                        mood=globals.DM_MOOD,
                                        response_type=response_type,
                                    )
                                    if cat_result:
                                        response, cat_full_prompt = cat_result
                                        logger.info(f"🐱 Cat embed response for {author_name}")
                                        import datetime
                                        globals.LAST_CAT_INTERACTION = {
                                            "full_prompt": cat_full_prompt,
                                            "response": response[:500] if response else "",
                                            "user": author_name,
                                            "mood": globals.DM_MOOD,
                                            "timestamp": datetime.datetime.now().isoformat(),
                                        }
                                except Exception as e:
                                    logger.warning(f"🐱 Cat embed error, fallback: {e}")
                                    response = None
                            if not response:
                                response = await query_llama(
                                    enhanced_prompt,
                                    user_id=str(message.author.id),
                                    guild_id=guild_id,
                                    response_type=response_type,
                                    author_name=author_name
                                )
                            if is_dm:
                                logger.info(f"💌 DM embed response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
                            else:
                                logger.info(f"💬 Server embed response to {message.author.display_name} in {message.guild.name}")
                            response_message = await message.channel.send(response)
                            # Log the bot's DM response
                            if is_dm:
                                dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
                            # For server messages, check if opposite persona should interject
                            if not is_dm and globals.BIPOLAR_MODE:
                                try:
                                    from utils.persona_dialogue import check_for_interjection
                                    current_persona = "evil" if globals.EVIL_MODE else "miku"
                                    create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
                                except Exception as e:
                                    logger.error(f"Error checking for persona interjection: {e}")
                return
            # Check if this is an image generation request
@@ -686,30 +345,8 @@ async def on_message(message):
                    author_name=author_name
                )
-            if is_dm:
+            from utils.image_handling import _send_log_bipolar
-                logger.info(f"💌 DM response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
+            response_message = await _send_log_bipolar(message, response, is_dm)
            else:
                logger.info(f"💬 Server response to {message.author.display_name} in {message.guild.name} (using server mood)")
            response_message = await message.channel.send(response)
            # Log the bot's DM response
            if is_dm:
                dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
            # For server messages, check if opposite persona should interject (persona dialogue system)
            if not is_dm and globals.BIPOLAR_MODE:
                logger.debug(f"Attempting to check for interjection (is_dm={is_dm}, BIPOLAR_MODE={globals.BIPOLAR_MODE})")
                try:
                    from utils.persona_dialogue import check_for_interjection
                    current_persona = "evil" if globals.EVIL_MODE else "miku"
                    logger.debug(f"Creating interjection check task for persona: {current_persona}")
                    # Pass the bot's response message for analysis
                    create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
                except Exception as e:
                    logger.error(f"Error checking for persona interjection: {e}")
                    import traceback
                    traceback.print_exc()
        # For server messages, do server-specific mood detection
        if not is_dm and message.guild:
--- a/bot/utils/image_handling.py
+++ b/bot/utils/image_handling.py
@@ -418,14 +418,13 @@ async def rephrase_as_miku(vision_output, user_prompt, guild_id=None, user_id=No
    # Format the user's message to include vision context with media type
    # This will be saved to history automatically by query_llama
-    if media_type == "gif":
+    _MEDIA_PREFIXES = {
-        media_prefix = "Looking at a GIF"
+        "gif": "Looking at a GIF",
-    elif media_type == "tenor_gif":
+        "tenor_gif": "Looking at a Tenor GIF",
-        media_prefix = "Looking at a Tenor GIF"
+        "video": "Looking at a video",
-    elif media_type == "video":
+        "rich_embed": "Looking at embedded content",
-        media_prefix = "Looking at a video"
+    }
-    else:  # image
+    media_prefix = _MEDIA_PREFIXES.get(media_type, "Looking at an image")
        media_prefix = "Looking at an image"
    if user_prompt:
        # Include media type, vision description, and user's text
@@ -503,6 +502,330 @@ async def rephrase_as_miku(vision_output, user_prompt, guild_id=None, user_id=No
 analyze_image_with_qwen = analyze_image_with_vision
 # ---------------------------------------------------------------------------
 # Shared tail helper — send response, log DM, check bipolar interjection
 # ---------------------------------------------------------------------------
 async def _send_log_bipolar(message, reply_text, is_dm, *, media_label=""):
    """
    Send a reply and run the shared post-send bookkeeping.

    Steps, in order:
      1. Log an info line describing the reply (DM vs. server wording).
      2. Send *reply_text* to ``message.channel``.
      3. If *is_dm* is truthy, record the sent message through
         ``dm_logger.log_user_message(..., is_bot_message=True)``.
      4. Otherwise, when ``globals.BIPOLAR_MODE`` is truthy, schedule
         ``check_for_interjection`` via ``create_tracked_task``.

    Args:
        message: The incoming message being answered; its ``author``,
            ``guild`` and ``channel`` attributes are read.
        reply_text: Text passed to ``message.channel.send``.
        is_dm: Truthy when the reply is going to a DM.
        media_label: Optional word inserted into the log line
            (e.g. ``"image"``); an empty value adds nothing.

    Returns:
        Whatever ``message.channel.send`` returned.
    """
    # Imported at call time rather than module level — presumably to avoid
    # a circular import; confirm before hoisting.
    from utils.dm_logger import dm_logger
    from utils.task_tracker import create_tracked_task

    suffix = ""
    if media_label:
        suffix = f" {media_label}"

    if not is_dm:
        guild = message.guild
        where = guild.name if guild else "unknown"
        logger.info(
            f"💬 Server{suffix} response to {message.author.display_name} "
            f"in {where} (using server mood)"
        )
    else:
        logger.info(
            f"💌 DM{suffix} response to {message.author.display_name} "
            f"(using DM mood: {globals.DM_MOOD})"
        )

    sent = await message.channel.send(reply_text)

    if is_dm:
        # DM replies are recorded in the DM log as bot messages.
        dm_logger.log_user_message(message.author, sent, is_bot_message=True)
    elif globals.BIPOLAR_MODE:
        # Server replies only: hand the sent message to the interjection check.
        # Failures here are logged and must not prevent returning the message.
        try:
            from utils.persona_dialogue import check_for_interjection

            if globals.EVIL_MODE:
                persona = "evil"
            else:
                persona = "miku"
            pending_check = check_for_interjection(sent, persona)
            create_tracked_task(pending_check, task_name="interjection_check")
        except Exception as e:
            logger.error(f"Error checking for persona interjection: {e}")

    return sent
 # ---------------------------------------------------------------------------
 # High-level media dispatcher — called from bot.py on_message()
 # ---------------------------------------------------------------------------
 async def process_media_in_message(message, prompt, is_dm, guild_id) -> bool:
    """
    Inspect *message* for image/video/GIF attachments and embeds.
    If any media is found and successfully processed, a reply is sent to the
    channel and this function returns ``True``.  Otherwise it returns
    ``False`` so the caller can fall through to text-only handling.
    """
    author_id = str(message.author.id)
    author_name = message.author.display_name
    # ---- 1. Image attachments (.jpg, .jpeg, .png, .webp) -----------------
    if message.attachments:
        for attachment in message.attachments:
            lower = attachment.filename.lower()
            if any(lower.endswith(ext) for ext in (".jpg", ".jpeg", ".png", ".webp")):
                base64_img = await download_and_encode_image(attachment.url)
                if not base64_img:
                    await message.channel.send("I couldn't load the image, sorry!")
                    return True
                qwen_description = await analyze_image_with_vision(base64_img, user_prompt=prompt)
                if not qwen_description or not qwen_description.strip():
                    await message.channel.send(
                        "I couldn't see that image clearly, sorry! Try sending it again."
                    )
                    return True
                miku_reply = await rephrase_as_miku(
                    qwen_description, prompt,
                    guild_id=guild_id,
                    user_id=author_id,
                    author_name=author_name,
                    media_type="image",
                )
                await _send_log_bipolar(message, miku_reply, is_dm, media_label="image")
                return True
            # ---- 2. Video / GIF attachments (.gif, .mp4, .webm, .mov) ----
            elif any(lower.endswith(ext) for ext in (".gif", ".mp4", ".webm", ".mov")):
                is_gif = lower.endswith(".gif")
                media_type = "gif" if is_gif else "video"
                logger.debug(f"🎬 Processing {media_type}: {attachment.filename}")
                media_bytes_b64 = await download_and_encode_media(attachment.url)
                if not media_bytes_b64:
                    await message.channel.send(f"I couldn't load the {media_type}, sorry!")
                    return True
                media_bytes = base64.b64decode(media_bytes_b64)
                if is_gif:
                    logger.debug("🔄 Converting GIF to MP4 for processing...")
                    mp4_bytes = await convert_gif_to_mp4(media_bytes)
                    if mp4_bytes:
                        media_bytes = mp4_bytes
                        logger.info("✅ GIF converted to MP4")
                    else:
                        logger.warning("GIF conversion failed, trying direct processing")
                frames = await extract_video_frames(media_bytes, num_frames=6)
                if not frames:
                    await message.channel.send(
                        f"I couldn't extract frames from that {media_type}, sorry!"
                    )
                    return True
                logger.debug(
                    f"📹 Extracted {len(frames)} frames from {attachment.filename}"
                )
                video_description = await analyze_video_with_vision(
                    frames, media_type=media_type, user_prompt=prompt,
                )
                if not video_description or not video_description.strip():
                    await message.channel.send(
                        f"I couldn't analyze that {media_type} clearly, sorry! "
                        "Try sending it again."
                    )
                    return True
                miku_reply = await rephrase_as_miku(
                    video_description, prompt,
                    guild_id=guild_id,
                    user_id=author_id,
                    author_name=author_name,
                    media_type=media_type,
                )
                await _send_log_bipolar(message, miku_reply, is_dm, media_label=media_type)
                return True
    # ---- 3. Tenor GIF embeds (gifv from tenor.com) -----------------------
    if message.embeds:
        for embed in message.embeds:
            if embed.type == "gifv" and embed.url and "tenor.com" in embed.url:
                logger.info(f"🎭 Processing Tenor GIF from embed: {embed.url}")
                gif_url = await extract_tenor_gif_url(embed.url)
                if not gif_url:
                    if hasattr(embed, "video") and embed.video:
                        gif_url = embed.video.url
                    elif hasattr(embed, "thumbnail") and embed.thumbnail:
                        gif_url = embed.thumbnail.url
                if not gif_url:
                    logger.warning("Could not extract GIF URL from Tenor embed")
                    continue
                media_bytes_b64 = await download_and_encode_media(gif_url)
                if not media_bytes_b64:
                    await message.channel.send(
                        "I couldn't load that Tenor GIF, sorry!"
                    )
                    return True
                media_bytes = base64.b64decode(media_bytes_b64)
                logger.debug("Converting Tenor GIF to MP4 for processing...")
                mp4_bytes = await convert_gif_to_mp4(media_bytes)
                if not mp4_bytes:
                    logger.warning(
                        "GIF conversion failed, trying direct frame extraction"
                    )
                    mp4_bytes = media_bytes
                else:
                    logger.debug("Tenor GIF converted to MP4")
                frames = await extract_video_frames(mp4_bytes, num_frames=6)
                if not frames:
                    await message.channel.send(
                        "I couldn't extract frames from that GIF, sorry!"
                    )
                    return True
                logger.info(
                    f"📹 Extracted {len(frames)} frames from Tenor GIF"
                )
                video_description = await analyze_video_with_vision(
                    frames, media_type="tenor_gif", user_prompt=prompt,
                )
                if not video_description or not video_description.strip():
                    await message.channel.send(
                        "I couldn't analyze that GIF clearly, sorry! "
                        "Try sending it again."
                    )
                    return True
                miku_reply = await rephrase_as_miku(
                    video_description, prompt,
                    guild_id=guild_id,
                    user_id=author_id,
                    author_name=author_name,
                    media_type="tenor_gif",
                )
                await _send_log_bipolar(
                    message, miku_reply, is_dm, media_label="Tenor GIF",
                )
                return True
            # ---- 4. Rich / article / image / video / link embeds ---------
            elif embed.type in ("rich", "article", "image", "video", "link"):
                logger.info(f"Processing {embed.type} embed")
                embed_content = await extract_embed_content(embed)
                if not embed_content["has_content"]:
                    logger.warning("Embed has no extractable content, skipping")
                    continue
                embed_context_parts = []
                if embed_content["text"]:
                    truncated = embed_content["text"][:500]
                    if len(embed_content["text"]) > 500:
                        truncated += "..."
                    embed_context_parts.append(
                        f"[Embedded content: {truncated}]"
                    )
                # Analyze images found inside the embed
                for img_url in embed_content["images"]:
                    logger.info(f"Processing image from embed: {img_url}")
                    try:
                        base64_img = await download_and_encode_image(img_url)
                        if base64_img:
                            logger.info(
                                "Image downloaded, analyzing with vision model..."
                            )
                            qwen_description = await analyze_image_with_vision(
                                base64_img, user_prompt=prompt,
                            )
                            if qwen_description and qwen_description.strip():
                                embed_context_parts.append(
                                    f"[Embedded image shows: {qwen_description}]"
                                )
                        else:
                            logger.error("Failed to download image from embed")
                    except Exception as e:
                        logger.error(f"Error processing embedded image: {e}")
                # Analyze videos found inside the embed
                for video_url in embed_content["videos"]:
                    logger.info(
                        f"🎬 Processing video from embed: {video_url}"
                    )
                    try:
                        media_bytes_b64 = await download_and_encode_media(
                            video_url,
                        )
                        if media_bytes_b64:
                            media_bytes = base64.b64decode(media_bytes_b64)
                            frames = await extract_video_frames(
                                media_bytes, num_frames=6,
                            )
                            if frames:
                                logger.info(
                                    f"📹 Extracted {len(frames)} frames, "
                                    "analyzing with vision model..."
                                )
                                video_description = (
                                    await analyze_video_with_vision(
                                        frames,
                                        media_type="video",
                                        user_prompt=prompt,
                                    )
                                )
                                if (
                                    video_description
                                    and video_description.strip()
                                ):
                                    embed_context_parts.append(
                                        f"[Embedded video shows: "
                                        f"{video_description}]"
                                    )
                            else:
                                logger.error(
                                    "Failed to extract frames from video"
                                )
                        else:
                            logger.error(
                                "Failed to download video from embed"
                            )
                    except Exception as e:
                        logger.error(
                            f"Error processing embedded video: {e}"
                        )
                if not embed_context_parts:
                    continue
                # Build a combined vision description and route through
                # rephrase_as_miku (which handles Cat → LLM fallback,
                # mood resolution, and LAST_CAT_INTERACTION tracking).
                combined_description = "\n".join(embed_context_parts)
                miku_reply = await rephrase_as_miku(
                    combined_description, prompt,
                    guild_id=guild_id,
                    user_id=author_id,
                    author_name=author_name,
                    media_type="rich_embed",
                )
                await _send_log_bipolar(
                    message, miku_reply, is_dm, media_label="embed",
                )
                return True
    return False
 async def extract_embed_content(embed):
    """
    Extract text and media content from a Discord embed.