refactor: extract media processing from bot.py into image_handling.py (Phase D Step 19)
- Create process_media_in_message() in utils/image_handling.py that handles all 4 media types: image attachments, video/GIF attachments, Tenor GIF embeds, and rich embeds
- DRY the send→log→bipolar tail pattern (5x repeated) into _send_log_bipolar() helper
- Unify rich/article/link embed handling to use rephrase_as_miku() instead of inline Cat→LLM routing, fixing a mood-resolution bug (was using globals.DM_MOOD for servers)
- Add 'rich_embed' media_type to rephrase_as_miku() prefix switch
- Remove 3 inline 'import base64' from bot.py (already module-level in image_handling.py)
- bot.py: 986 → 623 lines (-363)
- image_handling.py: 559 → 881 lines (+322)
- All 170 tests pass (21 config/state + 149 route split)
This commit is contained in:
375
bot/bot.py
375
bot/bot.py
@@ -19,15 +19,7 @@ from utils.scheduled import (
|
|||||||
send_monday_video
|
send_monday_video
|
||||||
)
|
)
|
||||||
from utils.image_handling import (
|
from utils.image_handling import (
|
||||||
download_and_encode_image,
|
process_media_in_message,
|
||||||
download_and_encode_media,
|
|
||||||
extract_video_frames,
|
|
||||||
analyze_image_with_qwen,
|
|
||||||
analyze_video_with_vision,
|
|
||||||
rephrase_as_miku,
|
|
||||||
extract_tenor_gif_url,
|
|
||||||
convert_gif_to_mp4,
|
|
||||||
extract_embed_content
|
|
||||||
)
|
)
|
||||||
from utils.core import (
|
from utils.core import (
|
||||||
is_miku_addressed,
|
is_miku_addressed,
|
||||||
@@ -266,343 +258,10 @@ async def on_message(message):
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
# If message has an image, video, or GIF attachment
|
# Dispatch media processing (images, videos, GIFs, embeds)
|
||||||
if message.attachments:
|
# to utils/image_handling.process_media_in_message()
|
||||||
for attachment in message.attachments:
|
|
||||||
# Handle images
|
|
||||||
if any(attachment.filename.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png", ".webp"]):
|
|
||||||
base64_img = await download_and_encode_image(attachment.url)
|
|
||||||
if not base64_img:
|
|
||||||
await message.channel.send("I couldn't load the image, sorry!")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Analyze image (objective description)
|
|
||||||
qwen_description = await analyze_image_with_qwen(base64_img, user_prompt=prompt)
|
|
||||||
if not qwen_description or not qwen_description.strip():
|
|
||||||
await message.channel.send("I couldn't see that image clearly, sorry! Try sending it again.")
|
|
||||||
return
|
|
||||||
# For DMs, pass None as guild_id to use DM mood
|
|
||||||
guild_id = message.guild.id if message.guild else None
|
guild_id = message.guild.id if message.guild else None
|
||||||
miku_reply = await rephrase_as_miku(
|
if await process_media_in_message(message, prompt, is_dm, guild_id):
|
||||||
qwen_description,
|
|
||||||
prompt,
|
|
||||||
guild_id=guild_id,
|
|
||||||
user_id=str(message.author.id),
|
|
||||||
author_name=message.author.display_name,
|
|
||||||
media_type="image"
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_dm:
|
|
||||||
logger.info(f"💌 DM image response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
|
|
||||||
else:
|
|
||||||
logger.info(f"💬 Server image response to {message.author.display_name} in {message.guild.name} (using server mood)")
|
|
||||||
|
|
||||||
response_message = await message.channel.send(miku_reply)
|
|
||||||
|
|
||||||
# Log the bot's DM response
|
|
||||||
if is_dm:
|
|
||||||
dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
|
|
||||||
|
|
||||||
# For server messages, check if opposite persona should interject
|
|
||||||
if not is_dm and globals.BIPOLAR_MODE:
|
|
||||||
try:
|
|
||||||
from utils.persona_dialogue import check_for_interjection
|
|
||||||
current_persona = "evil" if globals.EVIL_MODE else "miku"
|
|
||||||
create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error checking for persona interjection: {e}")
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
# Handle videos and GIFs
|
|
||||||
elif any(attachment.filename.lower().endswith(ext) for ext in [".gif", ".mp4", ".webm", ".mov"]):
|
|
||||||
# Determine media type
|
|
||||||
is_gif = attachment.filename.lower().endswith('.gif')
|
|
||||||
media_type = "gif" if is_gif else "video"
|
|
||||||
|
|
||||||
logger.debug(f"🎬 Processing {media_type}: {attachment.filename}")
|
|
||||||
|
|
||||||
# Download the media
|
|
||||||
media_bytes_b64 = await download_and_encode_media(attachment.url)
|
|
||||||
if not media_bytes_b64:
|
|
||||||
await message.channel.send(f"I couldn't load the {media_type}, sorry!")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Decode back to bytes for frame extraction
|
|
||||||
import base64
|
|
||||||
media_bytes = base64.b64decode(media_bytes_b64)
|
|
||||||
|
|
||||||
# If it's a GIF, convert to MP4 for better processing
|
|
||||||
if is_gif:
|
|
||||||
logger.debug(f"🔄 Converting GIF to MP4 for processing...")
|
|
||||||
mp4_bytes = await convert_gif_to_mp4(media_bytes)
|
|
||||||
if mp4_bytes:
|
|
||||||
media_bytes = mp4_bytes
|
|
||||||
logger.info(f"✅ GIF converted to MP4")
|
|
||||||
else:
|
|
||||||
logger.warning(f"GIF conversion failed, trying direct processing")
|
|
||||||
|
|
||||||
# Extract frames
|
|
||||||
frames = await extract_video_frames(media_bytes, num_frames=6)
|
|
||||||
|
|
||||||
if not frames:
|
|
||||||
await message.channel.send(f"I couldn't extract frames from that {media_type}, sorry!")
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.debug(f"📹 Extracted {len(frames)} frames from {attachment.filename}")
|
|
||||||
|
|
||||||
# Analyze the video/GIF with appropriate media type
|
|
||||||
video_description = await analyze_video_with_vision(frames, media_type=media_type, user_prompt=prompt)
|
|
||||||
if not video_description or not video_description.strip():
|
|
||||||
await message.channel.send(f"I couldn't analyze that {media_type} clearly, sorry! Try sending it again.")
|
|
||||||
return
|
|
||||||
# For DMs, pass None as guild_id to use DM mood
|
|
||||||
guild_id = message.guild.id if message.guild else None
|
|
||||||
miku_reply = await rephrase_as_miku(
|
|
||||||
video_description,
|
|
||||||
prompt,
|
|
||||||
guild_id=guild_id,
|
|
||||||
user_id=str(message.author.id),
|
|
||||||
author_name=message.author.display_name,
|
|
||||||
media_type=media_type
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_dm:
|
|
||||||
logger.info(f"💌 DM {media_type} response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
|
|
||||||
else:
|
|
||||||
logger.info(f"💬 Server video response to {message.author.display_name} in {message.guild.name} (using server mood)")
|
|
||||||
|
|
||||||
response_message = await message.channel.send(miku_reply)
|
|
||||||
|
|
||||||
# Log the bot's DM response
|
|
||||||
if is_dm:
|
|
||||||
dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
|
|
||||||
|
|
||||||
# For server messages, check if opposite persona should interject
|
|
||||||
if not is_dm and globals.BIPOLAR_MODE:
|
|
||||||
try:
|
|
||||||
from utils.persona_dialogue import check_for_interjection
|
|
||||||
current_persona = "evil" if globals.EVIL_MODE else "miku"
|
|
||||||
create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error checking for persona interjection: {e}")
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
# Check for embeds (articles, images, videos, GIFs, etc.)
|
|
||||||
if message.embeds:
|
|
||||||
for embed in message.embeds:
|
|
||||||
# Handle Tenor GIF embeds specially (Discord uses these for /gif command)
|
|
||||||
if embed.type == 'gifv' and embed.url and 'tenor.com' in embed.url:
|
|
||||||
logger.info(f"🎭 Processing Tenor GIF from embed: {embed.url}")
|
|
||||||
|
|
||||||
# Extract the actual GIF URL from Tenor
|
|
||||||
gif_url = await extract_tenor_gif_url(embed.url)
|
|
||||||
if not gif_url:
|
|
||||||
# Try using the embed's video or image URL as fallback
|
|
||||||
if hasattr(embed, 'video') and embed.video:
|
|
||||||
gif_url = embed.video.url
|
|
||||||
elif hasattr(embed, 'thumbnail') and embed.thumbnail:
|
|
||||||
gif_url = embed.thumbnail.url
|
|
||||||
|
|
||||||
if not gif_url:
|
|
||||||
logger.warning(f"Could not extract GIF URL from Tenor embed")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Download the GIF
|
|
||||||
media_bytes_b64 = await download_and_encode_media(gif_url)
|
|
||||||
if not media_bytes_b64:
|
|
||||||
await message.channel.send("I couldn't load that Tenor GIF, sorry!")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Decode to bytes
|
|
||||||
import base64
|
|
||||||
media_bytes = base64.b64decode(media_bytes_b64)
|
|
||||||
|
|
||||||
# Convert GIF to MP4
|
|
||||||
logger.debug(f"Converting Tenor GIF to MP4 for processing...")
|
|
||||||
mp4_bytes = await convert_gif_to_mp4(media_bytes)
|
|
||||||
if not mp4_bytes:
|
|
||||||
logger.warning(f"GIF conversion failed, trying direct frame extraction")
|
|
||||||
mp4_bytes = media_bytes
|
|
||||||
else:
|
|
||||||
logger.debug(f"Tenor GIF converted to MP4")
|
|
||||||
|
|
||||||
# Extract frames
|
|
||||||
frames = await extract_video_frames(mp4_bytes, num_frames=6)
|
|
||||||
|
|
||||||
if not frames:
|
|
||||||
await message.channel.send("I couldn't extract frames from that GIF, sorry!")
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.info(f"📹 Extracted {len(frames)} frames from Tenor GIF")
|
|
||||||
|
|
||||||
# Analyze the GIF with tenor_gif media type
|
|
||||||
video_description = await analyze_video_with_vision(frames, media_type="tenor_gif", user_prompt=prompt)
|
|
||||||
if not video_description or not video_description.strip():
|
|
||||||
await message.channel.send("I couldn't analyze that GIF clearly, sorry! Try sending it again.")
|
|
||||||
return
|
|
||||||
guild_id = message.guild.id if message.guild else None
|
|
||||||
miku_reply = await rephrase_as_miku(
|
|
||||||
video_description,
|
|
||||||
prompt,
|
|
||||||
guild_id=guild_id,
|
|
||||||
user_id=str(message.author.id),
|
|
||||||
author_name=message.author.display_name,
|
|
||||||
media_type="tenor_gif"
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_dm:
|
|
||||||
logger.info(f"💌 DM Tenor GIF response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
|
|
||||||
else:
|
|
||||||
logger.info(f"💬 Server Tenor GIF response to {message.author.display_name} in {message.guild.name} (using server mood)")
|
|
||||||
|
|
||||||
response_message = await message.channel.send(miku_reply)
|
|
||||||
|
|
||||||
# Log the bot's DM response
|
|
||||||
if is_dm:
|
|
||||||
dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
|
|
||||||
|
|
||||||
# For server messages, check if opposite persona should interject
|
|
||||||
if not is_dm and globals.BIPOLAR_MODE:
|
|
||||||
try:
|
|
||||||
from utils.persona_dialogue import check_for_interjection
|
|
||||||
current_persona = "evil" if globals.EVIL_MODE else "miku"
|
|
||||||
create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error checking for persona interjection: {e}")
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
# Handle other types of embeds (rich, article, image, video, link)
|
|
||||||
elif embed.type in ['rich', 'article', 'image', 'video', 'link']:
|
|
||||||
logger.error(f"Processing {embed.type} embed")
|
|
||||||
|
|
||||||
# Extract content from embed
|
|
||||||
embed_content = await extract_embed_content(embed)
|
|
||||||
|
|
||||||
if not embed_content['has_content']:
|
|
||||||
logger.warning(f"Embed has no extractable content, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Build context string with embed text
|
|
||||||
embed_context_parts = []
|
|
||||||
if embed_content['text']:
|
|
||||||
embed_context_parts.append(f"[Embedded content: {embed_content['text'][:500]}{'...' if len(embed_content['text']) > 500 else ''}]")
|
|
||||||
|
|
||||||
# Process images from embed
|
|
||||||
if embed_content['images']:
|
|
||||||
for img_url in embed_content['images']:
|
|
||||||
logger.error(f"Processing image from embed: {img_url}")
|
|
||||||
try:
|
|
||||||
base64_img = await download_and_encode_image(img_url)
|
|
||||||
if base64_img:
|
|
||||||
logger.info(f"Image downloaded, analyzing with vision model...")
|
|
||||||
# Analyze image
|
|
||||||
qwen_description = await analyze_image_with_qwen(base64_img, user_prompt=prompt)
|
|
||||||
truncated = (qwen_description[:50] + "...") if len(qwen_description) > 50 else qwen_description
|
|
||||||
logger.error(f"Vision analysis result: {truncated}")
|
|
||||||
if qwen_description and qwen_description.strip():
|
|
||||||
embed_context_parts.append(f"[Embedded image shows: {qwen_description}]")
|
|
||||||
else:
|
|
||||||
logger.error(f"Failed to download image from embed")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing embedded image: {e}")
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
# Process videos from embed
|
|
||||||
if embed_content['videos']:
|
|
||||||
for video_url in embed_content['videos']:
|
|
||||||
logger.info(f"🎬 Processing video from embed: {video_url}")
|
|
||||||
try:
|
|
||||||
media_bytes_b64 = await download_and_encode_media(video_url)
|
|
||||||
if media_bytes_b64:
|
|
||||||
import base64
|
|
||||||
media_bytes = base64.b64decode(media_bytes_b64)
|
|
||||||
frames = await extract_video_frames(media_bytes, num_frames=6)
|
|
||||||
if frames:
|
|
||||||
logger.info(f"📹 Extracted {len(frames)} frames, analyzing with vision model...")
|
|
||||||
video_description = await analyze_video_with_vision(frames, media_type="video", user_prompt=prompt)
|
|
||||||
logger.info(f"Video analysis result: {video_description[:100]}...")
|
|
||||||
if video_description and video_description.strip():
|
|
||||||
embed_context_parts.append(f"[Embedded video shows: {video_description}]")
|
|
||||||
else:
|
|
||||||
logger.error(f"Failed to extract frames from video")
|
|
||||||
else:
|
|
||||||
logger.error(f"Failed to download video from embed")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing embedded video: {e}")
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
# Combine embed context with user prompt
|
|
||||||
if embed_context_parts:
|
|
||||||
full_context = '\n'.join(embed_context_parts)
|
|
||||||
enhanced_prompt = f"{full_context}\n\nUser message: {prompt}" if prompt else full_context
|
|
||||||
|
|
||||||
# Get Miku's response
|
|
||||||
guild_id = message.guild.id if message.guild else None
|
|
||||||
response_type = "dm_response" if is_dm else "server_response"
|
|
||||||
author_name = message.author.display_name
|
|
||||||
|
|
||||||
# Phase 3: Try Cat pipeline first for embed responses too
|
|
||||||
response = None
|
|
||||||
if globals.USE_CHESHIRE_CAT:
|
|
||||||
try:
|
|
||||||
from utils.cat_client import cat_adapter
|
|
||||||
cat_result = await cat_adapter.query(
|
|
||||||
text=enhanced_prompt,
|
|
||||||
user_id=str(message.author.id),
|
|
||||||
guild_id=str(guild_id) if guild_id else None,
|
|
||||||
author_name=author_name,
|
|
||||||
mood=globals.DM_MOOD,
|
|
||||||
response_type=response_type,
|
|
||||||
)
|
|
||||||
if cat_result:
|
|
||||||
response, cat_full_prompt = cat_result
|
|
||||||
logger.info(f"🐱 Cat embed response for {author_name}")
|
|
||||||
import datetime
|
|
||||||
globals.LAST_CAT_INTERACTION = {
|
|
||||||
"full_prompt": cat_full_prompt,
|
|
||||||
"response": response[:500] if response else "",
|
|
||||||
"user": author_name,
|
|
||||||
"mood": globals.DM_MOOD,
|
|
||||||
"timestamp": datetime.datetime.now().isoformat(),
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"🐱 Cat embed error, fallback: {e}")
|
|
||||||
response = None
|
|
||||||
|
|
||||||
if not response:
|
|
||||||
response = await query_llama(
|
|
||||||
enhanced_prompt,
|
|
||||||
user_id=str(message.author.id),
|
|
||||||
guild_id=guild_id,
|
|
||||||
response_type=response_type,
|
|
||||||
author_name=author_name
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_dm:
|
|
||||||
logger.info(f"💌 DM embed response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
|
|
||||||
else:
|
|
||||||
logger.info(f"💬 Server embed response to {message.author.display_name} in {message.guild.name}")
|
|
||||||
|
|
||||||
response_message = await message.channel.send(response)
|
|
||||||
|
|
||||||
# Log the bot's DM response
|
|
||||||
if is_dm:
|
|
||||||
dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
|
|
||||||
|
|
||||||
# For server messages, check if opposite persona should interject
|
|
||||||
if not is_dm and globals.BIPOLAR_MODE:
|
|
||||||
try:
|
|
||||||
from utils.persona_dialogue import check_for_interjection
|
|
||||||
current_persona = "evil" if globals.EVIL_MODE else "miku"
|
|
||||||
create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error checking for persona interjection: {e}")
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check if this is an image generation request
|
# Check if this is an image generation request
|
||||||
@@ -686,30 +345,8 @@ async def on_message(message):
|
|||||||
author_name=author_name
|
author_name=author_name
|
||||||
)
|
)
|
||||||
|
|
||||||
if is_dm:
|
from utils.image_handling import _send_log_bipolar
|
||||||
logger.info(f"💌 DM response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
|
response_message = await _send_log_bipolar(message, response, is_dm)
|
||||||
else:
|
|
||||||
logger.info(f"💬 Server response to {message.author.display_name} in {message.guild.name} (using server mood)")
|
|
||||||
|
|
||||||
response_message = await message.channel.send(response)
|
|
||||||
|
|
||||||
# Log the bot's DM response
|
|
||||||
if is_dm:
|
|
||||||
dm_logger.log_user_message(message.author, response_message, is_bot_message=True)
|
|
||||||
|
|
||||||
# For server messages, check if opposite persona should interject (persona dialogue system)
|
|
||||||
if not is_dm and globals.BIPOLAR_MODE:
|
|
||||||
logger.debug(f"Attempting to check for interjection (is_dm={is_dm}, BIPOLAR_MODE={globals.BIPOLAR_MODE})")
|
|
||||||
try:
|
|
||||||
from utils.persona_dialogue import check_for_interjection
|
|
||||||
current_persona = "evil" if globals.EVIL_MODE else "miku"
|
|
||||||
logger.debug(f"Creating interjection check task for persona: {current_persona}")
|
|
||||||
# Pass the bot's response message for analysis
|
|
||||||
create_tracked_task(check_for_interjection(response_message, current_persona), task_name="interjection_check")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error checking for persona interjection: {e}")
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
# For server messages, do server-specific mood detection
|
# For server messages, do server-specific mood detection
|
||||||
if not is_dm and message.guild:
|
if not is_dm and message.guild:
|
||||||
|
|||||||
@@ -418,14 +418,13 @@ async def rephrase_as_miku(vision_output, user_prompt, guild_id=None, user_id=No
|
|||||||
|
|
||||||
# Format the user's message to include vision context with media type
|
# Format the user's message to include vision context with media type
|
||||||
# This will be saved to history automatically by query_llama
|
# This will be saved to history automatically by query_llama
|
||||||
if media_type == "gif":
|
_MEDIA_PREFIXES = {
|
||||||
media_prefix = "Looking at a GIF"
|
"gif": "Looking at a GIF",
|
||||||
elif media_type == "tenor_gif":
|
"tenor_gif": "Looking at a Tenor GIF",
|
||||||
media_prefix = "Looking at a Tenor GIF"
|
"video": "Looking at a video",
|
||||||
elif media_type == "video":
|
"rich_embed": "Looking at embedded content",
|
||||||
media_prefix = "Looking at a video"
|
}
|
||||||
else: # image
|
media_prefix = _MEDIA_PREFIXES.get(media_type, "Looking at an image")
|
||||||
media_prefix = "Looking at an image"
|
|
||||||
|
|
||||||
if user_prompt:
|
if user_prompt:
|
||||||
# Include media type, vision description, and user's text
|
# Include media type, vision description, and user's text
|
||||||
@@ -503,6 +502,330 @@ async def rephrase_as_miku(vision_output, user_prompt, guild_id=None, user_id=No
|
|||||||
analyze_image_with_qwen = analyze_image_with_vision
|
analyze_image_with_qwen = analyze_image_with_vision
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Shared tail helper — send response, log DM, check bipolar interjection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _send_log_bipolar(message, reply_text, is_dm, *, media_label=""):
    """
    Send a reply and run the shared post-send bookkeeping.

    Steps, in order:
      1. Log an info line describing the reply (DM or server flavour).
      2. Send *reply_text* to ``message.channel``.
      3. For DMs, record the sent message through ``dm_logger``.
      4. For server messages with ``globals.BIPOLAR_MODE`` set, schedule a
         ``check_for_interjection`` task for the sent message.

    *media_label* is an optional word (e.g. ``"image"``) inserted into the
    log line; it is keyword-only and defaults to no label.

    Returns the message object produced by ``message.channel.send``.
    """
    from utils.dm_logger import dm_logger
    from utils.task_tracker import create_tracked_task

    # " image" / " embed" / "" - keeps the log line identical when unlabeled.
    suffix = f" {media_label}" if media_label else ""

    if is_dm:
        log_line = (
            f"💌 DM{suffix} response to {message.author.display_name} "
            f"(using DM mood: {globals.DM_MOOD})"
        )
    else:
        where = message.guild.name if message.guild else "unknown"
        log_line = (
            f"💬 Server{suffix} response to {message.author.display_name} "
            f"in {where} (using server mood)"
        )
    logger.info(log_line)

    sent = await message.channel.send(reply_text)

    if is_dm:
        # DMs are recorded in the DM ledger and never trigger interjections.
        dm_logger.log_user_message(message.author, sent, is_bot_message=True)
        return sent

    if not globals.BIPOLAR_MODE:
        return sent

    # Server message in bipolar mode: schedule the interjection check.
    # Failures here are logged and swallowed so the reply that was already
    # sent is still returned to the caller.
    try:
        from utils.persona_dialogue import check_for_interjection

        persona = "evil" if globals.EVIL_MODE else "miku"
        create_tracked_task(
            check_for_interjection(sent, persona),
            task_name="interjection_check",
        )
    except Exception as e:
        logger.error(f"Error checking for persona interjection: {e}")

    return sent
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# High-level media dispatcher — called from bot.py on_message()
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# File-extension and embed-type groups recognised by the dispatcher below.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".webp")
_VIDEO_EXTENSIONS = (".gif", ".mp4", ".webm", ".mov")
_GENERIC_EMBED_TYPES = ("rich", "article", "image", "video", "link")


async def _reply_in_persona(message, description, prompt, is_dm, guild_id,
                            *, media_type, media_label):
    """Rephrase *description* via rephrase_as_miku() and send it with the shared tail."""
    miku_reply = await rephrase_as_miku(
        description, prompt,
        guild_id=guild_id,
        user_id=str(message.author.id),
        author_name=message.author.display_name,
        media_type=media_type,
    )
    await _send_log_bipolar(message, miku_reply, is_dm, media_label=media_label)


async def _handle_image_attachment(message, attachment, prompt, is_dm, guild_id):
    """Analyze one image attachment and reply, or send an apology on failure."""
    base64_img = await download_and_encode_image(attachment.url)
    if not base64_img:
        await message.channel.send("I couldn't load the image, sorry!")
        return

    description = await analyze_image_with_vision(base64_img, user_prompt=prompt)
    if not description or not description.strip():
        await message.channel.send(
            "I couldn't see that image clearly, sorry! Try sending it again."
        )
        return

    await _reply_in_persona(
        message, description, prompt, is_dm, guild_id,
        media_type="image", media_label="image",
    )


async def _handle_video_attachment(message, attachment, prompt, is_dm, guild_id):
    """Analyze one video/GIF attachment and reply, or send an apology on failure."""
    is_gif = attachment.filename.lower().endswith(".gif")
    media_type = "gif" if is_gif else "video"

    logger.debug(f"🎬 Processing {media_type}: {attachment.filename}")

    media_bytes_b64 = await download_and_encode_media(attachment.url)
    if not media_bytes_b64:
        await message.channel.send(f"I couldn't load the {media_type}, sorry!")
        return

    # The downloader hands back base64 text; frame extraction needs raw bytes.
    media_bytes = base64.b64decode(media_bytes_b64)

    if is_gif:
        logger.debug("🔄 Converting GIF to MP4 for processing...")
        mp4_bytes = await convert_gif_to_mp4(media_bytes)
        if mp4_bytes:
            media_bytes = mp4_bytes
            logger.info("✅ GIF converted to MP4")
        else:
            # Best effort: fall through with the original GIF bytes.
            logger.warning("GIF conversion failed, trying direct processing")

    frames = await extract_video_frames(media_bytes, num_frames=6)
    if not frames:
        await message.channel.send(
            f"I couldn't extract frames from that {media_type}, sorry!"
        )
        return

    logger.debug(f"📹 Extracted {len(frames)} frames from {attachment.filename}")

    description = await analyze_video_with_vision(
        frames, media_type=media_type, user_prompt=prompt,
    )
    if not description or not description.strip():
        await message.channel.send(
            f"I couldn't analyze that {media_type} clearly, sorry! "
            "Try sending it again."
        )
        return

    await _reply_in_persona(
        message, description, prompt, is_dm, guild_id,
        media_type=media_type, media_label=media_type,
    )


async def _handle_tenor_embed(message, embed, prompt, is_dm, guild_id) -> bool:
    """Analyze a Tenor ``gifv`` embed; return False only when no GIF URL can be found."""
    logger.info(f"🎭 Processing Tenor GIF from embed: {embed.url}")

    gif_url = await extract_tenor_gif_url(embed.url)
    if not gif_url:
        # Fall back to whatever media URL the embed itself carries.
        if hasattr(embed, "video") and embed.video:
            gif_url = embed.video.url
        elif hasattr(embed, "thumbnail") and embed.thumbnail:
            gif_url = embed.thumbnail.url

    if not gif_url:
        logger.warning("Could not extract GIF URL from Tenor embed")
        return False

    media_bytes_b64 = await download_and_encode_media(gif_url)
    if not media_bytes_b64:
        await message.channel.send("I couldn't load that Tenor GIF, sorry!")
        return True

    media_bytes = base64.b64decode(media_bytes_b64)

    logger.debug("Converting Tenor GIF to MP4 for processing...")
    mp4_bytes = await convert_gif_to_mp4(media_bytes)
    if not mp4_bytes:
        logger.warning("GIF conversion failed, trying direct frame extraction")
        mp4_bytes = media_bytes
    else:
        logger.debug("Tenor GIF converted to MP4")

    frames = await extract_video_frames(mp4_bytes, num_frames=6)
    if not frames:
        await message.channel.send("I couldn't extract frames from that GIF, sorry!")
        return True

    logger.info(f"📹 Extracted {len(frames)} frames from Tenor GIF")

    description = await analyze_video_with_vision(
        frames, media_type="tenor_gif", user_prompt=prompt,
    )
    if not description or not description.strip():
        await message.channel.send(
            "I couldn't analyze that GIF clearly, sorry! "
            "Try sending it again."
        )
        return True

    await _reply_in_persona(
        message, description, prompt, is_dm, guild_id,
        media_type="tenor_gif", media_label="Tenor GIF",
    )
    return True


async def _describe_embed_image(img_url, prompt):
    """Return a ``[Embedded image shows: ...]`` context line for *img_url*, or None."""
    logger.info(f"Processing image from embed: {img_url}")
    try:
        base64_img = await download_and_encode_image(img_url)
        if not base64_img:
            logger.error("Failed to download image from embed")
            return None

        logger.info("Image downloaded, analyzing with vision model...")
        description = await analyze_image_with_vision(base64_img, user_prompt=prompt)
        if description and description.strip():
            return f"[Embedded image shows: {description}]"
    except Exception as e:
        # Deliberately best-effort: one bad image must not abort the embed.
        logger.error(f"Error processing embedded image: {e}")
    return None


async def _describe_embed_video(video_url, prompt):
    """Return a ``[Embedded video shows: ...]`` context line for *video_url*, or None."""
    logger.info(f"🎬 Processing video from embed: {video_url}")
    try:
        media_bytes_b64 = await download_and_encode_media(video_url)
        if not media_bytes_b64:
            logger.error("Failed to download video from embed")
            return None

        media_bytes = base64.b64decode(media_bytes_b64)
        frames = await extract_video_frames(media_bytes, num_frames=6)
        if not frames:
            logger.error("Failed to extract frames from video")
            return None

        logger.info(
            f"📹 Extracted {len(frames)} frames, "
            "analyzing with vision model..."
        )
        description = await analyze_video_with_vision(
            frames, media_type="video", user_prompt=prompt,
        )
        if description and description.strip():
            return f"[Embedded video shows: {description}]"
    except Exception as e:
        # Deliberately best-effort: one bad video must not abort the embed.
        logger.error(f"Error processing embedded video: {e}")
    return None


async def _handle_generic_embed(message, embed, prompt, is_dm, guild_id) -> bool:
    """Summarize a rich/article/image/video/link embed; return False if nothing usable."""
    logger.info(f"Processing {embed.type} embed")

    embed_content = await extract_embed_content(embed)
    if not embed_content["has_content"]:
        logger.warning("Embed has no extractable content, skipping")
        return False

    context_parts = []

    text = embed_content["text"]
    if text:
        # Cap the embedded text at 500 characters, marking any truncation.
        truncated = text[:500]
        if len(text) > 500:
            truncated += "..."
        context_parts.append(f"[Embedded content: {truncated}]")

    for img_url in embed_content["images"]:
        part = await _describe_embed_image(img_url, prompt)
        if part:
            context_parts.append(part)

    for video_url in embed_content["videos"]:
        part = await _describe_embed_video(video_url, prompt)
        if part:
            context_parts.append(part)

    if not context_parts:
        return False

    # The combined description goes through rephrase_as_miku(), the same
    # entry point the attachment handlers use, with media_type="rich_embed".
    await _reply_in_persona(
        message, "\n".join(context_parts), prompt, is_dm, guild_id,
        media_type="rich_embed", media_label="embed",
    )
    return True


async def process_media_in_message(message, prompt, is_dm, guild_id) -> bool:
    """
    Inspect *message* for image/video/GIF attachments and embeds.

    Attachments are checked first, then embeds, each in message order. The
    first item that is picked up ends the search.

    Returns ``True`` when the message was handled here, meaning something
    was sent to the channel: either a persona reply or an apology after a
    failed download/analysis. Returns ``False`` when no supported media was
    found, or every embed was skipped for lack of usable content, so the
    caller can fall through to text-only handling.

    Parameters:
        message:  the incoming Discord message; ``attachments``, ``embeds``,
                  ``author`` and ``channel`` are read from it.
        prompt:   the user's text, forwarded to the vision and rephrase calls.
        is_dm:    truthy for direct messages; forwarded to the send helper.
        guild_id: forwarded to ``rephrase_as_miku`` as ``guild_id``.
    """
    # ---- 1 & 2. Attachments: images, then videos/GIFs --------------------
    for attachment in message.attachments or ():
        lower = attachment.filename.lower()

        if lower.endswith(_IMAGE_EXTENSIONS):
            await _handle_image_attachment(message, attachment, prompt, is_dm, guild_id)
            return True

        if lower.endswith(_VIDEO_EXTENSIONS):
            await _handle_video_attachment(message, attachment, prompt, is_dm, guild_id)
            return True

    # ---- 3 & 4. Embeds: Tenor GIFs, then generic rich embeds -------------
    for embed in message.embeds or ():
        if embed.type == "gifv" and embed.url and "tenor.com" in embed.url:
            if await _handle_tenor_embed(message, embed, prompt, is_dm, guild_id):
                return True
        elif embed.type in _GENERIC_EMBED_TYPES:
            if await _handle_generic_embed(message, embed, prompt, is_dm, guild_id):
                return True

    return False
|
||||||
|
|
||||||
|
|
||||||
async def extract_embed_content(embed):
|
async def extract_embed_content(embed):
|
||||||
"""
|
"""
|
||||||
Extract text and media content from a Discord embed.
|
Extract text and media content from a Discord embed.
|
||||||
|
|||||||
Reference in New Issue
Block a user