From d58be3b33e2cf3dc0a04ed567dae66cc1165fc3a Mon Sep 17 00:00:00 2001
From: koko210Serve
Date: Sun, 7 Dec 2025 17:50:08 +0200
Subject: [PATCH] Remove all Ollama remnants and complete migration to llama.cpp

- Remove Ollama-specific files (Dockerfile.ollama, entrypoint.sh)
- Replace all query_ollama imports and calls with query_llama
- Remove langchain-ollama dependency from requirements.txt
- Update all utility files (autonomous, kindness, image_generation, etc.)
- Update README.md documentation references
- Maintain backward compatibility alias in llm.py (see the sketch after the diff)
---
 Dockerfile.ollama                    |   8 -
 README.md                            |   2 +-
 bot/api.py                           |   4 +-
 bot/bot.py                           |   6 +-
 bot/requirements.txt                 |   1 -
 bot/utils/autonomous_v1_legacy.py    |  16 +-
 bot/utils/autonomous_wip.py          |  12 +-
 bot/utils/dm_interaction_analyzer.py |   4 +-
 bot/utils/figurine_notifier.py       |   4 +-
 bot/utils/image_generation.py        |   8 +-
 bot/utils/kindness.py                |   4 +-
 bot/utils/scheduled.py               |   6 +-
 bot/utils/sentiment_analysis.py      |   6 +-
 entrypoint.sh                        |  17 --
 readmes/VOICE_CHAT_IMPLEMENTATION.md | 222 ---------------------------
 15 files changed, 36 insertions(+), 284 deletions(-)
 delete mode 100644 Dockerfile.ollama
 delete mode 100755 entrypoint.sh
 delete mode 100644 readmes/VOICE_CHAT_IMPLEMENTATION.md

diff --git a/Dockerfile.ollama b/Dockerfile.ollama
deleted file mode 100644
index a5592db..0000000
--- a/Dockerfile.ollama
+++ /dev/null
@@ -1,8 +0,0 @@
-FROM ollama/ollama
-
-# Install curl so we can run health checks
-USER root
-RUN apt-get update && apt-get install -y curl && apt-get clean
-
-COPY entrypoint.sh /entrypoint.sh
-ENTRYPOINT ["/entrypoint.sh"]
diff --git a/README.md b/README.md
index 2b54e0d..5296d38 100644
--- a/README.md
+++ b/README.md
@@ -423,7 +423,7 @@ Detailed documentation available in the `readmes/` directory:
 - **[FACE_DETECTION_API_MIGRATION.md](readmes/FACE_DETECTION_API_MIGRATION.md)** - Face detection setup
 - **[DM_ANALYSIS_FEATURE.md](readmes/DM_ANALYSIS_FEATURE.md)** - DM interaction analytics
 - **[MOOD_SYSTEM_ANALYSIS.md](readmes/MOOD_SYSTEM_ANALYSIS.md)** - Mood system deep dive
-- **[QUICK_REFERENCE.md](readmes/QUICK_REFERENCE.md)** - Ollama → llama.cpp migration guide
+- **[QUICK_REFERENCE.md](readmes/QUICK_REFERENCE.md)** - llama.cpp setup and migration guide
 
 ---
 
diff --git a/bot/api.py b/bot/api.py
index 06f82ad..66daecd 100644
--- a/bot/api.py
+++ b/bot/api.py
@@ -833,11 +833,11 @@ async def send_custom_prompt_dm(user_id: str, req: CustomPromptRequest):
         return {"status": "error", "message": f"User {user_id} not found"}
 
     # Use the LLM query function for DM context
-    from utils.llm import query_ollama
+    from utils.llm import query_llama
 
     async def send_dm_custom_prompt():
         try:
-            response = await query_ollama(req.prompt, user_id=user_id, guild_id=None, response_type="dm_response")
+            response = await query_llama(req.prompt, user_id=user_id, guild_id=None, response_type="dm_response")
             await user.send(response)
             print(f"✅ Custom DM prompt sent to user {user_id}: {req.prompt[:50]}...")
 
diff --git a/bot/bot.py b/bot/bot.py
index a4fc565..6bf98a5 100644
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -34,7 +34,7 @@ from utils.moods import (
 from utils.media import(
     overlay_username_with_ffmpeg
 )
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.autonomous import (
     setup_autonomous_speaking,
     load_last_sent_tweets,
@@ -413,7 +413,7 @@ async def on_message(message):
             response_type = "dm_response" if is_dm else "server_response"
             author_name = message.author.display_name
 
-            response = await query_ollama(
+            response = await query_llama(
                 enhanced_prompt,
                 user_id=str(message.author.id),
                 guild_id=guild_id,
@@ -454,7 +454,7 @@ async def on_message(message):
             guild_id = message.guild.id if message.guild else None
             response_type = "dm_response" if is_dm else "server_response"
             author_name = message.author.display_name
-            response = await query_ollama(
+            response = await query_llama(
                 prompt,
                 user_id=str(message.author.id),
                 guild_id=guild_id,
diff --git a/bot/requirements.txt b/bot/requirements.txt
index 6f65151..2f5d79c 100644
--- a/bot/requirements.txt
+++ b/bot/requirements.txt
@@ -3,7 +3,6 @@ aiohttp
 requests
 langchain-core
 langchain-text-splitters
-langchain-ollama
 faiss-cpu
 langchain-community
 aiofiles
diff --git a/bot/utils/autonomous_v1_legacy.py b/bot/utils/autonomous_v1_legacy.py
index 5bbca8a..8735312 100644
--- a/bot/utils/autonomous_v1_legacy.py
+++ b/bot/utils/autonomous_v1_legacy.py
@@ -11,7 +11,7 @@ from discord import TextChannel
 from difflib import SequenceMatcher
 import globals
 from server_manager import server_manager
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.moods import MOOD_EMOJIS
 from utils.twitter_fetcher import fetch_miku_tweets
 from utils.image_handling import (
@@ -107,7 +107,7 @@ async def miku_say_something_general_for_server(guild_id: int):
     for attempt in range(3):  # retry up to 3 times if message is too similar
         # Use consistent user_id per guild for autonomous actions to enable conversation history
         # and prompt caching, rather than creating new IDs with timestamps
-        message = await query_ollama(prompt, user_id=f"miku-autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_general")
+        message = await query_llama(prompt, user_id=f"miku-autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_general")
         if not is_too_similar(message, _server_autonomous_messages[guild_id]):
             break
         print("🔁 Response was too similar to past messages, retrying...")
@@ -202,7 +202,7 @@ async def miku_engage_random_user_for_server(guild_id: int):
 
     try:
         # Use consistent user_id for engaging users to enable conversation history
-        message = await query_ollama(prompt, user_id=f"miku-engage-{guild_id}", guild_id=guild_id)
+        message = await query_llama(prompt, user_id=f"miku-engage-{guild_id}", guild_id=guild_id)
         await channel.send(f"{target.mention} {message}")
         _server_user_engagements[guild_id][target.id] = time.time()
         print(f"👤 Miku engaged {display_name} in server {server_config.guild_name}")
@@ -263,7 +263,7 @@ async def miku_detect_and_join_conversation_for_server(guild_id: int):
 
     try:
         # Use consistent user_id for joining conversations to enable conversation history
-        reply = await query_ollama(prompt, user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join")
+        reply = await query_llama(prompt, user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join")
         await channel.send(reply)
         print(f"💬 Miku joined an ongoing conversation in server {server_config.guild_name}")
     except Exception as e:
user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join") + reply = await query_llama(prompt, user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join") await channel.send(reply) print(f"💬 Miku joined an ongoing conversation in server {server_config.guild_name}") except Exception as e: @@ -309,7 +309,7 @@ async def share_miku_tweet_for_server(guild_id: int): img_desc = await analyze_image_with_qwen(base64_img) base_prompt += f"\n\nThe image looks like this: {img_desc}" - miku_comment = await query_ollama(base_prompt, user_id=f"autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_tweet") + miku_comment = await query_llama(base_prompt, user_id=f"autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_tweet") # Post to Discord (convert to fxtwitter for better embeds) fx_tweet_url = tweet['url'].replace("twitter.com", "fxtwitter.com").replace("x.com", "fxtwitter.com") @@ -342,7 +342,7 @@ async def handle_custom_prompt_for_server(guild_id: int, user_prompt: str): try: # Use consistent user_id for manual prompts to enable conversation history - message = await query_ollama(prompt, user_id=f"miku-manual-{guild_id}", guild_id=guild_id, response_type="autonomous_general") + message = await query_llama(prompt, user_id=f"miku-manual-{guild_id}", guild_id=guild_id, response_type="autonomous_general") await channel.send(message) print(f"🎤 Miku responded to custom prompt in server {server_config.guild_name}") @@ -585,7 +585,7 @@ async def miku_autonomous_reaction_for_server(guild_id: int, force_message=None, f"Be bold! Use uncommon emojis! Respond with ONLY the emoji character itself, no text." ) - emoji = await query_ollama( + emoji = await query_llama( prompt, user_id=f"miku-reaction-{guild_id}", # Use consistent user_id guild_id=guild_id, @@ -750,7 +750,7 @@ async def miku_autonomous_reaction_for_dm(user_id: int, force_message=None): f"Be bold! Use uncommon emojis! Respond with ONLY the emoji character itself, no text." 
diff --git a/bot/utils/autonomous_wip.py b/bot/utils/autonomous_wip.py
index 20905cf..bd317d6 100644
--- a/bot/utils/autonomous_wip.py
+++ b/bot/utils/autonomous_wip.py
@@ -10,7 +10,7 @@ from discord import Status
 from discord import TextChannel
 from difflib import SequenceMatcher
 import globals
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.moods import MOOD_EMOJIS
 from utils.twitter_fetcher import fetch_miku_tweets
 from utils.image_handling import analyze_image_with_qwen, download_and_encode_image
@@ -95,7 +95,7 @@ async def miku_say_something_general(guild_id, settings):
     )
 
     for attempt in range(3):  # retry up to 3 times if message is too similar
-        message = await query_ollama(prompt, user_id=f"miku-general-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
+        message = await query_llama(prompt, user_id=f"miku-general-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
         if not is_too_similar(message, _last_autonomous_messages):
             break
         print("🔁 Response was too similar to past messages, retrying...")
@@ -183,7 +183,7 @@ async def miku_engage_random_user(guild_id, settings):
     )
 
     try:
-        message = await query_ollama(prompt, user_id=f"miku-engage-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
+        message = await query_llama(prompt, user_id=f"miku-engage-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
         await channel.send(f"{target.mention} {message}")
         print(f"👤 Miku engaged {display_name}")
         _last_user_engagements[target.id] = time.time()
@@ -236,7 +236,7 @@ async def miku_detect_and_join_conversation():
     )
 
     try:
-        reply = await query_ollama(prompt, user_id=f"miku-chat-{int(time.time())}", guild_id=guild_id, response_type="conversation_join")
+        reply = await query_llama(prompt, user_id=f"miku-chat-{int(time.time())}", guild_id=guild_id, response_type="conversation_join")
         await channel.send(reply)
         print(f"💬 Miku joined an ongoing conversation.")
     except Exception as e:
@@ -275,7 +275,7 @@ async def share_miku_tweet(guild_id, settings):
             img_desc = await analyze_image_with_qwen(base64_img)
             base_prompt += f"\n\nThe image looks like this: {img_desc}"
 
-    miku_comment = await query_ollama(base_prompt, user_id="autonomous", guild_id=guild_id, response_type="autonomous_tweet")
+    miku_comment = await query_llama(base_prompt, user_id="autonomous", guild_id=guild_id, response_type="autonomous_tweet")
 
     # Post to Discord
     # Convert to fxtwitter for better embeds
@@ -302,7 +302,7 @@ async def handle_custom_prompt(user_prompt: str):
     )
 
     try:
-        message = await query_ollama(prompt, user_id=f"manual-{int(time.time())}", guild_id=None, response_type="autonomous_general")
+        message = await query_llama(prompt, user_id=f"manual-{int(time.time())}", guild_id=None, response_type="autonomous_general")
         await channel.send(message)
         print("🎤 Miku responded to custom prompt.")
         _last_autonomous_messages.append(message)
diff --git a/bot/utils/dm_interaction_analyzer.py b/bot/utils/dm_interaction_analyzer.py
index db1c564..15a4b88 100644
--- a/bot/utils/dm_interaction_analyzer.py
+++ b/bot/utils/dm_interaction_analyzer.py
@@ -9,7 +9,7 @@ from datetime import datetime, timedelta
 from typing import List, Dict, Optional
 import discord
 import globals
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.dm_logger import dm_logger
 
 # Directories
@@ -167,7 +167,7 @@ Respond ONLY with the JSON object, no other text."""
 
     # Query the LLM
     try:
-        response = await query_ollama(
+        response = await query_llama(
             analysis_prompt,
             user_id=f"analyzer-{user_id}",
             guild_id=None,
diff --git a/bot/utils/figurine_notifier.py b/bot/utils/figurine_notifier.py
index 3f8e01c..8c2bdfb 100644
--- a/bot/utils/figurine_notifier.py
+++ b/bot/utils/figurine_notifier.py
@@ -9,7 +9,7 @@ import globals
 
 from utils.twitter_fetcher import fetch_figurine_tweets_latest
 from utils.image_handling import analyze_image_with_qwen, download_and_encode_image
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.dm_logger import dm_logger
 
 
@@ -165,7 +165,7 @@ async def send_figurine_dm_to_user(client: discord.Client, user_id: int, tweet:
         base_prompt += "\n\nSign off as Miku with a cute emoji."
 
     # Query LLM in DM context (no guild_id -> DM mood rules apply)
-    miku_comment = await query_ollama(base_prompt, user_id=f"figurine_dm_{user_id}", guild_id=None, response_type="dm_response")
+    miku_comment = await query_llama(base_prompt, user_id=f"figurine_dm_{user_id}", guild_id=None, response_type="dm_response")
 
     dm = await user.create_dm()
     tweet_url = tweet.get("url", "")
diff --git a/bot/utils/image_generation.py b/bot/utils/image_generation.py
index 60b8c2c..27e5e43 100644
--- a/bot/utils/image_generation.py
+++ b/bot/utils/image_generation.py
@@ -13,7 +13,7 @@ import tempfile
 import time
 from typing import Optional, Tuple
 import globals
-from utils.llm import query_ollama
+from utils.llm import query_llama
 
 # Image generation detection patterns
 IMAGE_REQUEST_PATTERNS = [
@@ -299,7 +299,7 @@ async def handle_image_generation_request(message, prompt: str) -> bool:
         response_prompt = f"A user asked you to create an image with this description: '{prompt}'. Respond enthusiastically that you're creating this image for them. Keep it short and excited!"
         response_type = "dm_response" if is_dm else "server_response"
 
-        initial_response = await query_ollama(response_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
+        initial_response = await query_llama(response_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
 
         # Send initial response
         initial_msg = await message.channel.send(initial_response)
@@ -318,7 +318,7 @@ async def handle_image_generation_request(message, prompt: str) -> bool:
 
             # Create a follow-up message about the completed image
             completion_prompt = f"You just finished creating an image based on '{prompt}'. Make a short, excited comment about the completed artwork!"
-            completion_response = await query_ollama(completion_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
+            completion_response = await query_llama(completion_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
 
             await message.channel.send(completion_response, file=file)
 
@@ -333,7 +333,7 @@ async def handle_image_generation_request(message, prompt: str) -> bool:
         else:
             # Image generation failed
             error_prompt = "You tried to create an image but something went wrong with the generation process. Apologize briefly and suggest they try again later."
-            error_response = await query_ollama(error_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
+            error_response = await query_llama(error_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
             await message.channel.send(error_response)
 
             print(f"❌ Image generation failed for prompt: {prompt}")
diff --git a/bot/utils/kindness.py b/bot/utils/kindness.py
index 7ab9c77..731e946 100644
--- a/bot/utils/kindness.py
+++ b/bot/utils/kindness.py
@@ -2,7 +2,7 @@
 
 import random
 import globals
-from utils.llm import query_ollama  # Adjust path as needed
+from utils.llm import query_llama  # Adjust path as needed
 
 
 async def detect_and_react_to_kindness(message, after_reply=False, server_context=None):
@@ -37,7 +37,7 @@ async def detect_and_react_to_kindness(message, after_reply=False, server_contex
             "Answer with 'yes' or 'no' only.\n\n"
             f"Message: \"{message.content}\""
         )
-        result = await query_ollama(prompt, user_id="kindness-check", guild_id=None, response_type="dm_response")
+        result = await query_llama(prompt, user_id="kindness-check", guild_id=None, response_type="dm_response")
 
         if result.strip().lower().startswith("yes"):
             await message.add_reaction(emoji)
diff --git a/bot/utils/scheduled.py b/bot/utils/scheduled.py
index 5f091b9..30f9043 100644
--- a/bot/utils/scheduled.py
+++ b/bot/utils/scheduled.py
@@ -11,7 +11,7 @@ from discord import Status, ActivityType
 
 import globals
 from server_manager import server_manager
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.dm_interaction_analyzer import dm_analyzer
 
 BEDTIME_TRACKING_FILE = "last_bedtime_targets.json"
@@ -27,7 +27,7 @@ async def send_monday_video_for_server(guild_id: int):
 
     # Generate a motivational message
    prompt = "It's Miku Monday! Give me an energetic and heartfelt Miku Monday morning message to inspire someone for the week ahead."
-    response = await query_ollama(prompt, user_id=f"weekly-motivation-{guild_id}", guild_id=guild_id)
+    response = await query_llama(prompt, user_id=f"weekly-motivation-{guild_id}", guild_id=guild_id)
 
     video_url = "http://zip.koko210cloud.xyz/u/zEgU7Z.mp4"
 
@@ -158,7 +158,7 @@ async def send_bedtime_reminder_for_server(guild_id: int, client=None):
         f"Miku is currently feeling: {server_config.current_mood_description or 'neutral'}\nPlease word in a way that reflects this emotional tone."
     )
 
-    bedtime_message = await query_ollama(prompt, user_id=f"bedtime-{guild_id}", guild_id=guild_id)
+    bedtime_message = await query_llama(prompt, user_id=f"bedtime-{guild_id}", guild_id=guild_id)
 
     try:
         await channel.send(f"{chosen_one.mention} {bedtime_message}")
diff --git a/bot/utils/sentiment_analysis.py b/bot/utils/sentiment_analysis.py
index d1aabb4..b58e42e 100644
--- a/bot/utils/sentiment_analysis.py
+++ b/bot/utils/sentiment_analysis.py
@@ -1,8 +1,8 @@
-from utils.llm import query_ollama
+from utils.llm import query_llama
 
 async def analyze_sentiment(messages: list) -> tuple[str, float]:
     """
-    Analyze the sentiment of a conversation using Ollama
+    Analyze the sentiment of a conversation using llama.cpp
     Returns a tuple of (sentiment description, positivity score from 0-1)
     """
     # Combine the last few messages for context (up to 5)
@@ -29,7 +29,7 @@ Score: 0.85
 
 Response:"""
 
     try:
-        response = await query_ollama(prompt)
+        response = await query_llama(prompt)
         if not response or 'Score:' not in response:
             return "Could not analyze sentiment", 0.5
diff --git a/entrypoint.sh b/entrypoint.sh
deleted file mode 100755
index 1f8e206..0000000
--- a/entrypoint.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/sh
-
-# Start the server in the background
-ollama serve &
-
-# Wait until the server is reachable
-until curl -s http://localhost:11434 | grep -q 'Ollama is running'; do
-  echo 'Waiting for Ollama to start...'
-  sleep 2
-done
-
-# Pull the model
-ollama pull llama3.1
-ollama pull moondream
-
-# Wait for background jobs
-wait
diff --git a/readmes/VOICE_CHAT_IMPLEMENTATION.md b/readmes/VOICE_CHAT_IMPLEMENTATION.md
deleted file mode 100644
index 79772d1..0000000
--- a/readmes/VOICE_CHAT_IMPLEMENTATION.md
+++ /dev/null
@@ -1,222 +0,0 @@
-# Voice Chat Implementation with Fish.audio
-
-## Overview
-This document explains how to integrate Fish.audio TTS API with the Miku Discord bot for voice channel conversations.
-
-## Fish.audio API Setup
-
-### 1. Get API Key
-- Create account at https://fish.audio/
-- Get API key from: https://fish.audio/app/api-keys/
-
-### 2. Find Your Miku Voice Model ID
-- Browse voices at https://fish.audio/
-- Find your Miku voice model
-- Copy the model ID from the URL (e.g., `8ef4a238714b45718ce04243307c57a7`)
-- Or use the copy button on the voice page
-
-## API Usage for Discord Voice Chat
-
-### Basic TTS Request (REST API)
-```python
-import requests
-
-def generate_speech(text: str, voice_id: str, api_key: str) -> bytes:
-    """Generate speech using Fish.audio API"""
-    url = "https://api.fish.audio/v1/tts"
-
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json",
-        "model": "s1"  # Recommended model
-    }
-
-    payload = {
-        "text": text,
-        "reference_id": voice_id,  # Your Miku voice model ID
-        "format": "mp3",  # or "pcm" for raw audio
-        "latency": "balanced",  # Lower latency for real-time
-        "temperature": 0.9,  # Controls randomness (0-1)
-        "normalize": True  # Reduces latency
-    }
-
-    response = requests.post(url, json=payload, headers=headers)
-    return response.content  # Returns audio bytes
-```
-
-### Real-time Streaming (WebSocket - Recommended for VC)
-```python
-from fish_audio_sdk import WebSocketSession, TTSRequest
-
-def stream_to_discord(text: str, voice_id: str, api_key: str):
-    """Stream audio directly to Discord voice channel"""
-    ws_session = WebSocketSession(api_key)
-
-    # Define text generator (can stream from LLM responses)
-    def text_stream():
-        # You can yield text as it's generated from your LLM
-        yield text
-
-    with ws_session:
-        for audio_chunk in ws_session.tts(
-            TTSRequest(
-                text="",  # Empty when streaming
-                reference_id=voice_id,
-                format="pcm",  # Best for Discord
-                sample_rate=48000  # Discord uses 48kHz
-            ),
-            text_stream()
-        ):
-            # Send audio_chunk to Discord voice channel
-            yield audio_chunk
-```
-
-### Async Streaming (Better for Discord.py)
-```python
-from fish_audio_sdk import AsyncWebSocketSession, TTSRequest
-import asyncio
-
-async def async_stream_speech(text: str, voice_id: str, api_key: str):
-    """Async streaming for Discord.py integration"""
-    ws_session = AsyncWebSocketSession(api_key)
-
-    async def text_stream():
-        yield text
-
-    async with ws_session:
-        audio_buffer = bytearray()
-        async for audio_chunk in ws_session.tts(
-            TTSRequest(
-                text="",
-                reference_id=voice_id,
-                format="pcm",
-                sample_rate=48000
-            ),
-            text_stream()
-        ):
-            audio_buffer.extend(audio_chunk)
-
-        return bytes(audio_buffer)
-```
-
-## Integration with Miku Bot
-
-### Required Dependencies
-Add to `requirements.txt`:
-```
-discord.py[voice]
-PyNaCl
-fish-audio-sdk
-speech_recognition  # For STT
-pydub  # Audio processing
-```
-
-### Environment Variables
-Add to your `.env` or docker-compose.yml:
-```bash
-FISH_API_KEY=your_api_key_here
-MIKU_VOICE_ID=your_miku_model_id_here
-```
-
-### Discord Voice Channel Flow
-```
-1. User speaks in VC
-   ↓
-2. Capture audio → Speech Recognition (STT)
-   ↓
-3. Convert speech to text
-   ↓
-4. Process with Miku's LLM (existing bot logic)
-   ↓
-5. Generate response text
-   ↓
-6. Send to Fish.audio TTS API
-   ↓
-7. Stream audio back to Discord VC
-```
-
-## Key Implementation Details
-
-### For Low Latency Voice Chat:
-- Use WebSocket streaming instead of REST API
-- Set `latency: "balanced"` in requests
-- Use `format: "pcm"` with `sample_rate: 48000` for Discord
-- Stream LLM responses as they generate (don't wait for full response)
-
-### Audio Format for Discord:
-- **Sample Rate**: 48000 Hz (Discord standard)
-- **Channels**: 1 (mono)
-- **Format**: PCM (raw audio) or Opus (compressed)
-- **Bit Depth**: 16-bit
-
-### Cost Considerations:
-- **TTS**: $15.00 per million UTF-8 bytes
-- Example: ~$0.015 for 1000 characters
-- Monitor usage at https://fish.audio/app/billing/
-
-### API Features Available:
-- **Temperature** (0-1): Controls speech randomness/expressiveness
-- **Prosody**: Control speed and volume
-  ```python
-  "prosody": {
-      "speed": 1.0,   # 0.5-2.0 range
-      "volume": 0     # -10 to 10 dB
-  }
-  ```
-- **Chunk Length** (100-300): Affects streaming speed
-- **Normalize**: Reduces latency but may affect number/date pronunciation
-
-## Example: Integrate with Existing LLM
-```python
-from utils.llm import query_ollama
-from fish_audio_sdk import AsyncWebSocketSession, TTSRequest
-
-async def miku_voice_response(user_message: str):
-    """Generate Miku's response and convert to speech"""
-
-    # 1. Get text response from existing LLM
-    response_text = await query_ollama(
-        prompt=user_message,
-        model=globals.OLLAMA_MODEL
-    )
-
-    # 2. Convert to speech
-    ws_session = AsyncWebSocketSession(globals.FISH_API_KEY)
-
-    async def text_stream():
-        # Can stream as LLM generates if needed
-        yield response_text
-
-    async with ws_session:
-        async for audio_chunk in ws_session.tts(
-            TTSRequest(
-                text="",
-                reference_id=globals.MIKU_VOICE_ID,
-                format="pcm",
-                sample_rate=48000
-            ),
-            text_stream()
-        ):
-            # Send to Discord voice channel
-            yield audio_chunk
-```
-
-## Rate Limits
-Check the current rate limits at:
-https://docs.fish.audio/developer-platform/models-pricing/pricing-and-rate-limits
-
-## Additional Resources
-- **API Reference**: https://docs.fish.audio/api-reference/introduction
-- **Python SDK**: https://github.com/fishaudio/fish-audio-python
-- **WebSocket Docs**: https://docs.fish.audio/sdk-reference/python/websocket
-- **Discord Community**: https://discord.com/invite/dF9Db2Tt3Y
-- **Support**: support@fish.audio
-
-## Next Steps
-1. Create Fish.audio account and get API key
-2. Find/select Miku voice model and get its ID
-3. Install required dependencies
-4. Implement voice channel connection in bot
-5. Add speech-to-text for user audio
-6. Connect Fish.audio TTS to output audio
-7. Test latency and quality
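
Note: the commit message above mentions a backward-compatibility alias kept in llm.py, but bot/utils/llm.py itself is not part of this diff. Below is a minimal sketch of what that alias pattern can look like. The query_llama signature is inferred from the call sites in this patch; the llama.cpp endpoint and payload shown here are illustrative assumptions, not the repo's actual implementation.

```python
# Hypothetical sketch of the tail of bot/utils/llm.py; the real file is not
# shown in this diff. Signature inferred from the call sites in the patch above.
import aiohttp

# Assumed llama.cpp server address; llama-server exposes an OpenAI-compatible
# chat completions endpoint at /v1/chat/completions.
LLAMA_SERVER_URL = "http://llama-server:8080/v1/chat/completions"

async def query_llama(prompt, user_id=None, guild_id=None, response_type=None):
    """Send a prompt to a llama.cpp server and return the reply text.

    In the real implementation, user_id/guild_id/response_type presumably
    select conversation history and mood context; omitted here for brevity.
    """
    payload = {"messages": [{"role": "user", "content": prompt}]}
    async with aiohttp.ClientSession() as session:
        async with session.post(LLAMA_SERVER_URL, json=payload) as resp:
            data = await resp.json()
            return data["choices"][0]["message"]["content"]

# Backward-compatibility alias: code still importing query_ollama keeps working.
query_ollama = query_llama
```

With an alias like this in place, any straggler still doing `from utils.llm import query_ollama` resolves to the llama.cpp-backed function, which is what lets the rename land across many files without breaking older branches.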