From d58be3b33e2cf3dc0a04ed567dae66cc1165fc3a Mon Sep 17 00:00:00 2001
From: koko210Serve
Date: Sun, 7 Dec 2025 17:50:08 +0200
Subject: [PATCH] Remove all Ollama remnants and complete migration to llama.cpp

- Remove Ollama-specific files (Dockerfile.ollama, entrypoint.sh)
- Replace all query_ollama imports and calls with query_llama
- Remove langchain-ollama dependency from requirements.txt
- Update all utility files (autonomous, kindness, image_generation, etc.)
- Update README.md documentation references
- Maintain backward compatibility alias in llm.py (see the sketch after the diff)
---
 Dockerfile.ollama                    |   8 -
 README.md                            |   2 +-
 bot/api.py                           |   4 +-
 bot/bot.py                           |   6 +-
 bot/requirements.txt                 |   1 -
 bot/utils/autonomous_v1_legacy.py    |  16 +-
 bot/utils/autonomous_wip.py          |  12 +-
 bot/utils/dm_interaction_analyzer.py |   4 +-
 bot/utils/figurine_notifier.py       |   4 +-
 bot/utils/image_generation.py        |   8 +-
 bot/utils/kindness.py                |   4 +-
 bot/utils/scheduled.py               |   6 +-
 bot/utils/sentiment_analysis.py      |   6 +-
 entrypoint.sh                        |  17 --
 readmes/VOICE_CHAT_IMPLEMENTATION.md | 222 ---------------------------
 15 files changed, 36 insertions(+), 284 deletions(-)
 delete mode 100644 Dockerfile.ollama
 delete mode 100755 entrypoint.sh
 delete mode 100644 readmes/VOICE_CHAT_IMPLEMENTATION.md

diff --git a/Dockerfile.ollama b/Dockerfile.ollama
deleted file mode 100644
index a5592db..0000000
--- a/Dockerfile.ollama
+++ /dev/null
@@ -1,8 +0,0 @@
-FROM ollama/ollama
-
-# Install curl so we can run health checks
-USER root
-RUN apt-get update && apt-get install -y curl && apt-get clean
-
-COPY entrypoint.sh /entrypoint.sh
-ENTRYPOINT ["/entrypoint.sh"]
diff --git a/README.md b/README.md
index 2b54e0d..5296d38 100644
--- a/README.md
+++ b/README.md
@@ -423,7 +423,7 @@ Detailed documentation available in the `readmes/` directory:
 - **[FACE_DETECTION_API_MIGRATION.md](readmes/FACE_DETECTION_API_MIGRATION.md)** - Face detection setup
 - **[DM_ANALYSIS_FEATURE.md](readmes/DM_ANALYSIS_FEATURE.md)** - DM interaction analytics
 - **[MOOD_SYSTEM_ANALYSIS.md](readmes/MOOD_SYSTEM_ANALYSIS.md)** - Mood system deep dive
-- **[QUICK_REFERENCE.md](readmes/QUICK_REFERENCE.md)** - Ollama → llama.cpp migration guide
+- **[QUICK_REFERENCE.md](readmes/QUICK_REFERENCE.md)** - llama.cpp setup and migration guide
 
 ---
 
diff --git a/bot/api.py b/bot/api.py
index 06f82ad..66daecd 100644
--- a/bot/api.py
+++ b/bot/api.py
@@ -833,11 +833,11 @@ async def send_custom_prompt_dm(user_id: str, req: CustomPromptRequest):
         return {"status": "error", "message": f"User {user_id} not found"}
 
     # Use the LLM query function for DM context
-    from utils.llm import query_ollama
+    from utils.llm import query_llama
 
     async def send_dm_custom_prompt():
         try:
-            response = await query_ollama(req.prompt, user_id=user_id, guild_id=None, response_type="dm_response")
+            response = await query_llama(req.prompt, user_id=user_id, guild_id=None, response_type="dm_response")
             await user.send(response)
             print(f"✅ Custom DM prompt sent to user {user_id}: {req.prompt[:50]}...")
 
diff --git a/bot/bot.py b/bot/bot.py
index a4fc565..6bf98a5 100644
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -34,7 +34,7 @@ from utils.moods import (
 from utils.media import(
     overlay_username_with_ffmpeg
 )
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.autonomous import (
     setup_autonomous_speaking,
     load_last_sent_tweets,
@@ -413,7 +413,7 @@ async def on_message(message):
             response_type = "dm_response" if is_dm else "server_response"
             author_name = message.author.display_name
 
-            response = await query_ollama(
+            response = await query_llama(
                 enhanced_prompt,
                 user_id=str(message.author.id),
                 guild_id=guild_id,
@@ -454,7 +454,7 @@ async def on_message(message):
             guild_id = message.guild.id if message.guild else None
             response_type = "dm_response" if is_dm else "server_response"
             author_name = message.author.display_name
-            response = await query_ollama(
+            response = await query_llama(
                 prompt,
                 user_id=str(message.author.id),
                 guild_id=guild_id,
diff --git a/bot/requirements.txt b/bot/requirements.txt
index 6f65151..2f5d79c 100644
--- a/bot/requirements.txt
+++ b/bot/requirements.txt
@@ -3,7 +3,6 @@ aiohttp
 requests
 langchain-core
 langchain-text-splitters
-langchain-ollama
 faiss-cpu
 langchain-community
 aiofiles
diff --git a/bot/utils/autonomous_v1_legacy.py b/bot/utils/autonomous_v1_legacy.py
index 5bbca8a..8735312 100644
--- a/bot/utils/autonomous_v1_legacy.py
+++ b/bot/utils/autonomous_v1_legacy.py
@@ -11,7 +11,7 @@ from discord import TextChannel
 from difflib import SequenceMatcher
 import globals
 from server_manager import server_manager
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.moods import MOOD_EMOJIS
 from utils.twitter_fetcher import fetch_miku_tweets
 from utils.image_handling import (
@@ -107,7 +107,7 @@ async def miku_say_something_general_for_server(guild_id: int):
     for attempt in range(3):  # retry up to 3 times if message is too similar
         # Use consistent user_id per guild for autonomous actions to enable conversation history
         # and prompt caching, rather than creating new IDs with timestamps
-        message = await query_ollama(prompt, user_id=f"miku-autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_general")
+        message = await query_llama(prompt, user_id=f"miku-autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_general")
         if not is_too_similar(message, _server_autonomous_messages[guild_id]):
             break
         print("🔁 Response was too similar to past messages, retrying...")
@@ -202,7 +202,7 @@ async def miku_engage_random_user_for_server(guild_id: int):
 
     try:
         # Use consistent user_id for engaging users to enable conversation history
-        message = await query_ollama(prompt, user_id=f"miku-engage-{guild_id}", guild_id=guild_id)
+        message = await query_llama(prompt, user_id=f"miku-engage-{guild_id}", guild_id=guild_id)
         await channel.send(f"{target.mention} {message}")
         _server_user_engagements[guild_id][target.id] = time.time()
         print(f"👤 Miku engaged {display_name} in server {server_config.guild_name}")
@@ -263,7 +263,7 @@ async def miku_detect_and_join_conversation_for_server(guild_id: int):
 
     try:
         # Use consistent user_id for joining conversations to enable conversation history
-        reply = await query_ollama(prompt, user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join")
+        reply = await query_llama(prompt, user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join")
         await channel.send(reply)
         print(f"💬 Miku joined an ongoing conversation in server {server_config.guild_name}")
     except Exception as e:
user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join") + reply = await query_llama(prompt, user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join") await channel.send(reply) print(f"💬 Miku joined an ongoing conversation in server {server_config.guild_name}") except Exception as e: @@ -309,7 +309,7 @@ async def share_miku_tweet_for_server(guild_id: int): img_desc = await analyze_image_with_qwen(base64_img) base_prompt += f"\n\nThe image looks like this: {img_desc}" - miku_comment = await query_ollama(base_prompt, user_id=f"autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_tweet") + miku_comment = await query_llama(base_prompt, user_id=f"autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_tweet") # Post to Discord (convert to fxtwitter for better embeds) fx_tweet_url = tweet['url'].replace("twitter.com", "fxtwitter.com").replace("x.com", "fxtwitter.com") @@ -342,7 +342,7 @@ async def handle_custom_prompt_for_server(guild_id: int, user_prompt: str): try: # Use consistent user_id for manual prompts to enable conversation history - message = await query_ollama(prompt, user_id=f"miku-manual-{guild_id}", guild_id=guild_id, response_type="autonomous_general") + message = await query_llama(prompt, user_id=f"miku-manual-{guild_id}", guild_id=guild_id, response_type="autonomous_general") await channel.send(message) print(f"🎤 Miku responded to custom prompt in server {server_config.guild_name}") @@ -585,7 +585,7 @@ async def miku_autonomous_reaction_for_server(guild_id: int, force_message=None, f"Be bold! Use uncommon emojis! Respond with ONLY the emoji character itself, no text." ) - emoji = await query_ollama( + emoji = await query_llama( prompt, user_id=f"miku-reaction-{guild_id}", # Use consistent user_id guild_id=guild_id, @@ -750,7 +750,7 @@ async def miku_autonomous_reaction_for_dm(user_id: int, force_message=None): f"Be bold! Use uncommon emojis! Respond with ONLY the emoji character itself, no text." 
diff --git a/bot/utils/autonomous_wip.py b/bot/utils/autonomous_wip.py
index 20905cf..bd317d6 100644
--- a/bot/utils/autonomous_wip.py
+++ b/bot/utils/autonomous_wip.py
@@ -10,7 +10,7 @@ from discord import Status
 from discord import TextChannel
 from difflib import SequenceMatcher
 import globals
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.moods import MOOD_EMOJIS
 from utils.twitter_fetcher import fetch_miku_tweets
 from utils.image_handling import analyze_image_with_qwen, download_and_encode_image
@@ -95,7 +95,7 @@ async def miku_say_something_general(guild_id, settings):
     )
 
     for attempt in range(3):  # retry up to 3 times if message is too similar
-        message = await query_ollama(prompt, user_id=f"miku-general-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
+        message = await query_llama(prompt, user_id=f"miku-general-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
         if not is_too_similar(message, _last_autonomous_messages):
             break
         print("🔁 Response was too similar to past messages, retrying...")
@@ -183,7 +183,7 @@ async def miku_engage_random_user(guild_id, settings):
     )
 
     try:
-        message = await query_ollama(prompt, user_id=f"miku-engage-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
+        message = await query_llama(prompt, user_id=f"miku-engage-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
         await channel.send(f"{target.mention} {message}")
         print(f"👤 Miku engaged {display_name}")
         _last_user_engagements[target.id] = time.time()
@@ -236,7 +236,7 @@ async def miku_detect_and_join_conversation():
     )
 
     try:
-        reply = await query_ollama(prompt, user_id=f"miku-chat-{int(time.time())}", guild_id=guild_id, response_type="conversation_join")
+        reply = await query_llama(prompt, user_id=f"miku-chat-{int(time.time())}", guild_id=guild_id, response_type="conversation_join")
         await channel.send(reply)
         print(f"💬 Miku joined an ongoing conversation.")
     except Exception as e:
@@ -275,7 +275,7 @@ async def share_miku_tweet(guild_id, settings):
             img_desc = await analyze_image_with_qwen(base64_img)
             base_prompt += f"\n\nThe image looks like this: {img_desc}"
 
-    miku_comment = await query_ollama(base_prompt, user_id="autonomous", guild_id=guild_id, response_type="autonomous_tweet")
+    miku_comment = await query_llama(base_prompt, user_id="autonomous", guild_id=guild_id, response_type="autonomous_tweet")
 
     # Post to Discord
     # Convert to fxtwitter for better embeds
@@ -302,7 +302,7 @@ async def handle_custom_prompt(user_prompt: str):
     )
 
     try:
-        message = await query_ollama(prompt, user_id=f"manual-{int(time.time())}", guild_id=None, response_type="autonomous_general")
+        message = await query_llama(prompt, user_id=f"manual-{int(time.time())}", guild_id=None, response_type="autonomous_general")
         await channel.send(message)
         print("🎤 Miku responded to custom prompt.")
         _last_autonomous_messages.append(message)
diff --git a/bot/utils/dm_interaction_analyzer.py b/bot/utils/dm_interaction_analyzer.py
index db1c564..15a4b88 100644
--- a/bot/utils/dm_interaction_analyzer.py
+++ b/bot/utils/dm_interaction_analyzer.py
@@ -9,7 +9,7 @@ from datetime import datetime, timedelta
 from typing import List, Dict, Optional
 import discord
 import globals
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.dm_logger import dm_logger
 
 # Directories
@@ -167,7 +167,7 @@ Respond ONLY with the JSON object, no other text."""
 
     # Query the LLM
     try:
-        response = await query_ollama(
+        response = await query_llama(
             analysis_prompt,
             user_id=f"analyzer-{user_id}",
             guild_id=None,
diff --git a/bot/utils/figurine_notifier.py b/bot/utils/figurine_notifier.py
index 3f8e01c..8c2bdfb 100644
--- a/bot/utils/figurine_notifier.py
+++ b/bot/utils/figurine_notifier.py
@@ -9,7 +9,7 @@ import globals
 
 from utils.twitter_fetcher import fetch_figurine_tweets_latest
 from utils.image_handling import analyze_image_with_qwen, download_and_encode_image
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.dm_logger import dm_logger
 
 
@@ -165,7 +165,7 @@ async def send_figurine_dm_to_user(client: discord.Client, user_id: int, tweet:
         base_prompt += "\n\nSign off as Miku with a cute emoji."
 
     # Query LLM in DM context (no guild_id -> DM mood rules apply)
-    miku_comment = await query_ollama(base_prompt, user_id=f"figurine_dm_{user_id}", guild_id=None, response_type="dm_response")
+    miku_comment = await query_llama(base_prompt, user_id=f"figurine_dm_{user_id}", guild_id=None, response_type="dm_response")
 
     dm = await user.create_dm()
     tweet_url = tweet.get("url", "")
diff --git a/bot/utils/image_generation.py b/bot/utils/image_generation.py
index 60b8c2c..27e5e43 100644
--- a/bot/utils/image_generation.py
+++ b/bot/utils/image_generation.py
@@ -13,7 +13,7 @@ import tempfile
 import time
 from typing import Optional, Tuple
 import globals
-from utils.llm import query_ollama
+from utils.llm import query_llama
 
 # Image generation detection patterns
 IMAGE_REQUEST_PATTERNS = [
@@ -299,7 +299,7 @@ async def handle_image_generation_request(message, prompt: str) -> bool:
         response_prompt = f"A user asked you to create an image with this description: '{prompt}'. Respond enthusiastically that you're creating this image for them. Keep it short and excited!"
         response_type = "dm_response" if is_dm else "server_response"
 
-        initial_response = await query_ollama(response_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
+        initial_response = await query_llama(response_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
 
         # Send initial response
         initial_msg = await message.channel.send(initial_response)
@@ -318,7 +318,7 @@ async def handle_image_generation_request(message, prompt: str) -> bool:
 
             # Create a follow-up message about the completed image
             completion_prompt = f"You just finished creating an image based on '{prompt}'. Make a short, excited comment about the completed artwork!"
-            completion_response = await query_ollama(completion_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
+            completion_response = await query_llama(completion_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
 
             await message.channel.send(completion_response, file=file)
 
@@ -333,7 +333,7 @@ async def handle_image_generation_request(message, prompt: str) -> bool:
         else:
             # Image generation failed
             error_prompt = "You tried to create an image but something went wrong with the generation process. Apologize briefly and suggest they try again later."
-            error_response = await query_ollama(error_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
+            error_response = await query_llama(error_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
             await message.channel.send(error_response)
 
             print(f"❌ Image generation failed for prompt: {prompt}")
diff --git a/bot/utils/kindness.py b/bot/utils/kindness.py
index 7ab9c77..731e946 100644
--- a/bot/utils/kindness.py
+++ b/bot/utils/kindness.py
@@ -2,7 +2,7 @@
 
 import random
 import globals
-from utils.llm import query_ollama  # Adjust path as needed
+from utils.llm import query_llama  # Adjust path as needed
 
 
 async def detect_and_react_to_kindness(message, after_reply=False, server_context=None):
@@ -37,7 +37,7 @@ async def detect_and_react_to_kindness(message, after_reply=False, server_contex
             "Answer with 'yes' or 'no' only.\n\n"
             f"Message: \"{message.content}\""
         )
-        result = await query_ollama(prompt, user_id="kindness-check", guild_id=None, response_type="dm_response")
+        result = await query_llama(prompt, user_id="kindness-check", guild_id=None, response_type="dm_response")
 
         if result.strip().lower().startswith("yes"):
             await message.add_reaction(emoji)
diff --git a/bot/utils/scheduled.py b/bot/utils/scheduled.py
index 5f091b9..30f9043 100644
--- a/bot/utils/scheduled.py
+++ b/bot/utils/scheduled.py
@@ -11,7 +11,7 @@ from discord import Status, ActivityType
 
 import globals
 from server_manager import server_manager
-from utils.llm import query_ollama
+from utils.llm import query_llama
 from utils.dm_interaction_analyzer import dm_analyzer
 
 BEDTIME_TRACKING_FILE = "last_bedtime_targets.json"
@@ -27,7 +27,7 @@ async def send_monday_video_for_server(guild_id: int):
 
     # Generate a motivational message
    prompt = "It's Miku Monday! Give me an energetic and heartfelt Miku Monday morning message to inspire someone for the week ahead."
-    response = await query_ollama(prompt, user_id=f"weekly-motivation-{guild_id}", guild_id=guild_id)
+    response = await query_llama(prompt, user_id=f"weekly-motivation-{guild_id}", guild_id=guild_id)
 
     video_url = "http://zip.koko210cloud.xyz/u/zEgU7Z.mp4"
 
@@ -158,7 +158,7 @@ async def send_bedtime_reminder_for_server(guild_id: int, client=None):
         f"Miku is currently feeling: {server_config.current_mood_description or 'neutral'}\nPlease word in a way that reflects this emotional tone."
     )
 
-    bedtime_message = await query_ollama(prompt, user_id=f"bedtime-{guild_id}", guild_id=guild_id)
+    bedtime_message = await query_llama(prompt, user_id=f"bedtime-{guild_id}", guild_id=guild_id)
 
     try:
         await channel.send(f"{chosen_one.mention} {bedtime_message}")
diff --git a/bot/utils/sentiment_analysis.py b/bot/utils/sentiment_analysis.py
index d1aabb4..b58e42e 100644
--- a/bot/utils/sentiment_analysis.py
+++ b/bot/utils/sentiment_analysis.py
@@ -1,8 +1,8 @@
-from utils.llm import query_ollama
+from utils.llm import query_llama
 
 async def analyze_sentiment(messages: list) -> tuple[str, float]:
     """
-    Analyze the sentiment of a conversation using Ollama
+    Analyze the sentiment of a conversation using llama.cpp
     Returns a tuple of (sentiment description, positivity score from 0-1)
     """
     # Combine the last few messages for context (up to 5)
@@ -29,7 +29,7 @@ Score: 0.85
 
 Response:"""
 
     try:
-        response = await query_ollama(prompt)
+        response = await query_llama(prompt)
         if not response or 'Score:' not in response:
             return "Could not analyze sentiment", 0.5
diff --git a/entrypoint.sh b/entrypoint.sh
deleted file mode 100755
index 1f8e206..0000000
--- a/entrypoint.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/sh
-
-# Start the server in the background
-ollama serve &
-
-# Wait until the server is reachable
-until curl -s http://localhost:11434 | grep -q 'Ollama is running'; do
-  echo 'Waiting for Ollama to start...'
-  sleep 2
-done
-
-# Pull the model
-ollama pull llama3.1
-ollama pull moondream
-
-# Wait for background jobs
-wait
diff --git a/readmes/VOICE_CHAT_IMPLEMENTATION.md b/readmes/VOICE_CHAT_IMPLEMENTATION.md
deleted file mode 100644
index 79772d1..0000000
--- a/readmes/VOICE_CHAT_IMPLEMENTATION.md
+++ /dev/null
@@ -1,222 +0,0 @@
-# Voice Chat Implementation with Fish.audio
-
-## Overview
-This document explains how to integrate Fish.audio TTS API with the Miku Discord bot for voice channel conversations.
-
-## Fish.audio API Setup
-
-### 1. Get API Key
-- Create account at https://fish.audio/
-- Get API key from: https://fish.audio/app/api-keys/
-
-### 2. Find Your Miku Voice Model ID
-- Browse voices at https://fish.audio/
-- Find your Miku voice model
-- Copy the model ID from the URL (e.g., `8ef4a238714b45718ce04243307c57a7`)
-- Or use the copy button on the voice page
-
-## API Usage for Discord Voice Chat
-
-### Basic TTS Request (REST API)
-```python
-import requests
-
-def generate_speech(text: str, voice_id: str, api_key: str) -> bytes:
-    """Generate speech using Fish.audio API"""
-    url = "https://api.fish.audio/v1/tts"
-
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json",
-        "model": "s1"  # Recommended model
-    }
-
-    payload = {
-        "text": text,
-        "reference_id": voice_id,  # Your Miku voice model ID
-        "format": "mp3",  # or "pcm" for raw audio
-        "latency": "balanced",  # Lower latency for real-time
-        "temperature": 0.9,  # Controls randomness (0-1)
-        "normalize": True  # Reduces latency
-    }
-
-    response = requests.post(url, json=payload, headers=headers)
-    return response.content  # Returns audio bytes
-```
-
-### Real-time Streaming (WebSocket - Recommended for VC)
-```python
-from fish_audio_sdk import WebSocketSession, TTSRequest
-
-def stream_to_discord(text: str, voice_id: str, api_key: str):
-    """Stream audio directly to Discord voice channel"""
-    ws_session = WebSocketSession(api_key)
-
-    # Define text generator (can stream from LLM responses)
-    def text_stream():
-        # You can yield text as it's generated from your LLM
-        yield text
-
-    with ws_session:
-        for audio_chunk in ws_session.tts(
-            TTSRequest(
-                text="",  # Empty when streaming
-                reference_id=voice_id,
-                format="pcm",  # Best for Discord
-                sample_rate=48000  # Discord uses 48kHz
-            ),
-            text_stream()
-        ):
-            # Send audio_chunk to Discord voice channel
-            yield audio_chunk
-```
-
-### Async Streaming (Better for Discord.py)
-```python
-from fish_audio_sdk import AsyncWebSocketSession, TTSRequest
-import asyncio
-
-async def async_stream_speech(text: str, voice_id: str, api_key: str):
-    """Async streaming for Discord.py integration"""
-    ws_session = AsyncWebSocketSession(api_key)
-
-    async def text_stream():
-        yield text
-
-    async with ws_session:
-        audio_buffer = bytearray()
-        async for audio_chunk in ws_session.tts(
-            TTSRequest(
-                text="",
-                reference_id=voice_id,
-                format="pcm",
-                sample_rate=48000
-            ),
-            text_stream()
-        ):
-            audio_buffer.extend(audio_chunk)
-
-        return bytes(audio_buffer)
-```
-
-## Integration with Miku Bot
-
-### Required Dependencies
-Add to `requirements.txt`:
-```
-discord.py[voice]
-PyNaCl
-fish-audio-sdk
-speech_recognition  # For STT
-pydub  # Audio processing
-```
-
-### Environment Variables
-Add to your `.env` or docker-compose.yml:
-```bash
-FISH_API_KEY=your_api_key_here
-MIKU_VOICE_ID=your_miku_model_id_here
-```
-
-### Discord Voice Channel Flow
-```
-1. User speaks in VC
-   ↓
-2. Capture audio → Speech Recognition (STT)
-   ↓
-3. Convert speech to text
-   ↓
-4. Process with Miku's LLM (existing bot logic)
-   ↓
-5. Generate response text
-   ↓
-6. Send to Fish.audio TTS API
-   ↓
-7. Stream audio back to Discord VC
-```
-
-## Key Implementation Details
-
-### For Low Latency Voice Chat:
-- Use WebSocket streaming instead of REST API
-- Set `latency: "balanced"` in requests
-- Use `format: "pcm"` with `sample_rate: 48000` for Discord
-- Stream LLM responses as they generate (don't wait for full response)
-
-### Audio Format for Discord:
-- **Sample Rate**: 48000 Hz (Discord standard)
-- **Channels**: 1 (mono)
-- **Format**: PCM (raw audio) or Opus (compressed)
-- **Bit Depth**: 16-bit
-
-### Cost Considerations:
-- **TTS**: $15.00 per million UTF-8 bytes
-- Example: ~$0.015 for 1000 characters
-- Monitor usage at https://fish.audio/app/billing/
-
-### API Features Available:
-- **Temperature** (0-1): Controls speech randomness/expressiveness
-- **Prosody**: Control speed and volume
-  ```python
-  "prosody": {
-      "speed": 1.0,   # 0.5-2.0 range
-      "volume": 0     # -10 to 10 dB
-  }
-  ```
-- **Chunk Length** (100-300): Affects streaming speed
-- **Normalize**: Reduces latency but may affect number/date pronunciation
-
-## Example: Integrate with Existing LLM
-```python
-from utils.llm import query_ollama
-from fish_audio_sdk import AsyncWebSocketSession, TTSRequest
-
-async def miku_voice_response(user_message: str):
-    """Generate Miku's response and convert to speech"""
-
-    # 1. Get text response from existing LLM
-    response_text = await query_ollama(
-        prompt=user_message,
-        model=globals.OLLAMA_MODEL
-    )
-
-    # 2. Convert to speech
-    ws_session = AsyncWebSocketSession(globals.FISH_API_KEY)
-
-    async def text_stream():
-        # Can stream as LLM generates if needed
-        yield response_text
-
-    async with ws_session:
-        async for audio_chunk in ws_session.tts(
-            TTSRequest(
-                text="",
-                reference_id=globals.MIKU_VOICE_ID,
-                format="pcm",
-                sample_rate=48000
-            ),
-            text_stream()
-        ):
-            # Send to Discord voice channel
-            yield audio_chunk
-```
-
-## Rate Limits
-Check the current rate limits at:
-https://docs.fish.audio/developer-platform/models-pricing/pricing-and-rate-limits
-
-## Additional Resources
-- **API Reference**: https://docs.fish.audio/api-reference/introduction
-- **Python SDK**: https://github.com/fishaudio/fish-audio-python
-- **WebSocket Docs**: https://docs.fish.audio/sdk-reference/python/websocket
-- **Discord Community**: https://discord.com/invite/dF9Db2Tt3Y
-- **Support**: support@fish.audio
-
-## Next Steps
-1. Create Fish.audio account and get API key
-2. Find/select Miku voice model and get its ID
-3. Install required dependencies
-4. Implement voice channel connection in bot
-5. Add speech-to-text for user audio
-6. Connect Fish.audio TTS to output audio
-7. Test latency and quality
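
Note: the commit message above mentions a backward-compatibility alias kept in llm.py, but bot/utils/llm.py itself is not part of this diff. Below is a minimal sketch of what that alias pattern can look like. The query_llama signature is inferred from the call sites in this patch; the llama.cpp endpoint and payload shown here are illustrative assumptions, not the repo's actual implementation.

```python
# Hypothetical sketch of the tail of bot/utils/llm.py; the real file is not
# shown in this diff. Signature inferred from the call sites in the patch above.
import aiohttp

# Assumed llama.cpp server address; llama-server exposes an OpenAI-compatible
# chat completions endpoint at /v1/chat/completions.
LLAMA_SERVER_URL = "http://llama-server:8080/v1/chat/completions"

async def query_llama(prompt, user_id=None, guild_id=None, response_type=None):
    """Send a prompt to a llama.cpp server and return the reply text.

    In the real implementation, user_id/guild_id/response_type presumably
    select conversation history and mood context; omitted here for brevity.
    """
    payload = {"messages": [{"role": "user", "content": prompt}]}
    async with aiohttp.ClientSession() as session:
        async with session.post(LLAMA_SERVER_URL, json=payload) as resp:
            data = await resp.json()
            return data["choices"][0]["message"]["content"]

# Backward-compatibility alias: code still importing query_ollama keeps working.
query_ollama = query_llama
```

With an alias like this in place, any straggler still doing `from utils.llm import query_ollama` resolves to the llama.cpp-backed function, which is what lets the rename land across many files without breaking older branches.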