Remove all Ollama remnants and complete migration to llama.cpp

- Remove Ollama-specific files (Dockerfile.ollama, entrypoint.sh)
- Replace all query_ollama imports and calls with query_llama
- Remove langchain-ollama dependency from requirements.txt
- Update all utility files (autonomous, kindness, image_generation, etc.)
- Update README.md documentation references
- Maintain backward compatibility alias in llm.py
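The alias mentioned in the last bullet could look roughly like the sketch below; llm.py itself is not part of this diff, so the exact signature of `query_llama` is assumed from the call sites changed in this commit:

```python
# utils/llm.py (sketch) -- query_llama is the new llama.cpp-backed entry point
async def query_llama(prompt, user_id=None, guild_id=None, response_type=None):
    ...  # build the request against the llama.cpp server and return the reply text

# Backward-compatibility alias so any remaining query_ollama imports keep working
query_ollama = query_llama
```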
2025-12-07 17:50:08 +02:00
parent a6da4c0c2e
commit d58be3b33e
15 changed files with 39 additions and 286 deletions

View File

@@ -1,8 +0,0 @@
FROM ollama/ollama
# Install curl so we can run health checks
USER root
RUN apt-get update && apt-get install -y curl && apt-get clean
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

View File

@@ -423,7 +423,7 @@ Detailed documentation available in the `readmes/` directory:
- **[FACE_DETECTION_API_MIGRATION.md](readmes/FACE_DETECTION_API_MIGRATION.md)** - Face detection setup
- **[DM_ANALYSIS_FEATURE.md](readmes/DM_ANALYSIS_FEATURE.md)** - DM interaction analytics
- **[MOOD_SYSTEM_ANALYSIS.md](readmes/MOOD_SYSTEM_ANALYSIS.md)** - Mood system deep dive
- **[QUICK_REFERENCE.md](readmes/QUICK_REFERENCE.md)** - Ollama → llama.cpp migration guide
- **[QUICK_REFERENCE.md](readmes/QUICK_REFERENCE.md)** - llama.cpp setup and migration guide
---

View File

@@ -833,11 +833,11 @@ async def send_custom_prompt_dm(user_id: str, req: CustomPromptRequest):
return {"status": "error", "message": f"User {user_id} not found"}
# Use the LLM query function for DM context
from utils.llm import query_ollama
from utils.llm import query_llama
async def send_dm_custom_prompt():
try:
response = await query_ollama(req.prompt, user_id=user_id, guild_id=None, response_type="dm_response")
response = await query_llama(req.prompt, user_id=user_id, guild_id=None, response_type="dm_response")
await user.send(response)
print(f"✅ Custom DM prompt sent to user {user_id}: {req.prompt[:50]}...")

View File

@@ -34,7 +34,7 @@ from utils.moods import (
from utils.media import(
overlay_username_with_ffmpeg
)
from utils.llm import query_ollama
from utils.llm import query_llama
from utils.autonomous import (
setup_autonomous_speaking,
load_last_sent_tweets,
@@ -100,7 +100,7 @@ async def on_ready():
# Start server-specific schedulers (includes DM mood rotation)
server_manager.start_all_schedulers(globals.client)
# Start the global scheduler for other tasks
globals.scheduler.start()
@@ -367,7 +367,8 @@ async def on_message(message):
print(f"✅ Image downloaded, analyzing with vision model...")
# Analyze image
qwen_description = await analyze_image_with_qwen(base64_img)
truncated = (qwen_description[:50] + "...") if len(qwen_description) > 50 else qwen_description
if not base64_img:
truncated = (qwen_description[:50] + "...") if len(qwen_description) > 50 else qwen_description
print(f"📝 Vision analysis result: {truncated}")
if qwen_description and qwen_description.strip():
embed_context_parts.append(f"[Embedded image shows: {qwen_description}]")
@@ -413,7 +414,7 @@ async def on_message(message):
response_type = "dm_response" if is_dm else "server_response"
author_name = message.author.display_name
response = await query_ollama(
response = await query_llama(
enhanced_prompt,
user_id=str(message.author.id),
guild_id=guild_id,
@@ -454,7 +455,7 @@ async def on_message(message):
guild_id = message.guild.id if message.guild else None
response_type = "dm_response" if is_dm else "server_response"
author_name = message.author.display_name
response = await query_ollama(
response = await query_llama(
prompt,
user_id=str(message.author.id),
guild_id=guild_id,

View File

@@ -3,7 +3,6 @@ aiohttp
requests
langchain-core
langchain-text-splitters
langchain-ollama
faiss-cpu
langchain-community
aiofiles

View File

@@ -11,7 +11,7 @@ from discord import TextChannel
from difflib import SequenceMatcher
import globals
from server_manager import server_manager
from utils.llm import query_ollama
from utils.llm import query_llama
from utils.moods import MOOD_EMOJIS
from utils.twitter_fetcher import fetch_miku_tweets
from utils.image_handling import (
@@ -107,7 +107,7 @@ async def miku_say_something_general_for_server(guild_id: int):
for attempt in range(3): # retry up to 3 times if message is too similar
# Use consistent user_id per guild for autonomous actions to enable conversation history
# and prompt caching, rather than creating new IDs with timestamps
message = await query_ollama(prompt, user_id=f"miku-autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_general")
message = await query_llama(prompt, user_id=f"miku-autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_general")
if not is_too_similar(message, _server_autonomous_messages[guild_id]):
break
print("🔁 Response was too similar to past messages, retrying...")
@@ -202,7 +202,7 @@ async def miku_engage_random_user_for_server(guild_id: int):
try:
# Use consistent user_id for engaging users to enable conversation history
message = await query_ollama(prompt, user_id=f"miku-engage-{guild_id}", guild_id=guild_id)
message = await query_llama(prompt, user_id=f"miku-engage-{guild_id}", guild_id=guild_id)
await channel.send(f"{target.mention} {message}")
_server_user_engagements[guild_id][target.id] = time.time()
print(f"👤 Miku engaged {display_name} in server {server_config.guild_name}")
@@ -263,7 +263,7 @@ async def miku_detect_and_join_conversation_for_server(guild_id: int):
try:
# Use consistent user_id for joining conversations to enable conversation history
reply = await query_ollama(prompt, user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join")
reply = await query_llama(prompt, user_id=f"miku-conversation-{guild_id}", guild_id=guild_id, response_type="conversation_join")
await channel.send(reply)
print(f"💬 Miku joined an ongoing conversation in server {server_config.guild_name}")
except Exception as e:
@@ -309,7 +309,7 @@ async def share_miku_tweet_for_server(guild_id: int):
img_desc = await analyze_image_with_qwen(base64_img)
base_prompt += f"\n\nThe image looks like this: {img_desc}"
miku_comment = await query_ollama(base_prompt, user_id=f"autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_tweet")
miku_comment = await query_llama(base_prompt, user_id=f"autonomous-{guild_id}", guild_id=guild_id, response_type="autonomous_tweet")
# Post to Discord (convert to fxtwitter for better embeds)
fx_tweet_url = tweet['url'].replace("twitter.com", "fxtwitter.com").replace("x.com", "fxtwitter.com")
@@ -342,7 +342,7 @@ async def handle_custom_prompt_for_server(guild_id: int, user_prompt: str):
try:
# Use consistent user_id for manual prompts to enable conversation history
message = await query_ollama(prompt, user_id=f"miku-manual-{guild_id}", guild_id=guild_id, response_type="autonomous_general")
message = await query_llama(prompt, user_id=f"miku-manual-{guild_id}", guild_id=guild_id, response_type="autonomous_general")
await channel.send(message)
print(f"🎤 Miku responded to custom prompt in server {server_config.guild_name}")
@@ -585,7 +585,7 @@ async def miku_autonomous_reaction_for_server(guild_id: int, force_message=None,
f"Be bold! Use uncommon emojis! Respond with ONLY the emoji character itself, no text."
)
emoji = await query_ollama(
emoji = await query_llama(
prompt,
user_id=f"miku-reaction-{guild_id}", # Use consistent user_id
guild_id=guild_id,
@@ -750,7 +750,7 @@ async def miku_autonomous_reaction_for_dm(user_id: int, force_message=None):
f"Be bold! Use uncommon emojis! Respond with ONLY the emoji character itself, no text."
)
emoji = await query_ollama(
emoji = await query_llama(
prompt,
user_id=f"miku-dm-reaction-{user_id}", # Use consistent user_id per DM user
guild_id=None, # DM doesn't have guild

View File

@@ -10,7 +10,7 @@ from discord import Status
from discord import TextChannel
from difflib import SequenceMatcher
import globals
from utils.llm import query_ollama
from utils.llm import query_llama
from utils.moods import MOOD_EMOJIS
from utils.twitter_fetcher import fetch_miku_tweets
from utils.image_handling import analyze_image_with_qwen, download_and_encode_image
@@ -95,7 +95,7 @@ async def miku_say_something_general(guild_id, settings):
)
for attempt in range(3): # retry up to 3 times if message is too similar
message = await query_ollama(prompt, user_id=f"miku-general-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
message = await query_llama(prompt, user_id=f"miku-general-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
if not is_too_similar(message, _last_autonomous_messages):
break
print("🔁 Response was too similar to past messages, retrying...")
@@ -183,7 +183,7 @@ async def miku_engage_random_user(guild_id, settings):
)
try:
message = await query_ollama(prompt, user_id=f"miku-engage-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
message = await query_llama(prompt, user_id=f"miku-engage-{int(time.time())}", guild_id=guild_id, response_type="autonomous_general")
await channel.send(f"{target.mention} {message}")
print(f"👤 Miku engaged {display_name}")
_last_user_engagements[target.id] = time.time()
@@ -236,7 +236,7 @@ async def miku_detect_and_join_conversation():
)
try:
reply = await query_ollama(prompt, user_id=f"miku-chat-{int(time.time())}", guild_id=guild_id, response_type="conversation_join")
reply = await query_llama(prompt, user_id=f"miku-chat-{int(time.time())}", guild_id=guild_id, response_type="conversation_join")
await channel.send(reply)
print(f"💬 Miku joined an ongoing conversation.")
except Exception as e:
@@ -275,7 +275,7 @@ async def share_miku_tweet(guild_id, settings):
img_desc = await analyze_image_with_qwen(base64_img)
base_prompt += f"\n\nThe image looks like this: {img_desc}"
miku_comment = await query_ollama(base_prompt, user_id="autonomous", guild_id=guild_id, response_type="autonomous_tweet")
miku_comment = await query_llama(base_prompt, user_id="autonomous", guild_id=guild_id, response_type="autonomous_tweet")
# Post to Discord
# Convert to fxtwitter for better embeds
@@ -302,7 +302,7 @@ async def handle_custom_prompt(user_prompt: str):
)
try:
message = await query_ollama(prompt, user_id=f"manual-{int(time.time())}", guild_id=None, response_type="autonomous_general")
message = await query_llama(prompt, user_id=f"manual-{int(time.time())}", guild_id=None, response_type="autonomous_general")
await channel.send(message)
print("🎤 Miku responded to custom prompt.")
_last_autonomous_messages.append(message)

View File

@@ -9,7 +9,7 @@ from datetime import datetime, timedelta
from typing import List, Dict, Optional
import discord
import globals
from utils.llm import query_ollama
from utils.llm import query_llama
from utils.dm_logger import dm_logger
# Directories
@@ -167,7 +167,7 @@ Respond ONLY with the JSON object, no other text."""
# Query the LLM
try:
response = await query_ollama(
response = await query_llama(
analysis_prompt,
user_id=f"analyzer-{user_id}",
guild_id=None,

View File

@@ -9,7 +9,7 @@ import globals
from utils.twitter_fetcher import fetch_figurine_tweets_latest
from utils.image_handling import analyze_image_with_qwen, download_and_encode_image
from utils.llm import query_ollama
from utils.llm import query_llama
from utils.dm_logger import dm_logger
@@ -165,7 +165,7 @@ async def send_figurine_dm_to_user(client: discord.Client, user_id: int, tweet:
base_prompt += "\n\nSign off as Miku with a cute emoji."
# Query LLM in DM context (no guild_id -> DM mood rules apply)
miku_comment = await query_ollama(base_prompt, user_id=f"figurine_dm_{user_id}", guild_id=None, response_type="dm_response")
miku_comment = await query_llama(base_prompt, user_id=f"figurine_dm_{user_id}", guild_id=None, response_type="dm_response")
dm = await user.create_dm()
tweet_url = tweet.get("url", "")

View File

@@ -13,7 +13,7 @@ import tempfile
import time
from typing import Optional, Tuple
import globals
from utils.llm import query_ollama
from utils.llm import query_llama
# Image generation detection patterns
IMAGE_REQUEST_PATTERNS = [
@@ -299,7 +299,7 @@ async def handle_image_generation_request(message, prompt: str) -> bool:
response_prompt = f"A user asked you to create an image with this description: '{prompt}'. Respond enthusiastically that you're creating this image for them. Keep it short and excited!"
response_type = "dm_response" if is_dm else "server_response"
initial_response = await query_ollama(response_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
initial_response = await query_llama(response_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
# Send initial response
initial_msg = await message.channel.send(initial_response)
@@ -318,7 +318,7 @@ async def handle_image_generation_request(message, prompt: str) -> bool:
# Create a follow-up message about the completed image
completion_prompt = f"You just finished creating an image based on '{prompt}'. Make a short, excited comment about the completed artwork!"
completion_response = await query_ollama(completion_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
completion_response = await query_llama(completion_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
await message.channel.send(completion_response, file=file)
@@ -333,7 +333,7 @@ async def handle_image_generation_request(message, prompt: str) -> bool:
else:
# Image generation failed
error_prompt = "You tried to create an image but something went wrong with the generation process. Apologize briefly and suggest they try again later."
error_response = await query_ollama(error_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
error_response = await query_llama(error_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
await message.channel.send(error_response)
print(f"❌ Image generation failed for prompt: {prompt}")

View File

@@ -2,7 +2,7 @@
import random
import globals
from utils.llm import query_ollama # Adjust path as needed
from utils.llm import query_llama # Adjust path as needed
async def detect_and_react_to_kindness(message, after_reply=False, server_context=None):
@@ -37,7 +37,7 @@ async def detect_and_react_to_kindness(message, after_reply=False, server_contex
"Answer with 'yes' or 'no' only.\n\n"
f"Message: \"{message.content}\""
)
result = await query_ollama(prompt, user_id="kindness-check", guild_id=None, response_type="dm_response")
result = await query_llama(prompt, user_id="kindness-check", guild_id=None, response_type="dm_response")
if result.strip().lower().startswith("yes"):
await message.add_reaction(emoji)

View File

@@ -11,7 +11,7 @@ from discord import Status, ActivityType
import globals
from server_manager import server_manager
from utils.llm import query_ollama
from utils.llm import query_llama
from utils.dm_interaction_analyzer import dm_analyzer
BEDTIME_TRACKING_FILE = "last_bedtime_targets.json"
@@ -27,7 +27,7 @@ async def send_monday_video_for_server(guild_id: int):
# Generate a motivational message
prompt = "It's Miku Monday! Give me an energetic and heartfelt Miku Monday morning message to inspire someone for the week ahead."
response = await query_ollama(prompt, user_id=f"weekly-motivation-{guild_id}", guild_id=guild_id)
response = await query_llama(prompt, user_id=f"weekly-motivation-{guild_id}", guild_id=guild_id)
video_url = "http://zip.koko210cloud.xyz/u/zEgU7Z.mp4"
@@ -158,7 +158,7 @@ async def send_bedtime_reminder_for_server(guild_id: int, client=None):
f"Miku is currently feeling: {server_config.current_mood_description or 'neutral'}\nPlease word in a way that reflects this emotional tone."
)
bedtime_message = await query_ollama(prompt, user_id=f"bedtime-{guild_id}", guild_id=guild_id)
bedtime_message = await query_llama(prompt, user_id=f"bedtime-{guild_id}", guild_id=guild_id)
try:
await channel.send(f"{chosen_one.mention} {bedtime_message}")

View File

@@ -1,8 +1,8 @@
from utils.llm import query_ollama
from utils.llm import query_llama
async def analyze_sentiment(messages: list) -> tuple[str, float]:
"""
Analyze the sentiment of a conversation using Ollama
Analyze the sentiment of a conversation using llama.cpp
Returns a tuple of (sentiment description, positivity score from 0-1)
"""
# Combine the last few messages for context (up to 5)
@@ -29,7 +29,7 @@ Score: 0.85
Response:"""
try:
response = await query_ollama(prompt)
response = await query_llama(prompt)
if not response or 'Score:' not in response:
return "Could not analyze sentiment", 0.5

View File

@@ -1,17 +0,0 @@
#!/bin/sh
# Start the server in the background
ollama serve &
# Wait until the server is reachable
until curl -s http://localhost:11434 | grep -q 'Ollama is running'; do
echo 'Waiting for Ollama to start...'
sleep 2
done
# Pull the model
ollama pull llama3.1
ollama pull moondream
# Wait for background jobs
wait

View File

@@ -1,222 +0,0 @@
# Voice Chat Implementation with Fish.audio
## Overview
This document explains how to integrate Fish.audio TTS API with the Miku Discord bot for voice channel conversations.
## Fish.audio API Setup
### 1. Get API Key
- Create account at https://fish.audio/
- Get API key from: https://fish.audio/app/api-keys/
### 2. Find Your Miku Voice Model ID
- Browse voices at https://fish.audio/
- Find your Miku voice model
- Copy the model ID from the URL (e.g., `8ef4a238714b45718ce04243307c57a7`)
- Or use the copy button on the voice page
## API Usage for Discord Voice Chat
### Basic TTS Request (REST API)
```python
import requests
def generate_speech(text: str, voice_id: str, api_key: str) -> bytes:
"""Generate speech using Fish.audio API"""
url = "https://api.fish.audio/v1/tts"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
"model": "s1" # Recommended model
}
payload = {
"text": text,
"reference_id": voice_id, # Your Miku voice model ID
"format": "mp3", # or "pcm" for raw audio
"latency": "balanced", # Lower latency for real-time
"temperature": 0.9, # Controls randomness (0-1)
"normalize": True # Reduces latency
}
response = requests.post(url, json=payload, headers=headers)
return response.content # Returns audio bytes
```
### Real-time Streaming (WebSocket - Recommended for VC)
```python
from fish_audio_sdk import WebSocketSession, TTSRequest
def stream_to_discord(text: str, voice_id: str, api_key: str):
"""Stream audio directly to Discord voice channel"""
ws_session = WebSocketSession(api_key)
# Define text generator (can stream from LLM responses)
def text_stream():
# You can yield text as it's generated from your LLM
yield text
with ws_session:
for audio_chunk in ws_session.tts(
TTSRequest(
text="", # Empty when streaming
reference_id=voice_id,
format="pcm", # Best for Discord
sample_rate=48000 # Discord uses 48kHz
),
text_stream()
):
# Send audio_chunk to Discord voice channel
yield audio_chunk
```
### Async Streaming (Better for Discord.py)
```python
from fish_audio_sdk import AsyncWebSocketSession, TTSRequest
import asyncio
async def async_stream_speech(text: str, voice_id: str, api_key: str):
"""Async streaming for Discord.py integration"""
ws_session = AsyncWebSocketSession(api_key)
async def text_stream():
yield text
async with ws_session:
audio_buffer = bytearray()
async for audio_chunk in ws_session.tts(
TTSRequest(
text="",
reference_id=voice_id,
format="pcm",
sample_rate=48000
),
text_stream()
):
audio_buffer.extend(audio_chunk)
return bytes(audio_buffer)
```
## Integration with Miku Bot
### Required Dependencies
Add to `requirements.txt`:
```
discord.py[voice]
PyNaCl
fish-audio-sdk
speech_recognition # For STT
pydub # Audio processing
```
### Environment Variables
Add to your `.env` or docker-compose.yml:
```bash
FISH_API_KEY=your_api_key_here
MIKU_VOICE_ID=your_miku_model_id_here
```
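On the Python side these would typically be read once at startup; a small sketch, assuming the bot keeps them in its existing `globals` module (the names mirror `globals.FISH_API_KEY` and `globals.MIKU_VOICE_ID` used in the integration example below):

```python
# globals.py (sketch) -- load Fish.audio settings from the environment
import os

FISH_API_KEY = os.environ["FISH_API_KEY"]    # required; raises KeyError if missing
MIKU_VOICE_ID = os.environ["MIKU_VOICE_ID"]  # the Miku voice model ID from fish.audio
```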
### Discord Voice Channel Flow
```
1. User speaks in VC
2. Capture audio → Speech Recognition (STT)
3. Convert speech to text
4. Process with Miku's LLM (existing bot logic)
5. Generate response text
6. Send to Fish.audio TTS API
7. Stream audio back to Discord VC
```
## Key Implementation Details
### For Low Latency Voice Chat:
- Use WebSocket streaming instead of REST API
- Set `latency: "balanced"` in requests
- Use `format: "pcm"` with `sample_rate: 48000` for Discord
- Stream LLM responses as they generate (don't wait for full response)
### Audio Format for Discord:
- **Sample Rate**: 48000 Hz (Discord standard)
- **Channels**: 1 (mono)
- **Format**: PCM (raw audio) or Opus (compressed)
- **Bit Depth**: 16-bit
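For reference, discord.py pulls 20 ms frames of 48 kHz 16-bit stereo PCM from an `AudioSource`; the sketch below (hypothetical `BufferedPCMSource`, not part of the bot) shows a buffer-backed source. Mono Fish.audio output would need to be widened to stereo first, e.g. with `audioop.tostereo`.

```python
import io

import discord

FRAME_BYTES = 3840  # 20 ms at 48 kHz, 16-bit, 2 channels -- what discord.py's Opus encoder expects


class BufferedPCMSource(discord.AudioSource):
    """Plays back a pre-generated buffer of 48 kHz 16-bit stereo PCM."""

    def __init__(self, pcm_bytes: bytes):
        self._buf = io.BytesIO(pcm_bytes)

    def read(self) -> bytes:
        data = self._buf.read(FRAME_BYTES)
        # Anything shorter than a full frame signals end of playback
        return data if len(data) == FRAME_BYTES else b""

    def is_opus(self) -> bool:
        return False  # raw PCM; discord.py encodes to Opus itself
```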
### Cost Considerations:
- **TTS**: $15.00 per million UTF-8 bytes
- Example: ~$0.015 for 1000 characters
- Monitor usage at https://fish.audio/app/billing/
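Since billing is per UTF-8 byte rather than per character, a quick estimator using the rate quoted above helps budget longer (or non-ASCII) replies:

```python
def estimate_tts_cost_usd(text: str, rate_per_million_bytes: float = 15.00) -> float:
    """Rough cost of one TTS request at the documented $15 / 1M UTF-8 bytes rate."""
    return len(text.encode("utf-8")) / 1_000_000 * rate_per_million_bytes

# estimate_tts_cost_usd("a" * 1000) -> 0.015, matching the ~$0.015 per 1000 characters example
```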
### API Features Available:
- **Temperature** (0-1): Controls speech randomness/expressiveness
- **Prosody**: Control speed and volume
```python
"prosody": {
"speed": 1.0, # 0.5-2.0 range
"volume": 0 # -10 to 10 dB
}
```
- **Chunk Length** (100-300): Affects streaming speed
- **Normalize**: Reduces latency but may affect number/date pronunciation
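Putting those options together, a single REST payload might look like the sketch below. Parameter names follow the basic request shown earlier; `chunk_length` is assumed to be the JSON key for the chunk-length setting, so check the Fish.audio docs before relying on it.

```python
payload = {
    "text": "Hi, it's Miku! Ready to sing?",
    "reference_id": voice_id,               # Miku voice model ID
    "format": "pcm",
    "latency": "balanced",
    "temperature": 0.9,                     # expressiveness
    "prosody": {"speed": 1.1, "volume": 0}, # speed 0.5-2.0, volume -10 to 10 dB
    "chunk_length": 200,                    # assumed key; 100-300 range
    "normalize": True,
}
```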
## Example: Integrate with Existing LLM
```python
from utils.llm import query_ollama
from fish_audio_sdk import AsyncWebSocketSession, TTSRequest
async def miku_voice_response(user_message: str):
"""Generate Miku's response and convert to speech"""
# 1. Get text response from existing LLM
response_text = await query_ollama(
prompt=user_message,
model=globals.OLLAMA_MODEL
)
# 2. Convert to speech
ws_session = AsyncWebSocketSession(globals.FISH_API_KEY)
async def text_stream():
# Can stream as LLM generates if needed
yield response_text
async with ws_session:
async for audio_chunk in ws_session.tts(
TTSRequest(
text="",
reference_id=globals.MIKU_VOICE_ID,
format="pcm",
sample_rate=48000
),
text_stream()
):
# Send to Discord voice channel
yield audio_chunk
```
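Once those chunks are collected, playing them back in a connected voice channel could look like this; a hedged sketch (hypothetical `speak_in_vc` helper) that assumes the `miku_voice_response` generator above and a voice client obtained via `channel.connect()`. As noted earlier, mono PCM should be widened to stereo before handing it to `PCMAudio`.

```python
import io

import discord


async def speak_in_vc(voice_client: discord.VoiceClient, user_message: str):
    """Buffer Miku's spoken reply and play it in an already-connected voice channel."""
    pcm = bytearray()
    async for chunk in miku_voice_response(user_message):
        pcm.extend(chunk)
    # PCMAudio reads raw 48 kHz 16-bit stereo PCM from a file-like object
    voice_client.play(discord.PCMAudio(io.BytesIO(bytes(pcm))))
```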
## Rate Limits
Check the current rate limits at:
https://docs.fish.audio/developer-platform/models-pricing/pricing-and-rate-limits
## Additional Resources
- **API Reference**: https://docs.fish.audio/api-reference/introduction
- **Python SDK**: https://github.com/fishaudio/fish-audio-python
- **WebSocket Docs**: https://docs.fish.audio/sdk-reference/python/websocket
- **Discord Community**: https://discord.com/invite/dF9Db2Tt3Y
- **Support**: support@fish.audio
## Next Steps
1. Create Fish.audio account and get API key
2. Find/select Miku voice model and get its ID
3. Install required dependencies
4. Implement voice channel connection in bot
5. Add speech-to-text for user audio
6. Connect Fish.audio TTS to output audio
7. Test latency and quality