fix: protect server config from truncation and recover from Discord guilds
- Save servers_config.json atomically via temp file + fsync + rename
- Keep a .bak backup and auto-restore from it when the main config is empty or corrupt
- Add a /servers/recover endpoint for manual recovery
- Auto-recover basic server configs on startup when the config is empty but the bot is in guilds
This commit is contained in:
36
bot/bot.py
36
bot/bot.py
@@ -163,6 +163,42 @@ async def on_ready():
|
||||
# Start server-specific schedulers (includes DM mood rotation)
|
||||
server_manager.start_all_schedulers(globals.client)
|
||||
|
||||
# Auto-recover server config if it was lost/corrupted (e.g., disk full)
|
||||
if not server_manager.servers and globals.client.guilds:
|
||||
logger.warning("⚠️ Server config is empty but bot is in guilds — attempting auto-recovery")
|
||||
recovered = 0
|
||||
for guild in globals.client.guilds:
|
||||
text_channels = [ch for ch in guild.text_channels if ch.permissions_for(guild.me).send_messages]
|
||||
if not text_channels:
|
||||
text_channels = guild.text_channels
|
||||
if not text_channels:
|
||||
continue
|
||||
preferred = None
|
||||
for ch in text_channels:
|
||||
if ch.name.lower() in ("general", "chat", "main", "lounge", "general-chat"):
|
||||
preferred = ch
|
||||
break
|
||||
channel = preferred or text_channels[0]
|
||||
try:
|
||||
server_manager.add_server(
|
||||
guild_id=guild.id,
|
||||
guild_name=guild.name,
|
||||
autonomous_channel_id=channel.id,
|
||||
autonomous_channel_name=f"#{channel.name}",
|
||||
bedtime_channel_ids=[channel.id],
|
||||
enabled_features={"autonomous", "bedtime", "monday_video"}
|
||||
)
|
||||
recovered += 1
|
||||
logger.info(f"🔄 Auto-recovered server: {guild.name} (ID: {guild.id}) → #{channel.name}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to auto-recover server {guild.name}: {e}")
|
||||
if recovered > 0:
|
||||
logger.info(f"✅ Auto-recovered {recovered} server(s) — restarting schedulers")
|
||||
server_manager.stop_all_schedulers()
|
||||
server_manager.start_all_schedulers(globals.client)
|
||||
else:
|
||||
logger.warning("Auto-recovery found no recoverable servers")
|
||||
|
||||
# Start the global scheduler for other tasks
|
||||
globals.scheduler.start()
|
||||
|
||||
|
||||
@@ -136,3 +136,96 @@ def repair_server_config():
|
||||
return {"status": "ok", "message": "Server configuration repaired and saved"}
|
||||
except Exception as e:
|
||||
return JSONResponse(status_code=500, content={"status": "error", "message": f"Failed to repair configuration: {e}"})
|
||||
|
||||
|
||||
@router.post("/servers/recover")
def recover_servers_from_discord():
    """Create placeholder configs for every guild the bot is in but has no config for.

    Intended for the case where servers_config.json was lost or corrupted.
    For each unconfigured guild a text channel is chosen (see below) and
    registered as both the autonomous channel and the only bedtime channel.
    Channels can be adjusted afterwards through the dashboard.

    Channel choice: text channels the bot may send messages in are considered
    first; if there are none, all text channels of the guild are considered.
    Among the candidates, the first one (in listing order) whose name is one
    of a small set of common names wins; otherwise the first candidate is used.

    Returns:
        A 503 JSONResponse if the Discord client is missing or not ready,
        a 404 JSONResponse if the bot is in no guilds, otherwise a dict with
        the ``recovered``, ``skipped`` and ``failed`` guild lists.
    """
    client = globals.client

    # Guard clauses: nothing can be discovered without a connected client.
    if not client or not client.is_ready():
        return JSONResponse(status_code=503, content={
            "status": "error",
            "message": "Discord client not ready — bot must be connected"
        })

    if not client.guilds:
        return JSONResponse(status_code=404, content={
            "status": "error",
            "message": "Bot is not in any Discord guilds"
        })

    common_names = ("general", "chat", "main", "lounge", "general-chat")
    recovered, skipped, failed = [], [], []

    for guild in client.guilds:
        guild_id = guild.id
        guild_name = guild.name
        # Shared identity fields for whichever result list this guild lands in.
        ident = {"guild_id": str(guild_id), "guild_name": guild_name}

        # Guilds that already have a config are left untouched.
        if server_manager.get_server_config(guild_id):
            skipped.append({**ident, "reason": "Already configured"})
            continue

        # Prefer channels the bot can post in; fall back to any text channel.
        candidates = [
            ch for ch in guild.text_channels
            if ch.permissions_for(guild.me).send_messages
        ] or guild.text_channels

        if not candidates:
            failed.append({**ident, "reason": "No text channels found"})
            continue

        well_known = next(
            (ch for ch in candidates if ch.name.lower() in common_names),
            None,
        )
        channel = well_known or candidates[0]

        try:
            added = server_manager.add_server(
                guild_id=guild_id,
                guild_name=guild_name,
                autonomous_channel_id=channel.id,
                autonomous_channel_name=f"#{channel.name}",
                bedtime_channel_ids=[channel.id],
                enabled_features={"autonomous", "bedtime", "monday_video"}
            )
            if not added:
                failed.append({**ident, "reason": "add_server returned False"})
                continue

            recovered.append({
                **ident,
                "autonomous_channel": f"#{channel.name} ({channel.id})"
            })
            logger.info(f"Recovered server config: {guild_name} (ID: {guild_id}) → #{channel.name}")
        except Exception as exc:
            failed.append({**ident, "reason": str(exc)})
            logger.error(f"Failed to recover server {guild_name}: {exc}")

    # Newly added servers need their schedulers; a restart failure is logged
    # but does not change the response.
    if recovered:
        try:
            server_manager.stop_all_schedulers()
            server_manager.start_all_schedulers(client)
        except Exception as exc:
            logger.error(f"Failed to restart schedulers after recovery: {exc}")

    return {
        "status": "ok",
        "recovered": recovered,
        "skipped": skipped,
        "failed": failed,
        "total_guilds": len(client.guilds),
        "note": "Recovered servers use the first text channel as autonomous channel. "
                "Use the Servers tab to adjust channel settings."
    }
|
||||
|
||||
@@ -79,23 +79,60 @@ class ServerManager:
|
||||
self.load_config()
|
||||
|
||||
def load_config(self):
    """Load server configurations from file, falling back to the backup copy.

    The main config file is tried first. If that load reports no servers
    (``_try_load_file`` returns False for a missing, empty, unparseable or
    structurally invalid file), the sibling ``<config_file>.bak`` is tried.
    When the backup loads successfully, ``save_config()`` is called so the
    main file is rewritten from the restored in-memory state.

    ``repair_config()`` always runs at the end, whether or not anything
    was loaded.
    """
    loaded = self._try_load_file(self.config_file)

    if not loaded:
        # Main file gave us nothing usable; see whether a backup exists.
        bak_file = self.config_file + ".bak"
        if os.path.exists(bak_file):
            logger.warning(f"Main config is empty/corrupt, trying backup: {bak_file}")
            loaded = self._try_load_file(bak_file)
            if loaded:
                # Persist immediately so the main file is restored from backup.
                logger.info("Successfully restored server config from backup")
                self.save_config()

    if not loaded:
        logger.info("No valid servers_config.json found — starting with zero servers")

    # After loading, check if we need to repair the config
    self.repair_config()
|
||||
|
||||
def _try_load_file(self, filepath: str) -> bool:
    """Try to load server configs from ``filepath`` into ``self.servers``.

    The load is all-or-nothing: every entry is parsed into a local staging
    dict first and ``self.servers`` is only touched once the whole file has
    been converted successfully. A file that fails part-way therefore leaves
    no half-loaded entries behind for a later fallback load to merge with.

    Args:
        filepath: Path of the JSON file to read. The top level is expected
            to be a non-empty object keyed by guild id strings.

    Returns:
        True if the file was read and its servers were added to
        ``self.servers``; False if the file is missing, empty, not valid
        JSON, not a non-empty JSON object, or any entry failed to convert.
        Failures are logged, never raised.
    """
    if not os.path.exists(filepath):
        return False

    staged = {}
    names = {}
    try:
        # Size check lives inside the try so a file that vanishes after the
        # exists() check is reported as a failed load instead of raising.
        if os.path.getsize(filepath) == 0:
            logger.warning(f"Config file is empty (0 bytes): {filepath}")
            return False

        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

        if not data or not isinstance(data, dict):
            logger.warning(f"Config file has invalid structure: {filepath}")
            return False

        for guild_id_str, server_data in data.items():
            guild_id = int(guild_id_str)
            staged[guild_id] = ServerConfig.from_dict(server_data)
            names[guild_id] = server_data.get('guild_name', 'Unknown')

    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse server config from {filepath}: {e}")
        return False
    except Exception as e:
        # Boundary handler: a bad config file must never stop startup.
        logger.error(f"Failed to load server config from {filepath}: {e}")
        return False

    # Commit only after every entry parsed cleanly.
    for guild_id, config in staged.items():
        self.servers[guild_id] = config
        logger.info(f"Loaded config for server: {names[guild_id]} (ID: {guild_id})")

    return bool(staged)
|
||||
|
||||
def repair_config(self):
|
||||
"""Repair corrupted configuration data and save it back"""
|
||||
@@ -122,7 +159,11 @@ class ServerManager:
|
||||
logger.error(f"Failed to repair config: {e}")
|
||||
|
||||
def save_config(self):
|
||||
"""Save server configurations to file"""
|
||||
"""Save server configurations to file (atomic write with backup).
|
||||
|
||||
Uses write-to-temp-then-rename to prevent file corruption if the
|
||||
filesystem runs out of space mid-write. Also keeps a .bak backup.
|
||||
"""
|
||||
try:
|
||||
os.makedirs(os.path.dirname(self.config_file), exist_ok=True)
|
||||
config_data = {}
|
||||
@@ -134,10 +175,42 @@ class ServerManager:
|
||||
server_dict['enabled_features'] = list(server_dict['enabled_features'])
|
||||
config_data[str(guild_id)] = server_dict
|
||||
|
||||
with open(self.config_file, "w", encoding="utf-8") as f:
|
||||
json.dump(config_data, f, indent=2)
|
||||
serialized = json.dumps(config_data, indent=2)
|
||||
|
||||
# Step 1: Write to a temporary file first
|
||||
tmp_file = self.config_file + ".tmp"
|
||||
with open(tmp_file, "w", encoding="utf-8") as f:
|
||||
f.write(serialized)
|
||||
f.flush()
|
||||
os.fsync(f.fileno()) # Ensure data is written to disk
|
||||
|
||||
# Step 2: Keep a .bak copy of the current valid config (if any)
|
||||
bak_file = self.config_file + ".bak"
|
||||
if os.path.exists(self.config_file) and os.path.getsize(self.config_file) > 0:
|
||||
try:
|
||||
os.replace(self.config_file, bak_file)
|
||||
except OSError as e:
|
||||
logger.warning(f"Could not create backup of server config: {e}")
|
||||
|
||||
# Step 3: Atomically rename temp file to the real config file
|
||||
os.replace(tmp_file, self.config_file)
|
||||
|
||||
# Step 4: Write a second backup copy (paranoid double-backup)
|
||||
try:
|
||||
with open(bak_file, "w", encoding="utf-8") as f:
|
||||
f.write(serialized)
|
||||
except OSError:
|
||||
pass # Backup is best-effort
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save server config: {e}")
|
||||
# Clean up temp file if something went wrong
|
||||
tmp_file = self.config_file + ".tmp"
|
||||
if os.path.exists(tmp_file):
|
||||
try:
|
||||
os.remove(tmp_file)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def add_server(self, guild_id: int, guild_name: str, autonomous_channel_id: int,
|
||||
autonomous_channel_name: str, bedtime_channel_ids: List[int] = None,
|
||||
|
||||
Reference in New Issue
Block a user