fix: protect server config from truncation and recover from Discord guilds

- Save servers_config.json atomically via temp file + fsync + rename
- Keep .bak backup and auto-restore when main config is empty/corrupt
- Add /servers/recover endpoint for manual recovery
- Auto-recover basic server configs on startup when config is empty but bot is in guilds
This commit is contained in:
2026-06-11 20:37:04 +03:00
parent 486acb5c14
commit cfd5eb16f7
3 changed files with 222 additions and 20 deletions

View File

@@ -163,6 +163,42 @@ async def on_ready():
# Start server-specific schedulers (includes DM mood rotation) # Start server-specific schedulers (includes DM mood rotation)
server_manager.start_all_schedulers(globals.client) server_manager.start_all_schedulers(globals.client)
# Auto-recover server config if it was lost/corrupted (e.g., disk full)
if not server_manager.servers and globals.client.guilds:
logger.warning("⚠️ Server config is empty but bot is in guilds — attempting auto-recovery")
recovered = 0
for guild in globals.client.guilds:
text_channels = [ch for ch in guild.text_channels if ch.permissions_for(guild.me).send_messages]
if not text_channels:
text_channels = guild.text_channels
if not text_channels:
continue
preferred = None
for ch in text_channels:
if ch.name.lower() in ("general", "chat", "main", "lounge", "general-chat"):
preferred = ch
break
channel = preferred or text_channels[0]
try:
server_manager.add_server(
guild_id=guild.id,
guild_name=guild.name,
autonomous_channel_id=channel.id,
autonomous_channel_name=f"#{channel.name}",
bedtime_channel_ids=[channel.id],
enabled_features={"autonomous", "bedtime", "monday_video"}
)
recovered += 1
logger.info(f"🔄 Auto-recovered server: {guild.name} (ID: {guild.id}) → #{channel.name}")
except Exception as e:
logger.error(f"Failed to auto-recover server {guild.name}: {e}")
if recovered > 0:
logger.info(f"✅ Auto-recovered {recovered} server(s) — restarting schedulers")
server_manager.stop_all_schedulers()
server_manager.start_all_schedulers(globals.client)
else:
logger.warning("Auto-recovery found no recoverable servers")
# Start the global scheduler for other tasks # Start the global scheduler for other tasks
globals.scheduler.start() globals.scheduler.start()

View File

@@ -136,3 +136,96 @@ def repair_server_config():
return {"status": "ok", "message": "Server configuration repaired and saved"} return {"status": "ok", "message": "Server configuration repaired and saved"}
except Exception as e: except Exception as e:
return JSONResponse(status_code=500, content={"status": "error", "message": f"Failed to repair configuration: {e}"}) return JSONResponse(status_code=500, content={"status": "error", "message": f"Failed to repair configuration: {e}"})
@router.post("/servers/recover")
def recover_servers_from_discord():
    """Rebuild basic server config entries from the guilds the bot is in.

    Intended for when servers_config.json is lost or corrupted. Every guild
    that has no config entry yet gets a placeholder entry whose autonomous
    and bedtime channel is a single text channel chosen as follows:

    1. Consider only text channels the bot may send messages in; if there
       are none, consider every text channel of the guild.
    2. Among those, take the first one whose name is one of "general",
       "chat", "main", "lounge" or "general-chat"; otherwise the first
       channel in the list.

    Guilds that are already configured are reported under ``skipped``.
    Guilds without any text channel, guilds for which ``add_server``
    returns a falsy value, and guilds that raise during recovery are
    reported under ``failed``; one failing guild does not stop the others.

    If at least one guild was recovered, all schedulers are stopped and
    started again. A failure of that restart is logged and reported in the
    ``scheduler_error`` field of the response (``None`` on success or when
    no restart was needed).

    Returns:
        A 503 JSONResponse if the Discord client is missing or not ready,
        a 404 JSONResponse if the bot is in no guilds, otherwise a dict
        with ``status``, ``recovered``, ``skipped``, ``failed``,
        ``total_guilds``, ``scheduler_error`` and ``note``.
    """
    if not globals.client or not globals.client.is_ready():
        return JSONResponse(status_code=503, content={
            "status": "error",
            "message": "Discord client not ready — bot must be connected"
        })

    if not globals.client.guilds:
        return JSONResponse(status_code=404, content={
            "status": "error",
            "message": "Bot is not in any Discord guilds"
        })

    preferred_names = ("general", "chat", "main", "lounge", "general-chat")
    recovered = []
    skipped = []
    failed = []

    for guild in globals.client.guilds:
        guild_id = guild.id
        guild_name = guild.name
        entry = {"guild_id": str(guild_id), "guild_name": guild_name}

        # Never overwrite a config entry that already exists.
        if server_manager.get_server_config(guild_id):
            skipped.append({**entry, "reason": "Already configured"})
            continue

        # Channel discovery lives inside the try as well, so a guild whose
        # permission lookup raises is reported as failed instead of
        # aborting recovery for every remaining guild.
        try:
            text_channels = [
                ch for ch in guild.text_channels
                if ch.permissions_for(guild.me).send_messages
            ]
            if not text_channels:
                # Fall back to any text channel, even without send permission.
                text_channels = guild.text_channels
            if not text_channels:
                failed.append({**entry, "reason": "No text channels found"})
                continue

            # First channel with a conventional name wins; else the first one.
            channel = next(
                (ch for ch in text_channels if ch.name.lower() in preferred_names),
                text_channels[0],
            )

            success = server_manager.add_server(
                guild_id=guild_id,
                guild_name=guild_name,
                autonomous_channel_id=channel.id,
                autonomous_channel_name=f"#{channel.name}",
                bedtime_channel_ids=[channel.id],
                enabled_features={"autonomous", "bedtime", "monday_video"}
            )
            if success:
                recovered.append({
                    **entry,
                    "autonomous_channel": f"#{channel.name} ({channel.id})"
                })
                logger.info(f"Recovered server config: {guild_name} (ID: {guild_id}) → #{channel.name}")
            else:
                failed.append({**entry, "reason": "add_server returned False"})
        except Exception as e:
            failed.append({**entry, "reason": str(e)})
            logger.error(f"Failed to recover server {guild_name}: {e}")

    # Newly added servers need their schedulers; restart only if something
    # was actually recovered, and surface a failed restart to the caller.
    scheduler_error = None
    if recovered:
        try:
            server_manager.stop_all_schedulers()
            server_manager.start_all_schedulers(globals.client)
        except Exception as e:
            scheduler_error = str(e)
            logger.error(f"Failed to restart schedulers after recovery: {e}")

    return {
        "status": "ok",
        "recovered": recovered,
        "skipped": skipped,
        "failed": failed,
        "total_guilds": len(globals.client.guilds),
        "scheduler_error": scheduler_error,
        "note": "Recovered servers use the first text channel as autonomous channel. "
                "Use the Servers tab to adjust channel settings."
    }

View File

@@ -79,23 +79,60 @@ class ServerManager:
self.load_config() self.load_config()
def load_config(self): def load_config(self):
"""Load server configurations from file""" """Load server configurations from file.
if os.path.exists(self.config_file):
try: If the main file is missing, empty, or corrupt, falls back to the
with open(self.config_file, "r", encoding="utf-8") as f: .bak backup file automatically.
data = json.load(f) """
for guild_id_str, server_data in data.items(): loaded = self._try_load_file(self.config_file)
guild_id = int(guild_id_str)
self.servers[guild_id] = ServerConfig.from_dict(server_data) if not loaded:
logger.info(f"Loaded config for server: {server_data['guild_name']} (ID: {guild_id})") # Try backup file
bak_file = self.config_file + ".bak"
# After loading, check if we need to repair the config if os.path.exists(bak_file):
self.repair_config() logger.warning(f"Main config is empty/corrupt, trying backup: {bak_file}")
except Exception as e: loaded = self._try_load_file(bak_file)
logger.error(f"Failed to load server config: {e}") if loaded:
logger.info("Starting with zero servers — add servers via the API or dashboard") # Restore main config from backup
else: logger.info("Successfully restored server config from backup")
logger.info("No servers_config.json found — starting with zero servers") self.save_config()
if not loaded:
logger.info("No valid servers_config.json found — starting with zero servers")
# After loading, check if we need to repair the config
self.repair_config()
def _try_load_file(self, filepath: str) -> bool:
    """Try to load server configs from ``filepath`` into ``self.servers``.

    The whole file is converted into a staging dict first and merged into
    ``self.servers`` only if every entry converted successfully. A file
    that fails part-way (non-integer guild ID, entry rejected by
    ``ServerConfig.from_dict``) therefore contributes nothing, instead of
    leaving a half-loaded configuration behind.

    Args:
        filepath: Path of the JSON config file to read.

    Returns:
        True if servers were loaded from this file. False if the file does
        not exist, is 0 bytes, cannot be read or parsed, is not a non-empty
        JSON object, or contains an entry that cannot be converted.
    """
    if not os.path.exists(filepath):
        return False

    staged = {}
    names = {}
    try:
        # Inside the try: the file may vanish between the exists() check
        # and this call, which would raise OSError.
        if os.path.getsize(filepath) == 0:
            logger.warning(f"Config file is empty (0 bytes): {filepath}")
            return False

        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

        if not data or not isinstance(data, dict):
            logger.warning(f"Config file has invalid structure: {filepath}")
            return False

        for guild_id_str, server_data in data.items():
            guild_id = int(guild_id_str)
            staged[guild_id] = ServerConfig.from_dict(server_data)
            names[guild_id] = server_data.get('guild_name', 'Unknown')
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse server config from {filepath}: {e}")
        return False
    except Exception as e:
        logger.error(f"Failed to load server config from {filepath}: {e}")
        return False

    # Commit only after the entire file converted cleanly.
    self.servers.update(staged)
    for guild_id, guild_name in names.items():
        logger.info(f"Loaded config for server: {guild_name} (ID: {guild_id})")

    # Report what THIS file provided, not whatever self.servers held before.
    return bool(staged)
def repair_config(self): def repair_config(self):
"""Repair corrupted configuration data and save it back""" """Repair corrupted configuration data and save it back"""
@@ -122,7 +159,11 @@ class ServerManager:
logger.error(f"Failed to repair config: {e}") logger.error(f"Failed to repair config: {e}")
def save_config(self): def save_config(self):
"""Save server configurations to file""" """Save server configurations to file (atomic write with backup).
Uses write-to-temp-then-rename to prevent file corruption if the
filesystem runs out of space mid-write. Also keeps a .bak backup.
"""
try: try:
os.makedirs(os.path.dirname(self.config_file), exist_ok=True) os.makedirs(os.path.dirname(self.config_file), exist_ok=True)
config_data = {} config_data = {}
@@ -134,10 +175,42 @@ class ServerManager:
server_dict['enabled_features'] = list(server_dict['enabled_features']) server_dict['enabled_features'] = list(server_dict['enabled_features'])
config_data[str(guild_id)] = server_dict config_data[str(guild_id)] = server_dict
with open(self.config_file, "w", encoding="utf-8") as f: serialized = json.dumps(config_data, indent=2)
json.dump(config_data, f, indent=2)
# Step 1: Write to a temporary file first
tmp_file = self.config_file + ".tmp"
with open(tmp_file, "w", encoding="utf-8") as f:
f.write(serialized)
f.flush()
os.fsync(f.fileno()) # Ensure data is written to disk
# Step 2: Keep a .bak copy of the current valid config (if any)
bak_file = self.config_file + ".bak"
if os.path.exists(self.config_file) and os.path.getsize(self.config_file) > 0:
try:
os.replace(self.config_file, bak_file)
except OSError as e:
logger.warning(f"Could not create backup of server config: {e}")
# Step 3: Atomically rename temp file to the real config file
os.replace(tmp_file, self.config_file)
# Step 4: Write a second backup copy (paranoid double-backup)
try:
with open(bak_file, "w", encoding="utf-8") as f:
f.write(serialized)
except OSError:
pass # Backup is best-effort
except Exception as e: except Exception as e:
logger.error(f"Failed to save server config: {e}") logger.error(f"Failed to save server config: {e}")
# Clean up temp file if something went wrong
tmp_file = self.config_file + ".tmp"
if os.path.exists(tmp_file):
try:
os.remove(tmp_file)
except OSError:
pass
def add_server(self, guild_id: int, guild_name: str, autonomous_channel_id: int, def add_server(self, guild_id: int, guild_name: str, autonomous_channel_id: int,
autonomous_channel_name: str, bedtime_channel_ids: List[int] = None, autonomous_channel_name: str, bedtime_channel_ids: List[int] = None,