Added Japanese and Bulgarian addressing

2026-01-30 21:34:24 +02:00
parent 38a986658d
commit 7368ef0cd5
1 changed files with 59 additions and 35 deletions
--- a/bot/utils/core.py
+++ b/bot/utils/core.py
@@ -40,15 +40,16 @@ async def is_miku_addressed(message) -> bool:
        except Exception as e:
            logger.warning(f"Could not fetch referenced message: {e}")

-    cleaned = message.content.strip().lower()
+    cleaned = message.content.strip()
+    cleaned_lower = cleaned.lower()
    
    # Base names for Miku in different scripts
    base_names = [
        'miku', 'мику', 'みく', 'ミク', '未来'
    ]
    
-    # Japanese honorifics - all scripts combined for simpler matching
-    honorifics_all_scripts = [
+    # Japanese honorifics - all scripts combined
+    honorifics = [
        # Latin
        'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika', 
        'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei', 'senpai', 'jou',
@@ -59,51 +60,74 @@ async def is_miku_addressed(message) -> bool:
        'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン', 'ヘイカ',
        'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ', 'センセイ', 'センパイ', 'ジョウ',
        # Cyrillic
-        'чан', 'сан', 'кун', 'ньян', 'химе', 'тан', 'чин', 'хэйка',
-        'дэнка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сэнсэй', 'сэнпай', 'жо'
+        'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин', 'хейка', 'хеика',
+        'денка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо'
    ]
    
-    # Optional o- prefix in different scripts
+    # o- prefix variants
    o_prefixes = ['o-', 'о-', 'お', 'オ']
    
-    # Strategy: Just check if any base name appears (case insensitive for latin/cyrillic)
-    # Then allow any honorific to optionally follow
+    # Build all possible name variations to check
+    name_patterns = []
    
    for base in base_names:
        base_lower = base.lower()
+        base_escaped = re.escape(base_lower)
        
-        # Check for just the base name
-        if re.search(r'(?<![a-zа-яa-я\w])' + re.escape(base_lower) + r'(?![a-zа-яa-я\w])', cleaned):
-            return True
+        # Base name alone
+        name_patterns.append(base_escaped)
        
-        # Check with optional o- prefix
-        for prefix in o_prefixes:
-            prefix_pattern = prefix.lower() if prefix != 'お' and prefix != 'オ' else prefix
-            pattern = r'(?<![a-zа-яa-я\w])' + re.escape(prefix_pattern) + r'\s*' + re.escape(base_lower) + r'(?![a-zа-яa-я\w])'
-            if re.search(pattern, cleaned):
-                return True
-        
-        # Check base name followed by any honorific (no spacing requirement to catch mixed script)
-        for honorific in honorifics_all_scripts:
+        # With honorifics (allows optional dash/space between)
+        for honorific in honorifics:
            honorific_lower = honorific.lower()
-            # Allow optional dash, space, or no separator between name and honorific
-            pattern = (r'(?<![a-zа-яa-я\w])' + re.escape(base_lower) + 
-                      r'[-\s]*' + re.escape(honorific_lower) + 
-                      r'(?![a-zа-яa-я\w])')
-            if re.search(pattern, cleaned):
-                return True
+            honorific_escaped = re.escape(honorific_lower)
+            # Build pattern: base + optional [dash or space] + honorific
+            name_patterns.append(base_escaped + r'[\-\s]*' + honorific_escaped)
        
-        # Check with o- prefix + base + honorific
+        # With o- prefix
        for prefix in o_prefixes:
-            prefix_lower = prefix.lower() if prefix != 'お' and prefix != 'オ' else prefix
-            for honorific in honorifics_all_scripts:
+            prefix_lower = prefix.lower()
+            prefix_escaped = re.escape(prefix_lower)
+            # o-prefix + optional space + base
+            name_patterns.append(prefix_escaped + r'\s*' + base_escaped)
+            
+            # With o- prefix + honorific
+            for honorific in honorifics:
                honorific_lower = honorific.lower()
-                pattern = (r'(?<![a-zа-яa-я\w])' + re.escape(prefix_lower) + 
-                          r'[-\s]*' + re.escape(base_lower) + 
-                          r'[-\s]*' + re.escape(honorific_lower) + 
-                          r'(?![a-zа-яa-я\w])')
-                if re.search(pattern, cleaned):
-                    return True
+                honorific_escaped = re.escape(honorific_lower)
+                # o-prefix + optional space + base + optional dash/space + honorific
+                name_patterns.append(prefix_escaped + r'\s*' + base_escaped + r'[\-\s]*' + honorific_escaped)
+    
+    # Check all patterns - she must be "addressed" not just mentioned
+    for pattern in name_patterns:
+        try:
+            # Pattern 1: Start of message + punctuation/end
+            # "Miku, ..." or "みく！" or "ミクちゃん、..."
+            start_p = r'^' + pattern + r'(?:[,，、!！?？.。\s]+|$)'
+            if re.search(start_p, cleaned_lower, re.IGNORECASE):
+                return True
+            
+            # Pattern 2: End of message (after punctuation/whitespace or at message start; trailing punctuation allowed)
+            # "..., Miku" or "...みく" or "...ミクちゃん！"
+            end_p = r'(?:[,，、!！?？.。\s]+|^)' + pattern + r'[!！?？.。\s]*$'
+            if re.search(end_p, cleaned_lower, re.IGNORECASE):
+                return True
+            
+            # Pattern 3: Middle (surrounded by punctuation or whitespace)
+            # "..., Miku, ..." or "...、ミク、..."
+            middle_p = r'[,，、!！?？.。\s]+' + pattern + r'[,，、!！?？.。\s]+'
+            if re.search(middle_p, cleaned_lower, re.IGNORECASE):
+                return True
+            
+            # Pattern 4: Just the name alone
+            # "Miku" or "みく！" or "ミクちゃん"
+            alone_p = r'^\s*' + pattern + r'[!！?？.。]*\s*$'
+            if re.search(alone_p, cleaned_lower, re.IGNORECASE):
+                return True
+        except re.error as e:
+            # Log the problematic pattern and skip it
+            logger.error(f"REGEX ERROR - Pattern: '{pattern}' | Start regex: '{start_p}' | Error: {e}")
+            continue
    
    return False