Added Japanese and Bulgarian addressing

This commit is contained in:
2026-01-30 21:34:24 +02:00
parent 38a986658d
commit 7368ef0cd5

View File

@@ -40,15 +40,16 @@ async def is_miku_addressed(message) -> bool:
except Exception as e:
logger.warning(f"Could not fetch referenced message: {e}")
cleaned = message.content.strip().lower()
cleaned = message.content.strip()
cleaned_lower = cleaned.lower()
# Base names for Miku in different scripts
base_names = [
'miku', 'мику', 'みく', 'ミク', '未来'
]
# Japanese honorifics - all scripts combined for simpler matching
honorifics_all_scripts = [
# Japanese honorifics - all scripts combined
honorifics = [
# Latin
'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei', 'senpai', 'jou',
@@ -59,51 +60,74 @@ async def is_miku_addressed(message) -> bool:
'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン', 'ヘイカ',
'デンカ', 'カッカ', '', 'チャマ', 'キュン', 'ドノ', 'センセイ', 'センパイ', 'ジョウ',
# Cyrillic
'чан', 'сан', 'кун', 'ньян', 'химе', 'тан', 'чин', 'хэйка',
'дэнка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сэнсэй', 'сэнпай', 'жо'
'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин', 'хейка', 'хеика',
'денка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо'
]
# Optional o- prefix in different scripts
# o- prefix variants
o_prefixes = ['o-', 'о-', '', '']
# Strategy: Just check if any base name appears (case insensitive for latin/cyrillic)
# Then allow any honorific to optionally follow
# Build all possible name variations to check
name_patterns = []
for base in base_names:
base_lower = base.lower()
base_escaped = re.escape(base_lower)
# Check for just the base name
if re.search(r'(?<![a-zа-яa-я\w])' + re.escape(base_lower) + r'(?![a-zа-яa-я\w])', cleaned):
return True
# Base name alone
name_patterns.append(base_escaped)
# Check with optional o- prefix
for prefix in o_prefixes:
prefix_pattern = prefix.lower() if prefix != '' and prefix != '' else prefix
pattern = r'(?<![a-zа-яa-я\w])' + re.escape(prefix_pattern) + r'\s*' + re.escape(base_lower) + r'(?![a-zа-яa-я\w])'
if re.search(pattern, cleaned):
return True
# Check base name followed by any honorific (no spacing requirement to catch mixed script)
for honorific in honorifics_all_scripts:
# With honorifics (allows optional dash/space between)
for honorific in honorifics:
honorific_lower = honorific.lower()
# Allow optional dash, space, or no separator between name and honorific
pattern = (r'(?<![a-zа-яa-я\w])' + re.escape(base_lower) +
r'[-\s]*' + re.escape(honorific_lower) +
r'(?![a-zа-яa-я\w])')
if re.search(pattern, cleaned):
return True
honorific_escaped = re.escape(honorific_lower)
# Build pattern: base + optional [dash or space] + honorific
name_patterns.append(base_escaped + r'[\-\s]*' + honorific_escaped)
# Check with o- prefix + base + honorific
# With o- prefix
for prefix in o_prefixes:
prefix_lower = prefix.lower() if prefix != '' and prefix != '' else prefix
for honorific in honorifics_all_scripts:
prefix_lower = prefix.lower()
prefix_escaped = re.escape(prefix_lower)
# o-prefix + optional space + base
name_patterns.append(prefix_escaped + r'\s*' + base_escaped)
# With o- prefix + honorific
for honorific in honorifics:
honorific_lower = honorific.lower()
pattern = (r'(?<![a-zа-яa-я\w])' + re.escape(prefix_lower) +
r'[-\s]*' + re.escape(base_lower) +
r'[-\s]*' + re.escape(honorific_lower) +
r'(?![a-zа-яa-я\w])')
if re.search(pattern, cleaned):
return True
honorific_escaped = re.escape(honorific_lower)
# o-prefix + space + base + dash/space + honorific
name_patterns.append(prefix_escaped + r'\s*' + base_escaped + r'[\-\s]*' + honorific_escaped)
# Check all patterns - she must be "addressed" not just mentioned
for pattern in name_patterns:
try:
# Pattern 1: Start of message + punctuation/end
# "Miku, ..." or "みく!" or "ミクちゃん、..."
start_p = r'^' + pattern + r'(?:[,,、!?.。\s]+|$)'
if re.search(start_p, cleaned_lower, re.IGNORECASE):
return True
# Pattern 2: End of message (optionally preceded by punctuation)
# "..., Miku" or "...みく" or "...ミクちゃん!"
end_p = r'(?:[,,、!?.。\s]+|^)' + pattern + r'[!?.。\s]*$'
if re.search(end_p, cleaned_lower, re.IGNORECASE):
return True
# Pattern 3: Middle (surrounded by punctuation)
# "..., Miku, ..." or "...、ミク、..."
middle_p = r'[,,、!?.。\s]+' + pattern + r'[,,、!?.。\s]+'
if re.search(middle_p, cleaned_lower, re.IGNORECASE):
return True
# Pattern 4: Just the name alone
# "Miku" or "みく!" or "ミクちゃん"
alone_p = r'^\s*' + pattern + r'[!?.。]*\s*$'
if re.search(alone_p, cleaned_lower, re.IGNORECASE):
return True
except re.error as e:
# Log the problematic pattern and skip it
logger.error(f"REGEX ERROR - Pattern: '{pattern}' | Start regex: '{start_p}' | Error: {e}")
continue
return False