Added Japanese and Bulgarian addressing
This commit is contained in:
@@ -40,15 +40,16 @@ async def is_miku_addressed(message) -> bool:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not fetch referenced message: {e}")
|
logger.warning(f"Could not fetch referenced message: {e}")
|
||||||
|
|
||||||
cleaned = message.content.strip().lower()
|
cleaned = message.content.strip()
|
||||||
|
cleaned_lower = cleaned.lower()
|
||||||
|
|
||||||
# Base names for Miku in different scripts
|
# Base names for Miku in different scripts
|
||||||
base_names = [
|
base_names = [
|
||||||
'miku', 'мику', 'みく', 'ミク', '未来'
|
'miku', 'мику', 'みく', 'ミク', '未来'
|
||||||
]
|
]
|
||||||
|
|
||||||
# Japanese honorifics - all scripts combined for simpler matching
|
# Japanese honorifics - all scripts combined
|
||||||
honorifics_all_scripts = [
|
honorifics = [
|
||||||
# Latin
|
# Latin
|
||||||
'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
|
'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
|
||||||
'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei', 'senpai', 'jou',
|
'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei', 'senpai', 'jou',
|
||||||
@@ -59,51 +60,74 @@ async def is_miku_addressed(message) -> bool:
|
|||||||
'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン', 'ヘイカ',
|
'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン', 'ヘイカ',
|
||||||
'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ', 'センセイ', 'センパイ', 'ジョウ',
|
'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ', 'センセイ', 'センパイ', 'ジョウ',
|
||||||
# Cyrillic
|
# Cyrillic
|
||||||
'чан', 'сан', 'кун', 'ньян', 'химе', 'тан', 'чин', 'хэйка',
|
'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин', 'хейка', 'хеика',
|
||||||
'дэнка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сэнсэй', 'сэнпай', 'жо'
|
'денка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо'
|
||||||
]
|
]
|
||||||
|
|
||||||
# Optional o- prefix in different scripts
|
# o- prefix variants
|
||||||
o_prefixes = ['o-', 'о-', 'お', 'オ']
|
o_prefixes = ['o-', 'о-', 'お', 'オ']
|
||||||
|
|
||||||
# Strategy: Just check if any base name appears (case insensitive for latin/cyrillic)
|
# Build all possible name variations to check
|
||||||
# Then allow any honorific to optionally follow
|
name_patterns = []
|
||||||
|
|
||||||
for base in base_names:
|
for base in base_names:
|
||||||
base_lower = base.lower()
|
base_lower = base.lower()
|
||||||
|
base_escaped = re.escape(base_lower)
|
||||||
|
|
||||||
# Check for just the base name
|
# Base name alone
|
||||||
if re.search(r'(?<![a-zа-яa-я\w])' + re.escape(base_lower) + r'(?![a-zа-яa-я\w])', cleaned):
|
name_patterns.append(base_escaped)
|
||||||
return True
|
|
||||||
|
|
||||||
# Check with optional o- prefix
|
# With honorifics (allows optional dash/space between)
|
||||||
for prefix in o_prefixes:
|
for honorific in honorifics:
|
||||||
prefix_pattern = prefix.lower() if prefix != 'お' and prefix != 'オ' else prefix
|
|
||||||
pattern = r'(?<![a-zа-яa-я\w])' + re.escape(prefix_pattern) + r'\s*' + re.escape(base_lower) + r'(?![a-zа-яa-я\w])'
|
|
||||||
if re.search(pattern, cleaned):
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check base name followed by any honorific (no spacing requirement to catch mixed script)
|
|
||||||
for honorific in honorifics_all_scripts:
|
|
||||||
honorific_lower = honorific.lower()
|
honorific_lower = honorific.lower()
|
||||||
# Allow optional dash, space, or no separator between name and honorific
|
honorific_escaped = re.escape(honorific_lower)
|
||||||
pattern = (r'(?<![a-zа-яa-я\w])' + re.escape(base_lower) +
|
# Build pattern: base + optional [dash or space] + honorific
|
||||||
r'[-\s]*' + re.escape(honorific_lower) +
|
name_patterns.append(base_escaped + r'[\-\s]*' + honorific_escaped)
|
||||||
r'(?![a-zа-яa-я\w])')
|
|
||||||
if re.search(pattern, cleaned):
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check with o- prefix + base + honorific
|
# With o- prefix
|
||||||
for prefix in o_prefixes:
|
for prefix in o_prefixes:
|
||||||
prefix_lower = prefix.lower() if prefix != 'お' and prefix != 'オ' else prefix
|
prefix_lower = prefix.lower()
|
||||||
for honorific in honorifics_all_scripts:
|
prefix_escaped = re.escape(prefix_lower)
|
||||||
|
# o-prefix + optional space + base
|
||||||
|
name_patterns.append(prefix_escaped + r'\s*' + base_escaped)
|
||||||
|
|
||||||
|
# With o- prefix + honorific
|
||||||
|
for honorific in honorifics:
|
||||||
honorific_lower = honorific.lower()
|
honorific_lower = honorific.lower()
|
||||||
pattern = (r'(?<![a-zа-яa-я\w])' + re.escape(prefix_lower) +
|
honorific_escaped = re.escape(honorific_lower)
|
||||||
r'[-\s]*' + re.escape(base_lower) +
|
# o-prefix + space + base + dash/space + honorific
|
||||||
r'[-\s]*' + re.escape(honorific_lower) +
|
name_patterns.append(prefix_escaped + r'\s*' + base_escaped + r'[\-\s]*' + honorific_escaped)
|
||||||
r'(?![a-zа-яa-я\w])')
|
|
||||||
if re.search(pattern, cleaned):
|
# Check all patterns - she must be "addressed" not just mentioned
|
||||||
return True
|
for pattern in name_patterns:
|
||||||
|
try:
|
||||||
|
# Pattern 1: Start of message + punctuation/end
|
||||||
|
# "Miku, ..." or "みく!" or "ミクちゃん、..."
|
||||||
|
start_p = r'^' + pattern + r'(?:[,,、!!??.。\s]+|$)'
|
||||||
|
if re.search(start_p, cleaned_lower, re.IGNORECASE):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Pattern 2: End of message (optionally preceded by punctuation)
|
||||||
|
# "..., Miku" or "...みく" or "...ミクちゃん!"
|
||||||
|
end_p = r'(?:[,,、!!??.。\s]+|^)' + pattern + r'[!!??.。\s]*$'
|
||||||
|
if re.search(end_p, cleaned_lower, re.IGNORECASE):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Pattern 3: Middle (surrounded by punctuation)
|
||||||
|
# "..., Miku, ..." or "...、ミク、..."
|
||||||
|
middle_p = r'[,,、!!??.。\s]+' + pattern + r'[,,、!!??.。\s]+'
|
||||||
|
if re.search(middle_p, cleaned_lower, re.IGNORECASE):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Pattern 4: Just the name alone
|
||||||
|
# "Miku" or "みく!" or "ミクちゃん"
|
||||||
|
alone_p = r'^\s*' + pattern + r'[!!??.。]*\s*$'
|
||||||
|
if re.search(alone_p, cleaned_lower, re.IGNORECASE):
|
||||||
|
return True
|
||||||
|
except re.error as e:
|
||||||
|
# Log the problematic pattern and skip it
|
||||||
|
logger.error(f"REGEX ERROR - Pattern: '{pattern}' | Start regex: '{start_p}' | Error: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user