fix: 포트 충돌 회피 — note_bridge 8098, intent_service 8099
Jellyfin(8096), OrbStack(8097)과의 포트 충돌로 인해 변경. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,232 @@
|
||||
"""Text collation support for string comparisons.
|
||||
|
||||
This module provides collation (text comparison) functionality with optional
|
||||
PyICU support for advanced Unicode collation. Falls back to simple binary
|
||||
and case-insensitive comparisons when PyICU is not available.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from enum import Enum
|
||||
|
||||
# Try to import PyICU for advanced collation support
|
||||
try:
|
||||
from icu import Collator as ICUCollator
|
||||
from icu import Locale as ICULocale
|
||||
|
||||
HAS_PYICU = True
|
||||
except ImportError:
|
||||
HAS_PYICU = False
|
||||
|
||||
|
||||
class Collation(str, Enum):
    """Strategies available for comparing text.

    Most callers never need this enum directly: passing ``case_sensitive``
    to ``add_property_filter()`` is the recommended, simpler route.

    Examples:
        # Simple API (recommended for most users):
        searcher.add_property_filter("SUMMARY", "meeting", case_sensitive=False)

        # Advanced API (for power users):
        searcher.add_property_filter("SUMMARY", "Müller",
                                     collation=Collation.LOCALE,
                                     locale="de_DE")
    """

    #: Plain-Python collation; works without PyICU.
    #:
    #: - case_sensitive=True: exact comparison of the strings as given
    #: - case_sensitive=False: comparison after ``str.lower()``
    SIMPLE = "simple"

    #: Unicode Collation Algorithm (UCA) using the root collation.
    #:
    #: - case_sensitive=True: ICU TERTIARY strength (case is significant)
    #: - case_sensitive=False: ICU SECONDARY strength (case is ignored)
    #:
    #: PyICU must be installed.
    UNICODE = "unicode"

    #: Locale-aware collation following CLDR rules.
    #:
    #: - case_sensitive=True: ICU TERTIARY strength (case is significant)
    #: - case_sensitive=False: ICU SECONDARY strength (case is ignored)
    #:
    #: PyICU must be installed and a ``locale`` argument must be supplied.
    LOCALE = "locale"
|
||||
|
||||
|
||||
class CollationError(Exception):
    """Signal that a requested collation operation cannot be carried out.

    Within this module it is raised when PyICU is needed but missing, when
    LOCALE collation is requested without a locale, or when the collation
    strategy is not recognised.
    """
|
||||
|
||||
|
||||
def get_collation_function(
    collation: Collation = Collation.SIMPLE,
    case_sensitive: bool = True,
    locale: str | None = None,
) -> Callable[[str, str], bool]:
    """Return a substring-matching predicate for the requested collation.

    Args:
        collation: The collation strategy to use.
        case_sensitive: Whether matching should distinguish case.
        locale: Locale string (e.g., "de_DE", "en_US"); required for
            LOCALE collation and ignored otherwise.

    Returns:
        A function taking ``(needle, haystack)`` that returns True when
        needle is found in haystack according to the collation rules.

    Raises:
        CollationError: If PyICU is required but not available, if LOCALE
            collation is requested without a locale, or if the collation
            strategy is unknown.

    Examples:
        >>> match_fn = get_collation_function(Collation.SIMPLE, case_sensitive=False)
        >>> match_fn("test", "This is a TEST")
        True
    """
    if collation == Collation.SIMPLE:
        return _binary_contains if case_sensitive else _case_insensitive_contains

    # Use the member's plain value in messages. Formatting a (str, Enum)
    # member inside an f-string yields "unicode" on Python <= 3.10 but
    # "Collation.UNICODE" on Python 3.11+, so interpolating the member itself
    # would make the error text depend on the interpreter version. Plain
    # strings (which compare equal to the members) have no ``value`` and are
    # used as-is.
    collation_name = getattr(collation, "value", collation)

    if collation not in (Collation.UNICODE, Collation.LOCALE):
        raise CollationError(f"Unknown collation: {collation_name}")

    # Both remaining strategies are ICU-backed.
    if not HAS_PYICU:
        raise CollationError(
            f"Collation '{collation_name}' requires PyICU to be installed. "
            "Install with: pip install 'icalendar-searcher[collation]'"
        )

    if collation == Collation.LOCALE:
        if not locale:
            raise CollationError("LOCALE collation requires a locale parameter")
        return _get_icu_contains(locale, case_sensitive)

    # UNICODE collation uses the root locale.
    return _get_icu_contains(None, case_sensitive)
|
||||
|
||||
|
||||
def get_sort_key_function(
    collation: Collation = Collation.SIMPLE,
    case_sensitive: bool = True,
    locale: str | None = None,
) -> Callable[[str], bytes]:
    """Return a function that turns a string into a collation sort key.

    Args:
        collation: The collation strategy to use.
        case_sensitive: Whether ordering should distinguish case.
        locale: Locale string (e.g., "de_DE", "en_US"); required for
            LOCALE collation and ignored otherwise.

    Returns:
        A function that takes a string and returns a sort key (bytes)
        suitable as a ``key=`` argument for sorting under the collation rules.

    Raises:
        CollationError: If PyICU is required but not available, if LOCALE
            collation is requested without a locale, or if the collation
            strategy is unknown.

    Examples:
        >>> sort_key_fn = get_sort_key_function(Collation.SIMPLE, case_sensitive=False)
        >>> sorted(["Zebra", "apple", "Banana"], key=sort_key_fn)
        ['apple', 'Banana', 'Zebra']
    """
    if collation == Collation.SIMPLE:
        # SIMPLE keys are the UTF-8 bytes of the string, lower-cased first
        # when case is to be ignored.
        if case_sensitive:
            return lambda s: s.encode("utf-8")
        return lambda s: s.lower().encode("utf-8")

    # Use the member's plain value in messages. Formatting a (str, Enum)
    # member inside an f-string yields "unicode" on Python <= 3.10 but
    # "Collation.UNICODE" on Python 3.11+, so interpolating the member itself
    # would make the error text depend on the interpreter version. Plain
    # strings (which compare equal to the members) have no ``value`` and are
    # used as-is.
    collation_name = getattr(collation, "value", collation)

    if collation not in (Collation.UNICODE, Collation.LOCALE):
        raise CollationError(f"Unknown collation: {collation_name}")

    # Both remaining strategies are ICU-backed.
    if not HAS_PYICU:
        raise CollationError(
            f"Collation '{collation_name}' requires PyICU to be installed. "
            "Install with: pip install 'icalendar-searcher[collation]'"
        )

    if collation == Collation.LOCALE:
        if not locale:
            raise CollationError("LOCALE collation requires a locale parameter")
        return _get_icu_sort_key(locale, case_sensitive)

    # UNICODE collation uses the root locale.
    return _get_icu_sort_key(None, case_sensitive)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Internal implementation functions
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _binary_contains(needle: str, haystack: str) -> bool:
|
||||
"""Binary (case-sensitive) substring match."""
|
||||
return needle in haystack
|
||||
|
||||
|
||||
def _case_insensitive_contains(needle: str, haystack: str) -> bool:
|
||||
"""Case-insensitive substring match."""
|
||||
return needle.lower() in haystack.lower()
|
||||
|
||||
|
||||
def _get_icu_contains(locale: str | None, case_sensitive: bool) -> Callable[[str, str], bool]:
|
||||
"""Get ICU-based substring matcher.
|
||||
|
||||
Note: This is a simplified implementation. PyICU doesn't expose ICU's
|
||||
StringSearch API which would be needed for proper substring matching with
|
||||
collation. For now, we use Python's built-in matching.
|
||||
|
||||
Future enhancement: Implement proper collation-aware substring matching.
|
||||
"""
|
||||
|
||||
def icu_contains(needle: str, haystack: str) -> bool:
|
||||
"""Check if needle is in haystack.
|
||||
|
||||
This is a fallback implementation until proper ICU StringSearch support
|
||||
is added. It provides reasonable behavior for most use cases.
|
||||
"""
|
||||
# TODO: Use ICU StringSearch for proper collation-aware substring matching
|
||||
# For now, fall back to Python's built-in contains
|
||||
if case_sensitive:
|
||||
return needle in haystack
|
||||
else:
|
||||
return needle.lower() in haystack.lower()
|
||||
|
||||
return icu_contains
|
||||
|
||||
|
||||
def _get_icu_sort_key(locale: str | None, case_sensitive: bool) -> Callable[[str], bytes]:
    """Build a sort-key function backed by an ICU collator.

    A collator is created for ``locale`` (or for the root locale when no
    locale is given), its strength is chosen from ``case_sensitive``, and the
    returned function delegates to that collator's ``getSortKey``.
    """
    if locale:
        target_locale = ICULocale(locale)
    else:
        target_locale = ICULocale.getRoot()
    icu_collator = ICUCollator.createInstance(target_locale)

    # Collator strength levels:
    #   PRIMARY   = base character differences only
    #   SECONDARY = base + accent differences (case-insensitive)
    #   TERTIARY  = base + accent + case differences (case-sensitive, default)
    strength = ICUCollator.TERTIARY if case_sensitive else ICUCollator.SECONDARY
    icu_collator.setStrength(strength)

    def icu_sort_key(s: str) -> bytes:
        """Return the ICU collation sort key for ``s``."""
        return icu_collator.getSortKey(s)

    return icu_sort_key
|
||||
Reference in New Issue
Block a user