mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-18 07:19:35 +00:00
31 lines
835 B
Python
31 lines
835 B
Python
"""Unit tests for dealix.intelligence.arabic_nlp."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dealix.intelligence.arabic_nlp import ArabicNLP, normalize_arabic, segment_arabic
|
|
|
|
|
|
def test_normalize_removes_diacritics():
    """Check that a fully vocalized word normalizes to its bare letters."""
    vocalized = "مَرْحَبَاً"
    # The input carries short-vowel marks, a sukun and a tanween; the
    # expected form has none of them and still ends with the alef.
    bare = "مرحبا"
    assert normalize_arabic(vocalized) == bare
|
|
|
|
|
|
def test_normalize_hamza():
    """Check that alef written with hamza or madda comes back as plain alef."""
    # Three words, each starting with a different decorated alef form.
    decorated = "إحسان أمر آسف"
    normalized = normalize_arabic(decorated)
    assert normalized == "احسان امر اسف"
|
|
|
|
|
|
def test_normalize_taa_marbuta():
    """Check that a word-final taa marbuta is rewritten as haa."""
    word = "مؤسسة"
    # Only the last letter differs in the expected form; the hamza-on-waw
    # inside the word is expected to stay as written.
    expected = "مؤسسه"
    assert normalize_arabic(word) == expected
|
|
|
|
|
|
def test_segment_returns_tokens():
    """Check that segmenting a four-word sentence yields at least three tokens."""
    sentence = "مرحبا بك في دياليكس"
    pieces = segment_arabic(sentence)
    # Only a lower bound is checked, so the exact segmentation is not pinned.
    piece_count = len(pieces)
    assert piece_count >= 3
|
|
|
|
|
|
def test_arabic_nlp_is_arabic():
    """Check that ArabicNLP.is_arabic accepts Arabic text and rejects Latin text."""
    analyzer = ArabicNLP()

    arabic_verdict = analyzer.is_arabic("مرحبا بك")
    assert arabic_verdict

    latin_verdict = analyzer.is_arabic("hello world")
    assert not latin_verdict
|