Exemplo n.º 1
0
def unicodeInScripts(uv, scripts):
    """Report whether codepoint 'uv' shares a script with 'scripts'.

    The ScriptExtension set of the character for 'uv' is looked up via
    unicodedata.script_extension().

    Returns:
        None  -- when that set contains 'Zyyy' (the 'Common' script);
        True  -- when at least one of its scripts is also in 'scripts';
        False -- when it has nothing in common with 'scripts'.
    """
    extensions = unicodedata.script_extension(unichr(uv))
    if "Zyyy" not in extensions:
        # A non-disjoint pair means at least one script is shared.
        return not extensions.isdisjoint(scripts)
    # 'Common' is neither a match nor a mismatch.
    return None
Exemplo n.º 2
0
def test_script_extension():
    """Exercise unicodedata.script_extension() on a handful of codepoints.

    Covers one character expected to report a single script ('Latn'),
    U+0000 (expected 'Zyyy'), two codepoints expected to report 'Zzzz',
    and two characters whose expected result lists several scripts.
    """
    script_ext = unicodedata.script_extension

    # Codepoints whose expected result is a single script code.
    assert script_ext("a") == {"Latn"}
    assert script_ext(unichr(0)) == {"Zyyy"}
    assert script_ext(unichr(0x0378)) == {"Zzzz"}
    assert script_ext(unichr(0x10FFFF)) == {"Zzzz"}

    # Characters whose expected result contains more than one script.
    expected_0660 = {'Arab', 'Thaa'}
    expected_0964 = {
        'Beng', 'Deva', 'Dogr', 'Gong', 'Gonm', 'Gran', 'Gujr', 'Guru', 'Knda',
        'Mahj', 'Mlym', 'Nand', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml',
        'Telu', 'Tirh'}
    assert script_ext("\u0660") == expected_0660
    assert script_ext("\u0964") == expected_0964
Exemplo n.º 3
0
def test_script_extension():
    """Verify unicodedata.script_extension() against fixed expected sets.

    The first group pairs each character with the one script code it is
    expected to report; the remaining assertions check two characters whose
    expected result lists several scripts.
    """
    # (character, the only script code expected in its result)
    single_script_cases = (
        ("a", "Latn"),
        (unichr(0), "Zyyy"),
        (unichr(0x0378), "Zzzz"),
        (unichr(0x10FFFF), "Zzzz"),
    )
    for char, script in single_script_cases:
        assert unicodedata.script_extension(char) == {script}

    # Characters expected to report more than one script.
    two_scripts = {'Arab', 'Thaa'}
    assert unicodedata.script_extension("\u0660") == two_scripts

    many_scripts = {
        'Beng', 'Deva', 'Dogr', 'Gong', 'Gran', 'Gujr', 'Guru', 'Knda',
        'Mahj', 'Mlym', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml',
        'Telu', 'Tirh'}
    assert unicodedata.script_extension("\u0964") == many_scripts