Exemplo n.º 1
0
def _read_primary_languages():
    """Load the hand-edited language table from the ``languages`` data file.

    Returns:
        A ``(primary_languages, name_to_code)`` tuple.  ``primary_languages``
        maps the name of every named section in the file to that section;
        ``name_to_code`` maps each language name (as normalized by
        ``_munch_language_name``) to the name of the section declaring it.

    Raises:
        misc.DataIntegrityError: if a section contains a key that is not
            recognized, or if the same normalized language name is declared
            more than once.  ``misc.check_sorted`` is also applied to the
            parsed file and may raise on its own.
        KeyError: if a named section has no ``names`` key.
    """
    # Hand-edited linguistic data:
    path = os.path.join(paths.datadir, 'languages')
    cp = configparser.ConfigParser(interpolation=None, default_section='')
    cp.read(path, encoding='UTF-8')
    primary_languages = {name: sect for name, sect in cp.items() if sect.name}
    name_to_code = {}
    misc.check_sorted(cp)
    # Built once instead of on every key comparison.
    known_keys = frozenset({
        'names', 'characters', 'macrolanguage', 'plural-forms',
        'principal-territory',
    })
    for language, section in cp.items():
        if not language:
            # Skip the unnamed default section ('').
            continue
        for key in section.keys():
            if key in known_keys or key.startswith('characters@'):
                continue
            raise misc.DataIntegrityError('unknown key: {}'.format(key))
        for name in section['names'].splitlines():
            name = _munch_language_name(name)
            if not name:
                # Falsy results (e.g. from blank lines) carry no name.
                continue
            if name in name_to_code:
                # Report which name clashes and where, so the data file can
                # be fixed without bisecting it by hand.
                raise misc.DataIntegrityError(
                    'duplicate language name: {} ({} and {})'.format(
                        name, name_to_code[name], language
                    )
                )
            name_to_code[name] = language
    return primary_languages, name_to_code
Exemplo n.º 2
0
def _read_header_fields():
    """Return the set of names listed in the ``header-fields`` data file.

    The file is read as ASCII text.  Lines starting with ``#`` are ignored,
    as are lines that are empty once trailing whitespace is removed.  The
    remaining entries (with trailing whitespace stripped) are passed to
    ``misc.check_sorted`` in file order and then returned as a frozenset.
    """
    filename = os.path.join(paths.datadir, 'header-fields')
    names = []
    with open(filename, 'rt', encoding='ASCII') as stream:
        for raw_line in stream:
            if raw_line.startswith('#'):
                # Comment line.
                continue
            entry = raw_line.rstrip()
            if entry:
                names.append(entry)
    misc.check_sorted(names)
    return frozenset(names)
Exemplo n.º 3
0
def _read_string_formats():
    """Load the ``formats`` section of the ``string-formats`` data file.

    Returns:
        A dict mapping each key of the section to a frozenset of the
        whitespace-separated tokens found in its value.

    Raises:
        KeyError: if the parsed file has no ``formats`` section.
    """
    filename = os.path.join(paths.datadir, 'string-formats')
    parser = configparser.ConfigParser(interpolation=None, default_section='')
    parser.read(filename, encoding='ASCII')
    formats = parser['formats']
    misc.check_sorted(formats)
    result = {}
    for key, value in formats.items():
        result[key] = frozenset(value.split())
    return result
Exemplo n.º 4
0
def _read_tags():
    """Build a ``Tag`` object for every named section of the ``tags`` file.

    Each section's key/value pairs are passed to ``Tag`` as keyword
    arguments, together with ``name`` set to the section name.

    Returns:
        A dict mapping section names to the ``Tag`` built from that section.
    """
    filename = os.path.join(paths.datadir, 'tags')
    parser = configparser.ConfigParser(interpolation=None, default_section='')
    parser.read(filename, encoding='UTF-8')
    misc.check_sorted(parser)
    return {
        # The explicit name= keyword overrides any 'name' key in the section.
        title: Tag(**dict(options.items(), name=title))
        for title, options in parser.items()
        if title  # skip the unnamed default section
    }
Exemplo n.º 5
0
def _read_control_characters():
    """Yield ``(character, name)`` pairs from the ``control-characters`` file.

    Every named section is passed to ``misc.check_sorted``.  Each key must be
    exactly two characters long and is parsed as a hexadecimal code point;
    the resulting character must have Unicode category ``Cc``, and the
    associated name must be equal to its own upper-cased form.

    Raises:
        misc.DataIntegrityError: if any of the checks above fails.
        ValueError: if a two-character key is not valid hexadecimal.
    """
    filename = os.path.join(paths.datadir, 'control-characters')
    parser = configparser.ConfigParser(interpolation=None, default_section='')
    parser.read(filename, encoding='UTF-8')
    for block in parser.values():
        if block.name:
            # Only named sections carry data; the default section is ''.
            misc.check_sorted(block)
            for hex_code, label in block.items():
                if len(hex_code) != 2:
                    raise misc.DataIntegrityError
                char = chr(int(hex_code, 16))
                # Category is tested first, matching the original order.
                if unicodedata.category(char) != 'Cc' or label != label.upper():
                    raise misc.DataIntegrityError
                yield (char, label)
Exemplo n.º 6
0
def _read_iso_codes():
    """Load language and territory codes from the ``iso-codes`` data file.

    For each ``key = value`` entry of the ``language-codes`` section: when the
    value is non-empty, both the value and the key map to the value;
    otherwise the key maps to itself.  The keys of the ``territory-codes``
    section are upper-cased and collected into a frozenset.

    Returns:
        An ``(iso_639, iso_3166)`` tuple: the language-code dict and the
        territory-code frozenset.

    Raises:
        KeyError: if either section is missing from the parsed file.
    """
    # ISO language/territory codes:
    filename = os.path.join(paths.datadir, 'iso-codes')
    parser = configparser.ConfigParser(interpolation=None, default_section='')
    parser.read(filename, encoding='UTF-8')
    languages = parser['language-codes']
    misc.check_sorted(languages)
    iso_639 = {}
    for code, preferred in languages.items():
        if preferred:
            # Inserted before the key itself, as in the original ordering.
            iso_639[preferred] = preferred
        iso_639[code] = preferred or code
    territories = parser['territory-codes']
    misc.check_sorted(territories)
    iso_3166 = frozenset({territory.upper() for territory in territories})
    return (iso_639, iso_3166)
Exemplo n.º 7
0
 def test_unsorted(self):
     # An out-of-order sequence must be rejected, and the error message
     # must name the first offending pair.
     out_of_order = [23, 37, 17]
     with assert_raises(M.DataIntegrityError) as caught:
         M.check_sorted(out_of_order)
     message = str(caught.exception)
     assert_equal(message, '37 > 17')
Exemplo n.º 8
0
 def test_sorted(self):
     # An ascending sequence must pass the check without raising.
     ascending = [17, 23, 37]
     M.check_sorted(ascending)