示例#1
0
 def from_file(cls, fname):
     """
     Build an instance from the rows of an orthography profile file.

     Orthography profiles must be
     - tab-separated CSV files
     - encoded in UTF-8
     - with a header containing a column "Grapheme"

     The lines of `fname` are read with ``normalize='NFD'`` and parsed as
     tab-delimited rows without quoting; every row is handed to `cls` as one
     positional argument.
     """
     lines = readlines(fname, normalize='NFD')
     rows = reader(lines, dicts=True, delimiter='\t', quotechar=None)
     return cls(*rows)
示例#2
0
def test_readlines(tmpdir):
    """Check `readlines` on a file path and on in-memory lists of lines."""
    from clldutils.path import readlines

    # Files are read in universal newline mode: "\n", "\r\n" and "\r" all
    # terminate a line, so this payload must come back as four lines.
    target = tmpdir / 'test.txt'
    target.write_binary(b'a\nb\r\nc\rd')
    from_disk = readlines(str(target))
    assert len(from_disk) == 4

    # A single line that carries a comment marker after leading whitespace.
    sample = ['\t#ä ']
    assert readlines(sample) == sample
    assert readlines(sample, normalize='NFD') != sample
    stripped = readlines(sample, strip=True)
    assert stripped[0] == sample[0].strip()
    assert readlines(sample, comment='#') == []
    numbered = readlines(sample, comment='#', linenumbers=True)
    assert numbered == [(1, None)]

    # A single empty line.
    sample = ['']
    assert readlines(sample) == ['']
    assert readlines(sample, comment='#') == []
    assert readlines(sample, strip=True, normalize='NFC') == []
示例#3
0
def read_lff(api, log, new, level, fname=None):
    """
    Yield languoids parsed from a classification listing.

    The input alternates between two kinds of lines:

    - a classification line (no leading whitespace): a lineage whose
      components are separated by ``LINEAGE_SEP``; each component is parsed
      with ``parse_languoid`` and the resulting list becomes the current path,
    - a languoid line (leading whitespace): parsed with ``parse_languoid`` into
      ``(name, id_, hid)`` and combined with the current path.

    Lines starting with ``#`` and blank lines are skipped.

    :param api: object providing ``build_path``; used to locate the default \
    input file when `fname` is not given.
    :param log: logger; also passed on to ``parse_languoid`` and ``languoid``.
    :param new: passed through unchanged to ``languoid``.
    :param level: ``Level.language`` or ``Level.dialect``.
    :param fname: either a list of lines, or a file name passed to \
    ``readlines``. If falsy, ``api.build_path('<x>ff.txt')`` is read, where \
    ``<x>`` is the first letter of ``level.name``.
    :return: generator of whatever ``languoid(...)`` returns for each languoid line.
    :raises ValueError: if a languoid line appears before any classification line.
    """
    assert level in [Level.language, Level.dialect]
    log.info('reading {0}s from {1}'.format(level.name, fname))

    if isinstance(fname, list):
        # Lines were supplied directly; no file access needed.
        lines = fname
    else:
        lines = readlines(fname or api.build_path('%sff.txt' % level.name[0]))

    path = None
    for line in lines:
        # Only trailing whitespace is dropped here: leading whitespace is what
        # distinguishes a languoid line from a classification line.
        line = line.rstrip()
        if line.startswith('#') or not line:
            # ignore comments or empty lines
            continue

        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        if re.match(r'\s', line):
            # leading whitespace => a language/dialect spec.
            if path is None:
                raise ValueError('language line without classification line')
            name, id_, hid = parse_languoid(line.strip(), log)
            yield languoid(api, log, new, path, name, id_, hid, level)
        else:
            path = [
                parse_languoid(s.strip(), log) for s in line.split(LINEAGE_SEP)
            ]
示例#4
0
    def editors(self):
        """
        Collect the editors listed in the editors table of CONTRIBUTORS.md.

        The file is scanned line by line (stripped). A line ending in
        "# Editors" opens the editors section, a following line starting with
        "--- " marks the start of the table body, and any later line starting
        with "#" closes the section again. Table rows are split at the first
        "|" into a period and a name; the period is split at the first "-" into
        start and end.

        :return: list of ``Editor(name, start, end)`` namedtuples; ``end`` is \
        ``None`` when the period has no end part.
        """
        Editor = namedtuple('Editor', ['name', 'start', 'end'])
        found = []
        in_editors = in_table = False

        for line in readlines(self.path('CONTRIBUTORS.md'), strip=True):
            if in_editors and line.startswith('#'):
                # Next heading reached: the editors section is over.
                in_editors = in_table = False
            elif line.endswith('# Editors'):
                in_editors = True
            elif in_editors and line.startswith('--- '):
                # Separator row between table header and table body.
                in_table = True
            elif in_table and '|' in line:
                span, _, who = line.partition('|')
                start, _, end = span.strip().partition('-')
                found.append(
                    Editor(who.strip(), start.strip(), end.strip() or None))

        return found
示例#5
0
 def from_file(cls, fname):
     """
     Build an instance from the rows read from `fname`.

     Lines are read with ``comment='#'`` and ``normalize='NFD'``, parsed by
     ``reader``, and each resulting row is passed to `cls` as one positional
     argument.
     """
     lines = readlines(fname, comment='#', normalize='NFD')
     rows = reader(lines)
     return cls(*rows)
示例#6
0
 def from_textfile(cls, fname, mapping='mapping'):
     """
     Build an instance from the text content of `fname`.

     The lines of the file are joined with single spaces and the resulting
     string is delegated to ``cls.from_text`` together with `mapping`.
     """
     text = ' '.join(readlines(fname))
     return cls.from_text(text, mapping=mapping)