Example #1
    def __init__(self, name, default=None, **kw):
        """Initialization.

        :param name: Basename for the config file (suffix .ini will be appended).
        :param default: Default content of the config file.
        """
        self.name = name
        self.default = default
        config_dir = Path(kw.pop('config_dir', None) or DIR)
        RawConfigParser.__init__(self, kw, allow_no_value=True)
        if self.default:
            if PY3:
                # configparser's readfp() was removed in Python 3.12;
                # read_file() is the replacement.
                self.read_file(io.StringIO(self.default))
            else:
                self.readfp(io.BytesIO(self.default.encode('utf8')))

        cfg_path = config_dir.joinpath(name + '.ini')
        if cfg_path.exists():
            assert cfg_path.is_file()
            self.read(cfg_path.as_posix())
        else:
            if not config_dir.exists():
                try:
                    config_dir.mkdir()
                except OSError:  # pragma: no cover
                    # this happens when run on travis-ci, by a system user.
                    pass
            if config_dir.exists():
                with open(cfg_path.as_posix(), 'w') as fp:
                    self.write(fp)
        self.path = cfg_path
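
A minimal usage sketch for the constructor above, written for Python 3. The enclosing class is not shown in the snippet, so `Config` is a hypothetical name for the `RawConfigParser` subclass it belongs to, and the `config_dir` value is arbitrary:

# Hypothetical usage; `Config` names the (unshown) class defining __init__ above.
cfg = Config('myapp', default='[db]\nhost = localhost\n',
             config_dir='/tmp/myapp-config')
print(cfg.get('db', 'host'))   # 'localhost', taken from the default content
print(cfg.path)                # /tmp/myapp-config/myapp.ini, written on first run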
Example #2
    def __init__(self, name, dir_=None, default=None, **kw):
        """Initialization.

        :param name: Basename for the config file (suffix .ini will be appended).
        :param default: Default content of the config file.
        """
        INI.__init__(self, kw, allow_no_value=True)
        self.name = name
        config_dir = Path(dir_ or CONFIG_DIR)

        if default:
            if isinstance(default, text_type):
                self.read_string(default)
            #elif isinstance(default, (dict, OrderedDict)):
            #    self.read_dict(default)

        cfg_path = config_dir.joinpath(name + '.ini')
        if cfg_path.exists():
            assert cfg_path.is_file()
            self.read(cfg_path.as_posix())
        else:
            if not config_dir.exists():
                try:
                    config_dir.mkdir()
                except OSError:  # pragma: no cover
                    # this happens when run on travis-ci, by a system user.
                    pass
            if config_dir.exists():
                self.write(cfg_path.as_posix())
        self.path = cfg_path
Example #3
    def to_cldf(self, dest, mdname='cldf-metadata.json'):
        """
        Write the data from the db to a CLDF dataset according to the metadata in `self.dataset`.

        :param dest: Directory to which the CLDF dataset is written.
        :param mdname: Filename for the metadata file.
        :return: path of the metadata file
        """
        dest = Path(dest)
        if not dest.exists():
            dest.mkdir()

        data = self.read()

        if data[self.source_table_name]:
            sources = Sources()
            for src in data[self.source_table_name]:
                sources.add(Source(
                    src['genre'],
                    src['id'],
                    **{k: v for k, v in src.items() if k not in ['id', 'genre']}))
            sources.write(dest / self.dataset.properties.get('dc:source', 'sources.bib'))

        for table_type, items in data.items():
            try:
                table = self.dataset[table_type]
                table.common_props['dc:extent'] = table.write(
                    [self.retranslate(table, item) for item in items],
                    base=dest)
            except KeyError:
                assert table_type == self.source_table_name, table_type
        return self.dataset.write_metadata(dest / mdname)
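
A hedged sketch of how this method might be called; `db` stands for an instance of the (unnamed) wrapper class that defines `to_cldf`, and the output directory is arbitrary:

# Hypothetical call: writes the tables, sources.bib and the metadata file to ./wordlist_cldf
md_path = db.to_cldf('wordlist_cldf')
print(md_path)  # path of the written metadata file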
Example #4
class Cache(object):
    def __init__(self, dir_=None):
        self._dir = Path(dir_ or CACHE_DIR)
        if not self._dir.exists():
            self._dir.mkdir(parents=True)  # pragma: no cover

    def _path(self, key):
        return self._dir.joinpath(path_component(key))

    def __len__(self):
        return len(list(self.keys()))

    def __getitem__(self, item):
        with self._path(item).open('rb') as fp:
            return pickle.load(fp)

    def __setitem__(self, key, value):
        with self._path(key).open('wb') as fp:
            pickle.dump(value, fp)

    def __delitem__(self, key):
        remove(self._path(key))

    def __contains__(self, item):
        return self._path(item).exists()

    def keys(self):
        for p in self._dir.iterdir():
            yield as_unicode(p.name)

    def clear(self):
        for key in self.keys():
            remove(self._path(key))
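
A short usage sketch for the `Cache` above. The directory is arbitrary, and the helpers it relies on (`CACHE_DIR`, `path_component`, `as_unicode`, `remove`) are assumed to be defined in the surrounding module:

# Illustrative only: any picklable value can be stored under a string key.
cache = Cache('/tmp/demo-cache')
cache['vowels'] = ['a', 'e', 'i', 'o', 'u']   # pickled to /tmp/demo-cache/vowels
if 'vowels' in cache:
    print(len(cache))       # 1
    print(cache['vowels'])  # ['a', 'e', 'i', 'o', 'u']
del cache['vowels']
cache.clear()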
Example #5
 def in_dir(cls, d, empty_tables=False):
     fname = Path(d)
     if not fname.exists():
         fname.mkdir()
     assert fname.is_dir()
     res = cls.from_metadata(fname)
     if empty_tables:
         del res.tables[:]
     return res
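
Usage sketch for the classmethod above, assuming `cls` is a pycldf-style dataset class providing `from_metadata`; `Wordlist` is a stand-in name, not taken from the snippet:

# Hypothetical: `Wordlist` stands for whatever class defines in_dir.
ds = Wordlist.in_dir('my_new_dataset')                        # creates the directory if missing
empty = Wordlist.in_dir('my_new_dataset', empty_tables=True)  # same, with the default tables dropped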
Example #6
def new_dataset(args):
    """
    lexibank new-dataset OUTDIR [ID]
    """
    if not args.args:
        raise ParserError('you must specify an existing directory')
    outdir = Path(args.args.pop(0))
    if not outdir.exists():
        raise ParserError('you must specify an existing directory')

    id_pattern = re.compile('[a-z_0-9]+$')
    md = {}
    if args.args:
        md['id'] = args.args.pop(0)
    else:
        md['id'] = input('Dataset ID: ')

    while not id_pattern.match(md['id']):
        print(
            'dataset id must only consist of lowercase ascii letters, digits and _ (underscore)!'
        )
        md['id'] = input('Dataset ID: ')

    outdir = outdir / md['id']
    if not outdir.exists():
        outdir.mkdir()

    for key in ['title', 'url', 'license', 'conceptlist', 'citation']:
        md[key] = input('Dataset {0}: '.format(key))

    # check license!
    # check conceptlist!

    for path in Path(
            pylexibank.__file__).parent.joinpath('dataset_template').iterdir():
        if path.is_file():
            if path.suffix in ['.pyc']:
                continue  # pragma: no cover
            target = path.name
            content = read_text(path)
            if '+' in path.name:
                target = re.sub(r'\+([a-z]+)\+',
                                lambda m: '{' + m.groups()[0] + '}',
                                path.name).format(**md)
            if target.endswith('_tmpl'):
                target = target[:-5]
                content = content.format(**md)
            write_text(outdir / target, content)
        else:
            target = outdir / path.name
            if target.exists():
                shutil.rmtree(str(target))
            shutil.copytree(str(path), str(target))
    del md['id']
    jsonlib.dump(md, outdir / 'metadata.json', indent=4)
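
The filename templating in the loop above turns markers such as `+id+` into `str.format` placeholders before filling in the collected metadata. A standalone sketch of that step, with made-up filename and metadata values:

import re

md = {'id': 'mydataset'}
name = 'lexibank_+id+.py_tmpl'
target = re.sub(r'\+([a-z]+)\+', lambda m: '{' + m.groups()[0] + '}', name).format(**md)
print(target)  # lexibank_mydataset.py_tmpl -- the '_tmpl' suffix is stripped afterwards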
Example #7
def curate(args):  # pragma: no cover
    datasets = {ds.id: ds for ds in args.cfg.datasets}

    class TheCompleter(Completer):
        def get_completions(self, document, complete_event):
            word_before_cursor = document.get_word_before_cursor(WORD=True)
            words = document.text_before_cursor.split()
            if words and words[0] in commands:
                for ds in fuzzyfinder(word_before_cursor, datasets):
                    yield Completion(ds,
                                     start_position=-len(word_before_cursor))
            else:  # elif word_before_cursor:
                for c in fuzzyfinder(word_before_cursor, commands):
                    yield Completion(c,
                                     start_position=-len(word_before_cursor))

    user_input = []
    appdir = Path(user_data_dir('lexibank'))
    if not appdir.exists():
        appdir.mkdir(parents=True)

    while not user_input or user_input[0] != 'quit':
        try:
            user_input = prompt(
                u'lexibank-curator> ',
                history=FileHistory(str(appdir / 'history.txt')),
                auto_suggest=AutoSuggestFromHistory(),
                completer=TheCompleter(),
            ).split()
        except EOFError:
            break
        except KeyboardInterrupt:
            break

        if len(user_input) == 0:
            continue  # ignore empty commands
        if user_input[0] not in commands:
            print(colored('Invalid command!', 'red'))
            continue
        if len(user_input) > 1 and user_input[1] not in datasets:
            print(colored('Invalid dataset!', 'red'))
            continue

        args.args = user_input[1:]
        try:
            s = time()
            commands[user_input[0]](args)
            print('[{0:.3f}]'.format(time() - s))
        except Exception as e:
            traceback.print_exc()
            print(colored('{0}: {1}'.format(e.__class__.__name__, e), 'red'))

    print('see ya!')
Example #8
 def write_info(self, outdir=None):
     outdir = outdir or self.id
     if not isinstance(outdir, Path):
         outdir = Path(outdir)
     if not outdir.exists():
         outdir.mkdir()
     fname = outdir.joinpath(self.fname('.ini'))
     self.cfg.write(fname)
     if os.linesep == '\n':
         with fname.open(encoding='utf8') as fp:
             text = fp.read()
         with fname.open('w', encoding='utf8') as fp:
             fp.write(text.replace('\n', '\r\n'))
     return fname
Example #9
def lff2tree(tree=TREE, outdir=None, builddir=None, lffs=None):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    # FIXME: instead of removing trees, we should just move the current one
    # from outdir to build, and then recreate in outdir.
    builddir = Path(builddir) if builddir else build_path('tree')
    old_tree = {l.id: l for l in walk_tree(tree)} if tree else {}
    out = Path(outdir or tree)
    if not out.parent.exists():
        out.parent.mkdir()

    if out.exists():
        if builddir.exists():
            try:
                rmtree(builddir)
            except:  # pragma: no cover
                pass
            if builddir.exists():  # pragma: no cover
                raise ValueError('please remove %s before proceeding' %
                                 builddir)
        # move the old tree out of the way
        shutil.move(out.as_posix(), builddir.as_posix())
    out.mkdir()

    lffs = lffs or {}
    languages = {}
    for lang in read_lff(Level.language, fp=lffs.get(Level.language)):
        languages[lang.id] = lang
        lang2tree(lang, lang.lineage, out, old_tree)

    for lang in read_lff(Level.dialect, fp=lffs.get(Level.dialect)):
        if not lang.lineage or lang.lineage[0][1] not in languages:
            raise ValueError('unattached dialect')  # pragma: no cover

        lang2tree(lang, languages[lang.lineage[0][1]].lineage + lang.lineage,
                  out, old_tree)
Example #10
def lff2tree(tree=TREE, outdir=None, test=False):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    out = Path(outdir or build_path('tree'))
    if not out.parent.exists():
        out.parent.mkdir()
    if out.exists():
        rmtree(out)
    out.mkdir()
    old_tree = {l.id: l for l in walk_tree(tree)} if tree else {}

    languages = {}
    for lang in read_lff('language'):
        languages[lang.id] = lang
        lang2tree(lang, lang.lineage, out, old_tree)

    for lang in read_lff('dialect'):
        if not lang.lineage or lang.lineage[0][1] not in languages:
            raise ValueError('unattached dialect')

        lang2tree(
            lang, languages[lang.lineage[0][1]].lineage + lang.lineage, out, old_tree)

    if not test:
        rmtree(TREE, ignore_errors=True)
        copytree(out, TREE)
Example #11
        codes[denumber] = {
            'ID': '{0}-{1}'.format(fid, denumber),
            'Name': dename,
            'Parameter_ID': fid,
        }

    fname, fauthors, aname = list(db.execute(SQL_FEATURE.format(fid)))[0]
    ds.write(
        ValueTable=values,
        LanguageTable=languages,
        ParameterTable=[{
            'ID': fid,
            'Name': fname,
            'Area': aname,
            'Authors': fauthors,
            'Url': 'http://wals.info/feature/' + fid
        }],
        CodeTable=codes.values(),
    )


if __name__ == '__main__':
    import sys

    db = create_engine(sys.argv[1])
    feature = sys.argv[2]
    out = Path('wals_{0}_cldf'.format(feature))
    if not out.exists():
        out.mkdir()
    make_cldf(db, out, feature)
Example #12
def lff2tree(lff, tree=None, outdir='fromlff'):
    out = Path(outdir)
    out.mkdir()
    old_tree = {l.id: l for l in languoids_from_tree(tree)} if tree else {}

    nodes = set()
    languages = {}
    for lang in read_lff(lff, 'language'):
        groupdir = out
        languages[lang.id] = lang

        for name, id_, level in lang.lineage:
            groupdir = groupdir.joinpath('%s.%s' % (slug(name), id_))
            if not groupdir.exists():
                groupdir.mkdir()
                if id_ in old_tree:
                    group = old_tree[id_]
                    assert group.level == level
                    if name != group.name:
                        # rename a subgroup!
                        group.name = name
                    group.write_info(groupdir)
                else:
                    # TODO: create Languoid, write info file!
                    pass

            assert id_ in old_tree
            nodes.add(id_)

        assert lang.id in old_tree
        nodes.add(lang.id)
        old_lang = old_tree[lang.id]
        assert old_lang.level == lang.level
        if old_lang.name != lang.name:
            old_lang.name = lang.name
        langdir = groupdir.joinpath(lang.fname())
        langdir.mkdir()
        old_lang.write_info(langdir)

    for lang in read_lff(lff.replace('lff', 'dff'), 'dialect'):
        groupdir = out

        if not lang.lineage:
            # TODO: handle error of un-attached dialects!
            continue

        for name, id_, level in languages[lang.lineage[0][1]].lineage + lang.lineage:
            groupdir = groupdir.joinpath('%s.%s' % (slug(name), id_))
            if not groupdir.exists():
                groupdir.mkdir()
                if id_ in old_tree:
                    group = old_tree[id_]
                    assert group.level == level
                    if name != group.name:
                        # rename a subgroup!
                        group.name = name
                    group.write_info(groupdir)
                else:
                    # TODO: create Languoid, write info file!
                    pass

            assert id_ in old_tree
            nodes.add(id_)

        assert lang.id in old_tree
        nodes.add(lang.id)
        old_lang = old_tree[lang.id]
        assert old_lang.level == lang.level
        if old_lang.name != lang.name:
            old_lang.name = lang.name
        langdir = groupdir.joinpath(lang.fname())
        langdir.mkdir()
        old_lang.write_info(langdir)

    print(len(nodes))