Пример #1
0
    def __init__(self, name, dir_=None, default=None, **kw):
        """Initialization.

        :param name: Basename for the config file (suffix .ini will be appended).
        :param dir_: Directory to store the config file in; falls back to CONFIG_DIR.
        :param default: Default content of the config file.
        :param kw: Passed positionally (i.e. as the ``defaults`` mapping) to \
``INI.__init__``.
        """
        INI.__init__(self, kw, allow_no_value=True)
        self.name = name
        config_dir = Path(dir_ or CONFIG_DIR)

        if default:
            if isinstance(default, text_type):
                self.read_string(default)
            #elif isinstance(default, (dict, OrderedDict)):
            #    self.read_dict(default)

        cfg_path = config_dir.joinpath(name + '.ini')
        if cfg_path.exists():
            assert cfg_path.is_file()
            # Values from an existing config file override the defaults.
            self.read(cfg_path.as_posix())
        else:
            if not config_dir.exists():
                try:
                    config_dir.mkdir()
                except OSError:  # pragma: no cover
                    # this happens when run on travis-ci, by a system user.
                    pass
            if config_dir.exists():
                # Persist the default configuration for future runs.
                # NOTE(review): assumes INI.write accepts a path argument — confirm.
                self.write(cfg_path.as_posix())
        self.path = cfg_path
Пример #2
0
    def __init__(self, name, default=None, **kw):
        """Initialization.

        :param name: Basename for the config file (suffix .ini will be appended).
        :param default: Default content of the config file.
        :param kw: May contain ``config_dir``; the remainder is passed \
positionally (as ``defaults``) to ``RawConfigParser``.
        """
        self.name = name
        self.default = default
        config_dir = Path(kw.pop('config_dir', None) or DIR)
        RawConfigParser.__init__(self, kw, allow_no_value=True)
        if self.default:
            # NOTE(review): readfp is deprecated in favour of read_file on py3,
            # but is kept here for py2 compatibility.
            if PY3:
                fp = io.StringIO(self.default)
            else:
                fp = io.BytesIO(self.default.encode('utf8'))
            self.readfp(fp)

        cfg_path = config_dir.joinpath(name + '.ini')
        if cfg_path.exists():
            assert cfg_path.is_file()
            # Values from an existing config file override the defaults.
            self.read(cfg_path.as_posix())
        else:
            if not config_dir.exists():
                try:
                    config_dir.mkdir()
                except OSError:  # pragma: no cover
                    # this happens when run on travis-ci, by a system user.
                    pass
            if config_dir.exists():
                # Persist the default configuration for future runs.
                with open(cfg_path.as_posix(), 'w') as fp:
                    self.write(fp)
        self.path = cfg_path
Пример #3
0
def get_ini(fname, **kw):
    """Read an INI file, falling back to the copy shipped with pyglottolog."""
    path = Path(fname)
    if not path.exists():
        # For old-style (<=3.4) repository layout we ship the config data with pyglottolog:
        basename = 'document_types.ini' if path.name == 'hhtype.ini' else path.name
        path = Path(__file__).parent / basename
    assert path.exists()
    return INI.from_file(path, **kw)
Пример #4
0
def new_dataset(args):
    """
    lexibank new-dataset OUTDIR [ID]

    Create a new dataset skeleton from the template shipped with pylexibank,
    prompting interactively for metadata.

    :param args: CLI arguments; ``args.args`` holds OUTDIR and optionally the ID.
    :raises ParserError: if OUTDIR is missing or does not exist.
    """
    if not args.args:
        raise ParserError('you must specify an existing directory')
    outdir = Path(args.args.pop(0))
    if not outdir.exists():
        raise ParserError('you must specify an existing directory')

    id_pattern = re.compile('[a-z_0-9]+$')
    md = {}
    if args.args:
        md['id'] = args.args.pop(0)
    else:
        md['id'] = input('Dataset ID: ')

    while not id_pattern.match(md['id']):
        print(
            'dataset id must only consist of lowercase ascii letters, digits and _ (underscore)!'
        )
        md['id'] = input('Dataset ID: ')

    outdir = outdir / md['id']
    if not outdir.exists():
        outdir.mkdir()

    for key in ['title', 'url', 'license', 'conceptlist', 'citation']:
        md[key] = input('Dataset {0}: '.format(key))

    # check license!
    # check conceptlist!

    for path in Path(
            pylexibank.__file__).parent.joinpath('dataset_template').iterdir():
        if path.is_file():
            if path.suffix in ['.pyc']:
                continue  # pragma: no cover
            target = path.name
            content = read_text(path)
            if '+' in path.name:
                # Raw string: '\+' is an invalid escape in a plain string literal.
                target = re.sub(r'\+([a-z]+)\+',
                                lambda m: '{' + m.groups()[0] + '}',
                                path.name).format(**md)
            if target.endswith('_tmpl'):
                target = target[:-5]
                content = content.format(**md)
            write_text(outdir / target, content)
        else:
            target = outdir / path.name
            if target.exists():
                shutil.rmtree(str(target))
            shutil.copytree(str(path), str(target))
    del md['id']
    jsonlib.dump(md, outdir / 'metadata.json', indent=4)
Пример #5
0
def link(args):
    """\
Complete linking of concepts to concept sets. If either CONCEPTICON_GLOSS or
CONCEPTICON_ID is given, the other is added.

concepticon link <concept-list>
"""
    # Try the argument as a literal path first, then as a bundled conceptlist.
    clist = Path(args.args[0])
    if not (clist.exists() and clist.is_file()):
        clist = data_path('conceptlists', args.args[0])
        if not (clist.exists() and clist.is_file()):
            raise ParserError('no file %s found' % args.args[0])

    rewrite(clist, Linker(clist.stem))
Пример #6
0
def link(args):
    """
    Complete linking of concepts to concept sets. If either CONCEPTICON_GLOSS or
    CONCEPTICON_ID is given, the other is added.

    concepticon link <concept-list>
    """
    api = Concepticon(args.data)
    # Try the argument as a literal path first, then as a bundled conceptlist.
    clist = Path(args.args[0])
    if not (clist.exists() and clist.is_file()):
        clist = api.data_path('conceptlists', args.args[0])
        if not (clist.exists() and clist.is_file()):
            raise ParserError('no file %s found' % args.args[0])

    rewrite(clist, Linker(clist.stem, api.conceptsets.values()))
Пример #7
0
def lff2tree(tree=TREE, outdir=None, builddir=None, lffs=None):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree

    :param tree: Path of the existing languoid tree.
    :param outdir: Directory to assemble the new tree in (defaults to `tree`).
    :param builddir: Directory the old tree is moved to.
    :param lffs: Optional mapping of Level to lff file-like objects.
    :raises ValueError: if `builddir` cannot be removed or a dialect is unattached.
    """
    # FIXME: instead of removing trees, we should just move the current one
    # from outdir to build, and then recreate in outdir.
    builddir = Path(builddir) if builddir else build_path('tree')
    old_tree = {l.id: l for l in walk_tree(tree)} if tree else {}
    out = Path(outdir or tree)
    if not out.parent.exists():
        out.parent.mkdir()

    if out.exists():
        if builddir.exists():
            try:
                rmtree(builddir)
            except OSError:  # pragma: no cover
                # Catch only OS-level failures; a bare except would also
                # swallow KeyboardInterrupt/SystemExit.
                pass
            if builddir.exists():  # pragma: no cover
                raise ValueError('please remove %s before proceeding' %
                                 builddir)
        # move the old tree out of the way
        shutil.move(out.as_posix(), builddir.as_posix())
    out.mkdir()

    lffs = lffs or {}
    languages = {}
    for lang in read_lff(Level.language, fp=lffs.get(Level.language)):
        languages[lang.id] = lang
        lang2tree(lang, lang.lineage, out, old_tree)

    for lang in read_lff(Level.dialect, fp=lffs.get(Level.dialect)):
        if not lang.lineage or lang.lineage[0][1] not in languages:
            raise ValueError('unattached dialect')  # pragma: no cover

        lang2tree(lang, languages[lang.lineage[0][1]].lineage + lang.lineage,
                  out, old_tree)
Пример #8
0
class Cache(object):
    """Filesystem-backed cache mapping keys to pickled values, one file per key."""

    def __init__(self, dir_=None):
        # The cache directory is created on first use.
        self._dir = Path(dir_ or CACHE_DIR)
        if not self._dir.exists():
            self._dir.mkdir(parents=True)  # pragma: no cover

    def _path(self, key):
        # Map a key to a filesystem-safe path inside the cache directory.
        return self._dir.joinpath(path_component(key))

    def __len__(self):
        return len(list(self.keys()))

    def __getitem__(self, item):
        with self._path(item).open('rb') as fp:
            return pickle.load(fp)

    def __setitem__(self, key, value):
        with self._path(key).open('wb') as fp:
            pickle.dump(value, fp)

    def __delitem__(self, key):
        remove(self._path(key))

    def __contains__(self, item):
        return self._path(item).exists()

    def keys(self):
        """Yield the keys of all cached items."""
        for p in self._dir.iterdir():
            yield as_unicode(p.name)

    def clear(self):
        """Remove all cached items.

        Keys are materialized into a list first: removing directory entries
        while iterating the directory listing is undefined behavior.
        """
        for key in list(self.keys()):
            remove(self._path(key))
Пример #9
0
def stats(args):
    """
    cldf stats <DATASET>

    Print basic stats for CLDF dataset <DATASET>, where <DATASET> may be the path to
    - a CLDF metadata file
    - a CLDF core data file
    - a CLDF zip archive

    :param args: CLI arguments; ``args.args[0]`` is the dataset path.
    :raises ParserError: if no argument is given or the path is not an existing file.
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        # The check is for a file, so say so (the old message claimed "directory").
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.zip':
        ds = Dataset.from_zip(fname)
    elif fname.name.endswith(MD_SUFFIX):
        ds = Dataset.from_metadata(fname)
    else:
        ds = Dataset.from_file(fname)
    print(fname)
    stats_ = ds.stats
    print("""
Name: %s
Different languages: %s
Different parameters: %s
Rows: %s
""" % (
        ds.name,
        len(stats_['languages']),
        len(stats_['parameters']),
        stats_['rowcount']
    ))
Пример #10
0
class SourcesCatalog(object):
    """Context manager around a JSON catalog of CDSTAR source objects."""

    def __init__(self, path):
        self.path = Path(path)
        # Start from the existing catalog if one is present on disk.
        self.items = jsonlib.load(self.path) if self.path.exists() else {}

    def __contains__(self, item):
        return item in self.items

    def get(self, item):
        return self.items.get(item)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Persist the catalog, sorted by key at both levels, on exit.
        ordered = OrderedDict(
            (key, OrderedDict(sorted(val.items())))
            for key, val in sorted(self.items.items()))
        jsonlib.dump(ordered, self.path, indent=4)

    def add(self, key, obj):
        """Register *obj* (a CDSTAR object) under *key* and return the entry."""
        first = obj.bitstreams[0]
        self.items[key] = OrderedDict([
            ('url', 'https://cdstar.shh.mpg.de/bitstreams/{0}/{1}'.format(
                obj.id, first.id)),
            ('objid', obj.id),
            ('original', first.id),
            ('size', first.size),
            ('mimetype', first.mimetype),
        ])
        return self.items[key]
Пример #11
0
    def to_cldf(self, dest, mdname='cldf-metadata.json'):
        """
        Write the data from the db to a CLDF dataset according to the metadata in `self.dataset`.

        :param dest: Destination directory (created if missing).
        :param mdname: Basename for the metadata file.
        :return: path of the metadata file
        """
        dest = Path(dest)
        if not dest.exists():
            dest.mkdir()

        data = self.read()

        if data[self.source_table_name]:
            sources = Sources()
            for src in data[self.source_table_name]:
                sources.add(Source(
                    src['genre'],
                    src['id'],
                    **{k: v for k, v in src.items() if k not in ['id', 'genre']}))
            sources.write(dest / self.dataset.properties.get('dc:source', 'sources.bib'))

        for table_type, items in data.items():
            try:
                table = self.dataset[table_type]
                table.common_props['dc:extent'] = table.write(
                    [self.retranslate(table, item) for item in items],
                    base=dest)
            except KeyError:
                # Only the sources "table" has no corresponding CLDF table.
                assert table_type == self.source_table_name, table_type
        # Honor the mdname parameter instead of a hard-coded filename.
        return self.dataset.write_metadata(dest / mdname)
Пример #12
0
    def write(self, outdir='.', suffix='.csv', cited_sources_only=False, archive=False):
        """Write the dataset (data table, metadata and sources) to `outdir`.

        :param outdir: Existing directory to write into.
        :param suffix: File suffix for the data table; tab suffixes switch to TSV.
        :param cited_sources_only: If True, only write sources cited in the data.
        :param archive: If truthy, write into a zip archive; may be an open Archive.
        :raises ValueError: if `outdir` does not exist.
        """
        outdir = Path(outdir)
        if not outdir.exists():
            raise ValueError(outdir.as_posix())

        close = False
        if archive:
            if isinstance(archive, Archive):
                container = archive
            else:
                container = Archive(outdir.joinpath(self.name + '.zip'), mode='w')
                close = True  # we opened the archive, so we must close it below
        else:
            container = outdir

        fname = Path(outdir).joinpath(self.name + suffix)
        if fname.suffix in TAB_SUFFIXES:
            self.table.dialect.delimiter = '\t'

        with UnicodeWriter(
                None if isinstance(container, Archive) else fname,
                delimiter=self.table.dialect.delimiter) as writer:
            writer.writerow(self.fields)
            for row in self.rows:
                writer.writerow(row.to_list())

        if isinstance(container, Archive):
            # NOTE(review): UnicodeWriter with target None appears to buffer
            # in memory and expose the result via read() — confirm.
            container.write_text(writer.read(), fname.name)
        self.table.url = fname.name

        self.metadata.write(Dataset.filename(fname, 'metadata'), container)
        ids = self._cited_sources if cited_sources_only else None
        self.sources.write(Dataset.filename(fname, 'sources'), container, ids=ids)
        if close:
            container.close()
Пример #13
0
 def __call__(self, parser, namespace, values, option_string=None):
     """Validate that *values* names an existing directory and store it on *namespace*."""
     candidate = Path(values)
     if not candidate.exists():
         raise argparse.ArgumentError(self, "path does not exist")
     if not candidate.is_dir():
         raise argparse.ArgumentError(self, "path is no directory")
     setattr(namespace, self.dest, candidate)
Пример #14
0
 def __call__(self, parser, namespace, values, option_string=None):
     """Validate that *values* names an existing directory and store it on *namespace*."""
     candidate = Path(values)
     if not candidate.exists():
         raise argparse.ArgumentError(self, 'path does not exist')
     if not candidate.is_dir():
         raise argparse.ArgumentError(self, 'path is no directory')
     setattr(namespace, self.dest, candidate)
Пример #15
0
Файл: util.py Проект: clld/apics
def wals_detail_html(context=None, request=None, **kw):
    """Assemble the template context for the WALS detail view of a parameter.

    Loads the packaged WALS GeoJSON for ``context.parameter.wals_id`` and
    decorates its features with icon URLs and popup links.

    :raises HTTPNotFound: if no WALS data file exists for the parameter.
    """
    wals_data = Path(apics.__file__).parent.joinpath(
        'static', 'wals', '%sA.json' % context.parameter.wals_id)
    if not wals_data.exists():
        raise HTTPNotFound()

    wals_data = jsonlib.load(wals_data)
    value_map = {}

    for layer in wals_data['layers']:
        for feature in layer['features']:
            # Resolve the icon name to a URL via the registered IIcon utility.
            feature['properties']['icon'] = request.registry.getUtility(
                IIcon, name=feature['properties']['icon']).url(request)
            feature['properties']['popup'] = external_link(
                'http://wals.info/languoid/lect/wals_code_'
                + feature['properties']['language']['id'],
                label=feature['properties']['language']['name'])
        # One value_map entry per layer, keyed by the WALS value number.
        value_map[layer['properties']['number']] = {
            'icon': layer['features'][0]['properties']['icon'],
            'name': layer['properties']['name'],
            'number': layer['properties']['number'],
        }

    return {
        'wals_data': wals_data,
        'wals_map': WalsMap(
            context.parameter, request, data=wals_data, value_map=value_map),
        'apics_map': ApicsWalsMap(
            context.parameter, request, data=wals_data, value_map=value_map)}
Пример #16
0
def get_dataset(fname=None):
    """Load a CLDF dataset.

    Load the file as `json` CLDF metadata description file, or as metadata-free
    dataset contained in a single csv file.

    The distinction is made depending on the file extension: `.json` files are
    loaded as metadata descriptions, all other files are matched against the
    CLDF module specifications. Directories are checked for the presence of
    any CLDF datasets in undefined order of the dataset types.

    Parameters
    ----------
    fname : str or Path
        Path to a CLDF dataset

    Returns
    -------
    pycldf.Dataset
    """
    path = repository if fname is None else Path(fname)
    if not path.exists():
        raise FileNotFoundError('{:} does not exist'.format(path))
    if path.suffix == '.json':
        return Dataset.from_metadata(path)
    return Dataset.from_data(path)
Пример #17
0
    def test_Matrix(self):
        from wals3.adapters import Matrix

        # Use a path that is guaranteed not to exist yet.
        target = Path(mktemp())
        assert not target.exists()

        class TestMatrix(Matrix):
            def abspath(self, req):
                return target

            def query(self, req):
                return Matrix.query(self, req).filter(Language.pk < 100)

        adapter = TestMatrix(Language, "wals3", description="Feature values CSV")
        adapter.create(self.env["request"], verbose=False)
        assert target.exists()
        remove(target)
Пример #18
0
    def test_Matrix(self):
        from wals3.adapters import Matrix

        # Use a path that is guaranteed not to exist yet.
        target = Path(mktemp())
        assert not target.exists()

        class TestMatrix(Matrix):
            def abspath(self, req):
                return target

            def query(self, req):
                return Matrix.query(self, req).filter(Language.pk < 100)

        adapter = TestMatrix(Language, 'wals3', description="Feature values CSV")
        adapter.create(self.env['request'], verbose=False)
        assert target.exists()
        remove(target)
Пример #19
0
def lff2tree(tree=TREE, outdir=None, builddir=None, lffs=None):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree

    :param tree: Path of the existing languoid tree.
    :param outdir: Directory to assemble the new tree in (defaults to `tree`).
    :param builddir: Directory the old tree is moved to.
    :param lffs: Optional mapping of Level to lff file-like objects.
    :raises ValueError: if `builddir` cannot be removed or a dialect is unattached.
    """
    # FIXME: instead of removing trees, we should just move the current one
    # from outdir to build, and then recreate in outdir.
    builddir = Path(builddir) if builddir else build_path("tree")
    old_tree = {l.id: l for l in walk_tree(tree)} if tree else {}
    out = Path(outdir or tree)
    if not out.parent.exists():
        out.parent.mkdir()

    if out.exists():
        if builddir.exists():
            try:
                rmtree(builddir)
            except OSError:  # pragma: no cover
                # Catch only OS-level failures; a bare except would also
                # swallow KeyboardInterrupt/SystemExit.
                pass
            if builddir.exists():  # pragma: no cover
                raise ValueError("please remove %s before proceeding" % builddir)
        # move the old tree out of the way
        shutil.move(out.as_posix(), builddir.as_posix())
    out.mkdir()

    lffs = lffs or {}
    languages = {}
    for lang in read_lff(Level.language, fp=lffs.get(Level.language)):
        languages[lang.id] = lang
        lang2tree(lang, lang.lineage, out, old_tree)

    for lang in read_lff(Level.dialect, fp=lffs.get(Level.dialect)):
        if not lang.lineage or lang.lineage[0][1] not in languages:
            raise ValueError("unattached dialect")  # pragma: no cover

        lang2tree(lang, languages[lang.lineage[0][1]].lineage + lang.lineage, out, old_tree)
Пример #20
0
 def in_dir(cls, d, empty_tables=False):
     """Create directory *d* if needed and return a dataset read from it."""
     directory = Path(d)
     if not directory.exists():
         directory.mkdir()
     assert directory.is_dir()
     ds = cls.from_metadata(directory)
     if empty_tables:
         del ds.tables[:]
     return ds
Пример #21
0
def _get_dataset(args):
    """Return the Dataset named by the first CLI argument.

    :raises ParserError: if no argument is given or it is not an existing file.
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        # The check is for a file, so the message should say "file".
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
Пример #22
0
def jsondump(obj, fname, log=None):
    """Merge *obj* into the JSON file *fname* (creating it if necessary) and return it."""
    path = Path(fname)
    if path.exists():
        # Existing content wins the merge base; new keys from obj override.
        merged = jsonlib.load(path)
        merged.update(obj)
        obj = merged
    jsonlib.dump(sorted_obj(obj), path, indent=4)
    log_dump(path, log=log)
    return obj
Пример #23
0
 def in_dir(cls, d, empty_tables=False):
     """Create directory *d* if needed and return a dataset read from it."""
     directory = Path(d)
     if not directory.exists():
         directory.mkdir()
     assert directory.is_dir()
     ds = cls.from_metadata(directory)
     if empty_tables:
         del ds.tables[:]
     return ds
Пример #24
0
def _get_dataset(args):
    """Return the Dataset named by the first CLI argument.

    :raises ParserError: if no argument is given or it is not an existing file.
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        # The check is for a file, so the message should say "file".
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
Пример #25
0
def update(path, default=None, load_kw=None, **kw):
    """Generator: load *path* (or *default*), yield the object, then dump it back."""
    path = Path(path)
    if path.exists():
        res = load(path, **(load_kw or {}))
    else:
        if default is None:
            raise ValueError('path does not exist')
        res = default
    yield res
    # Control returns here after the caller is done mutating `res`.
    dump(res, path, **kw)
Пример #26
0
def write_data_file(comment_text, overwrite):
    """Extract an embedded data file from *comment_text* and write it to disk.

    The first line of *comment_text* holds ``<label>: <path>``; the rest is the
    file's content. Returns a human-readable status message.
    """
    header, _, body = comment_text.partition("\n")
    filename = Path(header.split(":", 1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()
    with filename.open("w", encoding='utf8') as fp:
        fp.write(body)
    return "Wrote embedded data file %s.\n" % filename
Пример #27
0
def write_data_file(comment_text, overwrite):
    """Extract an embedded data file from *comment_text* and write it to disk.

    The first line of *comment_text* holds ``<label>: <path>``; the rest is the
    file's content. Returns a human-readable status message.
    """
    header, _, body = comment_text.partition("\n")
    filename = Path(header.split(":", 1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()
    with filename.open("w", encoding='utf8') as fp:
        fp.write(body)
    return "Wrote embedded data file %s.\n" % filename
Пример #28
0
def link(args):
    """
    Link concepts to concept sets for a given concept list.

    Notes
    -----
    If either CONCEPTICON_GLOSS or CONCEPTICON_ID is given, the other is added.

    Examples
    --------
    $ concepticon link path_to_conceptlist.tsv
    """
    api = Concepticon(args.repos)
    # Try the argument as a literal path first, then as a bundled conceptlist.
    clist = Path(args.args[0])
    if not (clist.exists() and clist.is_file()):
        clist = api.data_path('conceptlists', args.args[0])
        if not (clist.exists() and clist.is_file()):
            raise ParserError('no file %s found' % args.args[0])

    rewrite(clist, Linker(clist.stem, api.conceptsets.values()))
Пример #29
0
 def from_file(cls, path, **keywords):
     """
     Function loads a concept list outside the Concepticon collection.
     """
     path = Path(path)
     assert path.exists()
     # Seed all public fields from keywords, then fill in derived values.
     attrs = {field: keywords.get(field, '') for field in Conceptlist.public_fields()}
     attrs.update(
         id=path.stem,
         items=keywords.get('items', len(read_dicts(path))),
         year=keywords.get('year', 0))
     return cls(api=path, **attrs)
Пример #30
0
def curate(args):  # pragma: no cover
    """Run an interactive REPL for curating lexibank datasets.

    Commands and dataset ids are tab-completed; input history is persisted in
    the user's application-data directory. Exit with ``quit``, EOF or Ctrl-C.
    """
    datasets = {ds.id: ds for ds in args.cfg.datasets}

    class TheCompleter(Completer):
        # Complete dataset ids after a known command, command names otherwise.
        def get_completions(self, document, complete_event):
            word_before_cursor = document.get_word_before_cursor(WORD=True)
            words = document.text_before_cursor.split()
            if words and words[0] in commands:
                for ds in fuzzyfinder(word_before_cursor, datasets):
                    yield Completion(ds,
                                     start_position=-len(word_before_cursor))
            else:  # elif word_before_cursor:
                for c in fuzzyfinder(word_before_cursor, commands):
                    yield Completion(c,
                                     start_position=-len(word_before_cursor))

    user_input = []
    appdir = Path(user_data_dir('lexibank'))
    if not appdir.exists():
        appdir.mkdir(parents=True)

    while not user_input or user_input[0] != 'quit':
        try:
            user_input = prompt(
                u'lexibank-curator> ',
                history=FileHistory(str(appdir / 'history.txt')),
                auto_suggest=AutoSuggestFromHistory(),
                completer=TheCompleter(),
            ).split()
        except EOFError:
            break
        except KeyboardInterrupt:
            break

        if len(user_input) == 0:
            continue  # ignore empty commands
        if user_input[0] not in commands:
            print(colored('Invalid command!', 'red'))
            continue
        if len(user_input) > 1 and user_input[1] not in datasets:
            print(colored('Invalid dataset!', 'red'))
            continue

        args.args = user_input[1:]
        try:
            s = time()
            commands[user_input[0]](args)
            # Report the command's wall-clock duration.
            print('[{0:.3f}]'.format(time() - s))
        except Exception as e:
            # Keep the REPL alive on command failure; show the traceback.
            traceback.print_exc()
            print(colored('{0}: {1}'.format(e.__class__.__name__, e), 'red'))

    print('see ya!')
Пример #31
0
class TemporaryPath(object):
    """Context manager yielding the path of a temp file, removed on exit."""

    def __init__(self, suffix=''):
        tmp = NamedTemporaryFile(suffix=suffix)
        self.name = Path(tmp.name)
        # Close immediately: we only want the generated path, not the handle.
        tmp.close()

    def __enter__(self):
        return self.name.as_posix()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.name.exists():
            remove(self.name)
Пример #32
0
def safe_overwrite(fname):
    """Generator: yield a fresh sibling path, then move it over *fname* on resume."""
    fname = Path(fname)
    if not fname.parent.exists():
        fname.parent.mkdir()
    assert fname.parent.exists()
    # Find a sibling path that does not exist yet.
    candidate = fname.parent
    while candidate.exists():
        candidate = fname.parent.joinpath('%s.%s' % (fname.name, random_string(6)))
    yield candidate
    # The caller has written to `candidate`; replace the original atomically-ish.
    if fname.exists():
        remove(fname)
    move(candidate, fname)
Пример #33
0
Файл: util.py Проект: clld/clld
def safe_overwrite(fname):
    """Generator: yield a fresh sibling path, then move it over *fname* on resume."""
    fname = Path(fname)
    if not fname.parent.exists():
        fname.parent.mkdir()
    assert fname.parent.exists()
    # Find a sibling path that does not exist yet.
    candidate = fname.parent
    while candidate.exists():
        candidate = fname.parent.joinpath('%s.%s' % (fname.name, random_string(6)))
    yield candidate
    # The caller has written to `candidate`; replace the original.
    if fname.exists():
        remove(fname)
    move(candidate, fname)
Пример #34
0
class TemporaryPath(object):
    """Context manager yielding the path of a temp file, removed on exit."""

    def __init__(self, suffix=''):
        tmp = NamedTemporaryFile(suffix=suffix)
        self.name = Path(tmp.name)
        # Close immediately: we only want the generated path, not the handle.
        tmp.close()

    def __enter__(self):
        return self.name.as_posix()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.name.exists():
            remove(self.name)
Пример #35
0
 def test_generate_extract(self):
     """XML generation refuses to overwrite without --overwrite; --extract round-trips."""
     xml = self.tmp_path('test.xml')
     self._run_main('-v -o {0} {1}'.format(xml.as_posix(), config_path('basic')))
     self.assertTrue(xml.exists())
     # Overwriting existing files must be specified explicitely:
     self._run_main(
         '-o {0} {1}'.format(xml.as_posix(), config_path('basic')), status=4)
     self._run_main(
         '--overwrite -o {0} {1}'.format(xml.as_posix(), config_path('basic')),
         status=0)
     tcfg = Path('beastling_test.conf')
     self._run_main('--extract {0}'.format(xml.as_posix()))
     self.assertTrue(tcfg.exists())
     remove(tcfg)
Пример #36
0
    def from_file(cls, path, **keywords):
        """
        Function loads a concept list outside the Concepticon collection.

        TODO: add a uniqueness check here, see function read_dicts.
        """
        path = Path(path)
        assert path.exists()
        # Seed all public fields from keywords, then fill in derived values.
        attrs = {field: keywords.get(field, '') for field in Conceptlist.public_fields()}
        attrs.update(
            id=path.stem,
            items=keywords.get('items', len(read_dicts(path))),
            year=keywords.get('year', 0),
            local=True)
        return cls(api=path, **attrs)
Пример #37
0
 def write_info(self, outdir=None):
     """Write this object's config as an .ini file into *outdir* and return its path."""
     outdir = outdir or self.id
     if not isinstance(outdir, Path):
         outdir = Path(outdir)
     if not outdir.exists():
         outdir.mkdir()
     target = outdir.joinpath(self.fname('.ini'))
     self.cfg.write(target)
     if os.linesep == '\n':
         # Force CRLF line endings even on POSIX platforms.
         with target.open(encoding='utf8') as fp:
             content = fp.read()
         with target.open('w', encoding='utf8') as fp:
             fp.write(content.replace('\n', '\r\n'))
     return target
Пример #38
0
 def write_info(self, outdir=None):
     """Write this object's config as an .ini file into *outdir* and return its path."""
     outdir = outdir or self.id
     if not isinstance(outdir, Path):
         outdir = Path(outdir)
     if not outdir.exists():
         outdir.mkdir()
     target = outdir.joinpath(self.fname('.ini'))
     self.cfg.write(target)
     if os.linesep == '\n':
         # Force CRLF line endings even on POSIX platforms.
         with target.open(encoding='utf8') as fp:
             content = fp.read()
         with target.open('w', encoding='utf8') as fp:
             fp.write(content.replace('\n', '\r\n'))
     return target
Пример #39
0
def downloads(req):
    """Yield (relation, [download links]) pairs read from static/downloads.json."""
    mod = importlib.import_module(req.registry.settings['clld.pkg'])
    spec_path = Path(mod.__file__).parent.joinpath('static', 'downloads.json')

    def bitstream_link(oid, spec):
        url = SERVICE_URL.path(
            '/bitstreams/{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    specs = load(spec_path) if spec_path.exists() else {}
    for rel, spec in sorted(specs.items()):
        yield rel, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
Пример #40
0
def downloads(req):
    """Yield (relation, [download links]) pairs read from static/downloads.json.

    :param req: The current request; used to look up the application package.
    """
    mod = importlib.import_module(req.registry.settings['clld.pkg'])
    # Removed a stray debug print of the downloads path.
    dls = Path(mod.__file__).parent.joinpath('static', 'downloads.json')

    def bitstream_link(oid, spec):
        url = SERVICE_URL.path(
            '{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    dls = load(dls) if dls.exists() else {}
    for rel, spec in sorted(dls.items()):
        yield rel, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
Пример #41
0
    def from_file(cls, bibFile, encoding='utf8', lowercase=False):
        """Create bibtex database from a bib-file.

        @param bibFile: path of the bibtex-database-file to be read.
        @param encoding: text encoding used to read the file.
        @param lowercase: passed through to ``Record.from_string``.
        """
        if not isinstance(bibFile, Path):
            bibFile = Path(bibFile)
        if bibFile.exists():
            with bibFile.open(encoding=encoding) as fp:
                content = fp.read()
        else:
            content = ''

        # Raw string avoids the invalid '\s' escape warning; pass flags by
        # keyword since the positional maxsplit/flags form is deprecated.
        return cls((Record.from_string('@' + m, lowercase=lowercase)
                    for m in re.split(r'^\s*@', content, flags=re.MULTILINE)))
Пример #42
0
 def test_generate_extract(self):
     """XML generation refuses to overwrite without --overwrite; --extract round-trips."""
     xml = self.tmp_path('test.xml')
     self._run_main(
         '-v -o {0} {1}'.format(xml.as_posix(), config_path('basic')))
     self.assertTrue(xml.exists())
     # Overwriting existing files must be specified explicitely:
     self._run_main(
         '-o {0} {1}'.format(xml.as_posix(), config_path('basic')), status=4)
     self._run_main(
         '--overwrite -o {0} {1}'.format(xml.as_posix(), config_path('basic')),
         status=0)
     tcfg = Path('beastling_test.conf')
     self._run_main('--extract {0}'.format(xml.as_posix()))
     self.assertTrue(tcfg.exists())
     remove(tcfg)
Пример #43
0
    def test_extractor(self):
        """Round-trip: generated BEAST XML can be extracted back to config + data files."""
        # XML built from on-disk config files can be extracted.
        config = self.make_cfg(
            [config_path(f).as_posix() for f in ("admin", "mk", "embed_data")])
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        self.assertTrue(bool(self._extract(xmlfile)))

        # XML built from a programmatic config dict extracts to a .conf file
        # named after the configured basename.
        config = self.make_cfg({
            'admin': {
                'basename': 'abcdefg'
            },
            'model': {
                'model': 'mk',
                'data': data_path('basic.csv').as_posix()
            }
        })
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        beastling.extractor.extract(xmlfile)
        p = Path('abcdefg.conf')
        self.assertTrue(p.exists())
        cfg = INI(interpolation=None)
        cfg.read(p.as_posix())
        remove(p)
        self.assertEqual(cfg['admin']['basename'], 'abcdefg')
        self.assertEqual(cfg['model']['model'], 'mk')

        # A hand-crafted XML with an embedded data-file marker mentions the
        # data file in the extraction report.
        fname = self.tmp.joinpath('test.xml')
        datafile = self.tmp.joinpath(('test.csv'))
        self.assertFalse(datafile.exists())
        with fname.open('w', encoding='utf8') as fp:
            fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model]
-->
  <!--%s:%s-->
</r>
""" % (beastling.extractor._generated_str,
            beastling.extractor._config_file_str,
            beastling.extractor._data_file_str, datafile.as_posix()))
        res = self._extract(fname)
        self.assertIn(datafile.name, ''.join(res))
Пример #44
0
def write_config(comment_text, overwrite):
    """Reconstruct a BEASTling .conf file from an embedded comment block.

    :param comment_text: Text of the comment holding the original config \
    (line 2 must be the config-file or programmatically-generated marker).
    :param overwrite: If False, refuse to clobber an existing .conf file.
    :return: A human-readable status message describing what happened.
    """
    comment_lines = comment_text.split("\n")
    marker = comment_lines[1]
    # The second line identifies the comment: either an embedded config
    # file or a note that the config was generated programmatically.
    assert marker in (_config_file_str, _proggen_str)
    if marker == _proggen_str:
        return "Original configuration was generated programmatically, no configuration to extract."
    parser = INI()
    parser.read_string("\n".join(comment_lines[2:]))
    # The output file is named after admin.basename when present.
    if parser.has_option("admin", "basename"):
        basename = parser.get("admin", "basename")
    else:
        basename = 'beastling'
    target = Path(basename + '.conf')
    if target.exists() and not overwrite:
        return "BEASTling configuration file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % target
    if not target.parent.exists():
        target.parent.mkdir()

    parser.write(target)
    return "Wrote BEASTling configuration file %s.\n" % target
Пример #45
0
    def test_extractor(self):
        """End-to-end test of the BEASTling XML extractor (compact variant).

        Covers the same three scenarios as the sibling test: file-based
        config round-trip, dict-based config extraction into
        ``<basename>.conf``, and a hand-written XML with embedded
        config/data-file marker comments.
        """
        # Scenario 1: config from fixture files; extraction must produce output.
        config = self.make_cfg(
            [config_path(f).as_posix() for f in ("admin", "mk", "embed_data")])
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        self.assertTrue(bool(self._extract(xmlfile)))

        # Scenario 2: dict-based config; 'basename' determines the .conf name.
        config = self.make_cfg({
            'admin': {'basename': 'abcdefg'},
            'model': {
                'model': 'mk',
                'data': data_path('basic.csv').as_posix()}})
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        beastling.extractor.extract(xmlfile)
        # The extracted config is written into the current working directory.
        p = Path('abcdefg.conf')
        self.assertTrue(p.exists())
        cfg = INI(interpolation=None)
        cfg.read(p.as_posix())
        remove(p)  # clean up the file written to the CWD
        self.assertEqual(cfg['admin']['basename'], 'abcdefg')
        self.assertEqual(cfg['model']['model'], 'mk')

        # Scenario 3: hand-crafted XML using the extractor's marker strings.
        fname = self.tmp.joinpath('test.xml')
        datafile = self.tmp.joinpath(('test.csv'))
        self.assertFalse(datafile.exists())
        with fname.open('w', encoding='utf8') as fp:
            fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model]
-->
  <!--%s:%s-->
</r>
""" % (beastling.extractor._generated_str,
       beastling.extractor._config_file_str,
       beastling.extractor._data_file_str,
       datafile.as_posix()))
        res = self._extract(fname)
        self.assertIn(datafile.name, ''.join(res))
Пример #46
0
def test_extractor(config_factory, tmppath, data_dir):
    """End-to-end pytest test of the BEASTling XML extractor.

    Covers three scenarios: file-based config round-trip, dict-based
    config extraction into ``<basename>.conf``, and a hand-written XML
    containing the extractor's config/data-file marker comments.
    """
    # Scenario 1: config from fixture files; extraction must produce output.
    config = config_factory("admin", "mk", "embed_data")
    xml = beastling.beastxml.BeastXml(config)
    xmlfile = str(tmppath / "beastling.xml")
    xml.write_file(xmlfile)
    assert bool(_extract(xmlfile))

    # Scenario 2: dict-based config; 'basename' determines the .conf name.
    # NOTE(review): the section here is 'model model' (not 'model' as in the
    # unittest variants) — presumably the section name encodes the model
    # label in this API version; confirm against config_factory.
    config = config_factory({
            'admin': {'basename': 'abcdefg'},
            'model model': {
                'model': 'mk',
                'data': str(data_dir / 'basic.csv')}})
    xml = beastling.beastxml.BeastXml(config)
    xmlfile = str(tmppath / "beastling.xml")
    xml.write_file(xmlfile)
    beastling.extractor.extract(xmlfile)
    # The extracted config is written into the current working directory.
    p = Path('abcdefg.conf')
    assert p.exists()
    cfg = INI(interpolation=None)
    cfg.read(p.as_posix())
    remove(p)  # clean up the file written to the CWD
    assert cfg['admin']['basename'] == 'abcdefg'
    assert cfg['model model']['model'] == 'mk'

    # Scenario 3: hand-crafted XML using the extractor's marker strings.
    fname = tmppath / 'test.xml'
    datafile = tmppath / 'test.csv'
    assert not datafile.exists()
    with fname.open('w', encoding='utf8') as fp:
        fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model model]
-->
  <!--%s:%s-->
</r>
""" % (beastling.extractor._generated_str,
       beastling.extractor._config_file_str,
       beastling.extractor._data_file_str,
       datafile.as_posix()))
    res = _extract(fname)
    assert datafile.name in ''.join(res)
Пример #47
0
def configure(cfgpath=None):
    """
    Configure lexibank.

    On first run (no config file present) a default config is written to
    *cfgpath* (or the per-user config dir) and the user is informed; on
    subsequent runs the existing file is read.

    :param cfgpath: Optional path to the config file; defaults to \
    ``<user_config_dir>/config.ini``.
    :return: the ``Config`` instance (note: a single object, not a pair).
    :raises ParserError: if the Glottolog or Concepticon paths in the \
    config are invalid.
    """
    cfgpath = Path(cfgpath) \
        if cfgpath else Path(user_config_dir(pylexibank.__name__)) / 'config.ini'
    if not cfgpath.exists():
        # First run: create the config dir/file with default repository paths.
        print("""
{0}

You seem to be running lexibank for the first time.
Your system configuration will now be written to a config file to be used
whenever lexibank is run lateron.
""".format(colored('Welcome to lexibank!', 'blue', attrs=['bold', 'reverse'])))
        if not cfgpath.parent.exists():
            cfgpath.parent.mkdir(parents=True)
        cfg = Config()
        cfg['paths'] = {k: get_path(src) for k, src in REPOS}
        cfg.write(cfgpath)
        print("""
Configuration has been written to:
{0}
You may edit this file to adapt to changes in your system or to reconfigure settings
such as the logging level.""".format(cfgpath.resolve()))
    else:
        cfg = Config.from_file(cfgpath)

    # Validate configured repository paths; accessing cfg.glottolog raises
    # if the Glottolog path is missing or malformed.
    try:
        cfg.glottolog
    except (FileNotFoundError, ValueError):
        raise ParserError(
            'Misconfigured Glottolog path in {0}'.format(cfgpath))
    if not Path(cfg['paths']['concepticon']).exists():
        raise ParserError(
            'Misconfigured Concepticon path in {0}'.format(cfgpath))

    # Print the configuration directory for reference:
    print("Using configuration file at:")
    print(str(cfgpath) + '\n')
    return cfg
Пример #48
0
class API(UnicodeMixin):
    """An API base class to provide programmatic access to data in a git repository."""

    # Subclasses may point this at a default repository clone, which spares
    # them from overriding __init__.
    __repos_path__ = None

    def __init__(self, repos=None):
        # Explicit argument wins over the class-level default.
        self.repos = Path(repos or self.__repos_path__)

    def __unicode__(self):
        # Use the resolved directory name when the clone exists on disk,
        # otherwise fall back to the (possibly dangling) path's name.
        if self.repos.exists():
            name = self.repos.resolve().name
        else:
            name = self.repos.name
        return '<{0} repository {1} at {2}>'.format(
            name, git_describe(self.repos), self.repos)

    def path(self, *comps):
        """Return a path inside the repository, joined from *comps*."""
        return self.repos.joinpath(*comps)

    @property
    def appdir(self):
        """Directory holding the repository's browser app."""
        return self.path('app')

    @property
    def appdatadir(self):
        """Data directory of the repository's browser app."""
        return self.appdir.joinpath('data')

    @classmethod
    def app_wrapper(cls, func):
        """Decorator for CLI commands: (re)create app data, then open the app.

        The wrapped function is only invoked when the app data directory is
        missing or '--recreate' was passed; afterwards the app's index.html
        (if present) is opened in a web browser.
        """
        @wraps(func)
        def wrapper(args):
            api = cls(args.repos)
            if not api.appdatadir.exists() or '--recreate' in args.args:
                api.appdatadir.mkdir(exist_ok=True)
                args.api = api
                func(args)
            index = api.appdir / 'index.html'
            if index.exists():
                webbrowser.open(index.resolve().as_uri())

        return wrapper
Пример #49
0
def datasets(args):
    """
    cldf datasets <DIR> [ATTRS]

    List all CLDF datasets in directory <DIR>
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    dirpath = Path(args.args[0])
    if not (dirpath.exists() and dirpath.is_dir()):
        raise ParserError('%s is not an existing directory' % dirpath)
    attrs = args.args[1:]
    # Iterate metadata files in deterministic (name-sorted) order.
    for fname in sorted(dirpath.glob('*' + MD_SUFFIX), key=lambda p: p.name):
        md = Metadata(load(fname))
        # The data file is either given by the table's url or derived from
        # the metadata file name by stripping the metadata suffix.
        data = fname.parent.joinpath(
            md.get_table().url or fname.name[:-len(MD_SUFFIX)])
        if not data.exists():
            continue
        print(data)
        if attrs:
            # Align attribute values by padding labels to the widest name.
            width = max(len(a) for a in attrs)
            for attr in attrs:
                if md.get(attr):
                    print('    %s %s' % ((attr + ':').ljust(width + 1), md[attr]))
Пример #50
0
def lff2tree(tree=TREE, outdir=None, test=False):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    # Build the new tree in a scratch location, replacing any leftovers.
    target = Path(outdir or build_path('tree'))
    if not target.parent.exists():
        target.parent.mkdir()
    if target.exists():
        rmtree(target)
    target.mkdir()
    # Index the existing tree by glottocode so info files can be reused.
    known = {lg.id: lg for lg in walk_tree(tree)} if tree else {}

    languages = {}
    for language in read_lff('language'):
        languages[language.id] = language
        lang2tree(language, language.lineage, target, known)

    for dialect in read_lff('dialect'):
        # A dialect must hang off a language we just read.
        if not dialect.lineage or dialect.lineage[0][1] not in languages:
            raise ValueError('unattached dialect')

        parent = languages[dialect.lineage[0][1]]
        lang2tree(dialect, parent.lineage + dialect.lineage, target, known)

    if not test:
        # Swap the freshly built tree in for the old one.
        rmtree(TREE, ignore_errors=True)
        copytree(target, TREE)
Пример #51
0
 def _existing_file(fname):
     fname = Path(fname)
     assert fname.exists() and fname.is_file()
     return fname
Пример #52
0
 def __call__(self, parser, namespace, values, option_string=None):
     """argparse action: require the path part of *values* (before any '#'
     fragment) to exist, then store the raw value on the namespace."""
     candidate, _, _ = values.partition("#")
     if not Path(candidate).exists():
         raise argparse.ArgumentError(self, "file does not exist")
     # Store the original, unsplit value (fragment included).
     setattr(namespace, self.dest, values)
Пример #53
0
    def validate(self, log=None, validators=None):
        """Validate the dataset against its CLDF module specification.

        Checks performed:
        - required tables/columns of the default module metadata are present,
        - all 'dc:conformsTo' values and column property URLs are valid CLDF URIs,
        - per-row custom validators pass, primary keys are unique,
        - referential integrity of the table group holds.

        :param log: Optional log; when given, problems are logged instead of raised.
        :param validators: Optional list of (table, column, validator) triples \
        applied to each row in addition to the module-level VALIDATORS.
        :return: True if no problem was found, False otherwise.
        """
        validators = validators or []
        validators.extend(VALIDATORS)
        success = True
        # Load the canonical metadata for this CLDF module to learn which
        # tables and columns are required.
        default_tg = TableGroup.from_file(
            pkg_path('modules', '{0}{1}'.format(self.module, MD_SUFFIX)))
        for default_table in default_tg.tables:
            dtable_uri = default_table.common_props['dc:conformsTo']
            try:
                table = self[dtable_uri]
            except KeyError:
                log_or_raise('{0} requires {1}'.format(self.module, dtable_uri), log=log)
                success = False
                table = None

            if table:
                # Columns mandated by the spec: required ones plus those some
                # other component declares a dependency on.
                default_cols = {
                    c.propertyUrl.uri for c in default_table.tableSchema.columns
                    if c.required or c.common_props.get('dc:isRequiredBy')}
                cols = {
                    c.propertyUrl.uri for c in table.tableSchema.columns
                    if c.propertyUrl}
                table_uri = table.common_props['dc:conformsTo']
                for col in default_cols - cols:
                    log_or_raise('{0} requires column {1}'.format(table_uri, col), log=log)
                    success = False

        for table in self.tables:
            type_uri = table.common_props.get('dc:conformsTo')
            if type_uri:
                try:
                    TERMS.is_cldf_uri(type_uri)
                except ValueError:
                    success = False
                    log_or_raise('invalid CLDF URI: {0}'.format(type_uri), log=log)

            # FIXME: check whether table.common_props['dc:conformsTo'] is in validators!
            # Collect the custom validators applicable to this table's columns.
            validators_ = []
            for col in table.tableSchema.columns:
                if col.propertyUrl:
                    col_uri = col.propertyUrl.uri
                    try:
                        TERMS.is_cldf_uri(col_uri)
                    except ValueError:
                        success = False
                        log_or_raise('invalid CLDF URI: {0}'.format(col_uri), log=log)
                # A validator matches when its table spec is empty or resolves
                # to this table, and its column spec resolves to this column
                # (identity comparison on the resolved objects).
                for table_, col_, v_ in validators:
                    if (not table_ or table is self.get(table_)) and col is self.get((table, col_)):
                        validators_.append((col, v_))

            # Resolve the table's data file relative to the metadata location.
            fname = Path(table.url.resolve(table._parent.base))
            if fname.exists():
                for fname, lineno, row in table.iterdicts(log=log, with_metadata=True):
                    for col, validate in validators_:
                        try:
                            validate(self, table, col, row)
                        except ValueError as e:
                            log_or_raise(
                                '{0}:{1}:{2} {3}'.format(fname.name, lineno, col.name, e),
                                log=log)
                            success = False
                if not table.check_primary_key(log=log):
                    success = False
            else:
                log_or_raise('{0} does not exist'.format(fname), log=log)
                success = False

        if not self.tablegroup.check_referential_integrity(log=log):
            success = False

        return success
Пример #54
0
        ))
        codes[denumber] = {
            'ID': '{0}-{1}'.format(fid, denumber),
            'Name': dename,
            'Parameter_ID': fid,
        }

    fname, fauthors, aname = list(db.execute(SQL_FEATURE.format(fid)))[0]
    ds.write(
        ValueTable=values,
        LanguageTable=languages,
        ParameterTable=[{
            'ID': fid,
            'Name': fname,
            'Area': aname,
            'Authors': fauthors,
            'Url': 'http://wals.info/feature/' + fid}],
        CodeTable=codes.values(),
    )


if __name__ == '__main__':
    import sys

    # Usage: <script> <db-url> <feature-id>
    # Connects to the WALS database and writes a CLDF dataset for one feature
    # into a directory named wals_<feature>_cldf under the CWD.
    db = create_engine(sys.argv[1])
    feature = sys.argv[2]
    out = Path('wals_{0}_cldf'.format(feature))
    if not out.exists():
        out.mkdir()
    make_cldf(db, out, feature)
Пример #55
0
 def from_file(cls, fname):
     """Alternate constructor: build an instance and, if *fname* exists,
     populate it by reading that file; otherwise return it empty."""
     instance = cls()
     path = Path(fname)
     if path.exists():
         instance.read(path)
     return instance