Example #1
def get_config(p):
    """Read a config file.

    :return: dict of ('section.option', value) pairs.
    """
    if not isinstance(p, Path):
        p = Path(p)
    cfg = {}

    parser = ConfigParser()
    # ConfigParser.readfp() was deprecated and removed in Python 3.12;
    # read_file() is the replacement, and the handle is now closed explicitly
    with p.open(encoding='utf8') as fp:
        parser.read_file(fp)

    for section in parser.sections():
        getters = {
            'int': partial(parser.getint, section),
            'boolean': partial(parser.getboolean, section),
            'float': partial(parser.getfloat, section),
            'list': lambda option: parser.get(section, option).split(),
        }
        default = partial(parser.get, section)
        for option in parser.options(section):
            type_ = option.rpartition('_')[2] if '_' in option else None
            value = getters.get(type_, default)(option)
            cfg['{0}.{1}'.format(section, option)] = value

    return cfg
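A minimal usage sketch (file name and options are hypothetical; the type of each option is inferred from the suffix after the last underscore in its name):

# settings.ini (hypothetical):
#   [server]
#   timeout_int = 30
#   debug_boolean = yes
#   hosts_list = a.example.org b.example.org
cfg = get_config('settings.ini')
# cfg == {'server.timeout_int': 30,
#         'server.debug_boolean': True,
#         'server.hosts_list': ['a.example.org', 'b.example.org']}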
Example #2
 def __init__(self, repos=None):
     self.repos = (Path(repos)
                   if repos else Path(__file__).parent.parent).resolve()
     self.tree = self.repos / 'languoids' / 'tree'
     if not self.tree.exists():
         raise ValueError('repos dir %s missing tree dir: %s' %
                          (self.repos, self.tree))
Example #3
 def create(self, req, filename=None, verbose=True, outfile=None):
     with safe_overwrite(outfile or self.abspath(req)) as tmp:
         if self.rdf:
             # we do not create archives with a readme for rdf downloads, because each
             # RDF entity points to the dataset and the void description of the dataset
             # covers all relevant metadata.
             #
             # TODO: write test for the file name things!?
             #
             with closing(
                     GzipFile(filename=Path(tmp.stem).stem,
                              fileobj=tmp.open('wb'))) as fp:
                 self.before(req, fp)
                 for i, item in enumerate(
                         page_query(self.query(req), verbose=verbose)):
                     self.dump(req, fp, item, i)
                 self.after(req, fp)
         else:
             with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
                 if not filename:
                     fp = self.get_stream()
                     self.before(req, fp)
                     for i, item in enumerate(
                             page_query(self.query(req), verbose=verbose)):
                         self.dump(req, fp, item, i)
                     self.after(req, fp)
                     zipfile.writestr(self.name, self.read_stream(fp))
                 else:  # pragma: no cover
                     zipfile.write(Path(filename).as_posix(), self.name)
                 zipfile.writestr(
                     'README.txt',
                     format_readme(
                         req,
                         req.db.query(Dataset).first()).encode('utf8'))
Example #4
def create_repos(dir_):
    tsammalexdata = dir_.join('tsammalexdata')
    tsammalexdata.mkdir()
    data = tsammalexdata.join('data')
    data.mkdir()

    with data.join('test.csv').open('w', encoding='utf8') as fp:
        fp.write("""\
a,b,c
1,2,3
4,5,6""")

    with data.join('distribution.csv').open('w', encoding='utf8') as fp:
        fp.write("id,coregions__ids,countries_ids")

    test_eco_path = fixture_path('test_ecoregions.json')
    eco_path = data.join('ecoregions.json')

    copy(Path(test_eco_path), Path(eco_path))

    external = data.join('external')
    external.mkdir()
    with external.join('test.csv').open('w', encoding='utf8') as fp:
        fp.write("""\
a,b,c
1,2,3
4,5,6""")
    external.join('gbif').mkdir()
    occurrences = fixture_path('abelmoschusesculentus.json')

    copy(Path(occurrences), Path(external.join('gbif', occurrences.name)))

    return dir_
Example #5
def test_catalogue_of_life(tmpdir):
    data = fixtures('data_providers', 'catalogueoflife')
    id_ = '9249d9473aac5c8e99fb9d758ced91ec'
    repos = create_repos(tmpdir)

    with patch('pytsammalex.util.requests',
               MockRequests(content=data['identify'])):
        prov = CatalogueOfLife(Path(repos))
        assert (prov.identify('x') == id_)

    with patch('pytsammalex.util.requests',
               MockRequests(content=data['metadata'])):
        prov = CatalogueOfLife(Path(repos))
        md = prov.cached_metadata('test', id_)
        taxon = {}
        prov.update(taxon, md)
        assert taxon == {
            'catalogueoflife_url': 'http://www.catalogueoflife.org/col/'
                                   'browse/tree/id/9249d9473aac5c8e99fb9d758ced91ec',
            'class': 'Mammalia',
            'family': 'Felidae',
            'kingdom': 'Animalia',
            'order': 'Carnivora',
            'phylum': 'Chordata',
        }
Example #6
def llod_func(args):  # pragma: no cover
    """Create an RDF dump and compute some statistics about it."""
    tmp = Path(mkdtemp())
    count_rsc = 0
    count_triples = 0

    tmp_dump = tmp.joinpath('rdf.n3')
    with open(as_posix(tmp_dump), 'w') as fp:
        for rsc in RESOURCES:
            args.log.info('Resource type %s ...' % rsc.name)
            try:
                q = DBSession.query(rsc.model)
            except InvalidRequestError:
                args.log.info('... skipping')
                continue
            for obj in page_query(q.order_by(rsc.model.pk), n=10000, verbose=True):
                graph = get_graph(obj, args.env['request'], rsc.name)
                count_triples += len(graph)
                count_rsc += 1
                fp.write(n3(graph, with_head=count_rsc == 1))
            args.log.info('... finished')

    # put in args.data_file('..', 'static', 'download')?
    md = {'path': as_posix(tmp), 'resources': count_rsc, 'triples': count_triples}
    md.update(count_links(as_posix(tmp_dump)))
    jsonlib.dump(md, args.data_file('rdf-metadata.json'))
    print(md)

    dataset = Dataset.first()
    rdf_dump = args.module_dir.joinpath(
        'static', 'download', '%s-dataset.n3' % dataset.id)
    tmp_dump.copy(rdf_dump)
    check_call('gzip -f %s' % rdf_dump, shell=True)
    print(str(rdf_dump))
Example #7
    def to_cldf(self, dest, mdname='cldf-metadata.json'):
        """
        Write the data from the db to a CLDF dataset according to the metadata in `self.dataset`.

        :param dest:
        :param mdname:
        :return: path of the metadata file
        """
        dest = Path(dest)
        if not dest.exists():
            dest.mkdir()

        data = self.read()

        if data[self.source_table_name]:
            sources = Sources()
            for src in data[self.source_table_name]:
                sources.add(Source(
                    src['genre'],
                    src['id'],
                    **{k: v for k, v in src.items() if k not in ['id', 'genre']}))
            sources.write(dest / self.dataset.properties.get('dc:source', 'sources.bib'))

        for table_type, items in data.items():
            try:
                table = self.dataset[table_type]
                table.common_props['dc:extent'] = table.write(
                    [self.retranslate(table, item) for item in items],
                    base=dest)
            except KeyError:
                assert table_type == self.source_table_name, table_type
        return self.dataset.write_metadata(dest / mdname)
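A usage sketch, assuming `db` is an instance of the surrounding class backed by an existing database:

md_path = db.to_cldf('cldf')  # writes tables, the sources file and metadata into cldf/
print(md_path)                # .../cldf/cldf-metadata.json (the default mdname)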
Example #8
 def __call__(self, parser, namespace, values, option_string=None):
     path_ = Path(values)
     if not path_.exists():
         raise argparse.ArgumentError(self, 'path does not exist')
     if not path_.is_dir():
         raise argparse.ArgumentError(self, 'path is no directory')
     setattr(namespace, self.dest, path_)
Example #9
File: util.py Project: clld/apics
def wals_detail_html(context=None, request=None, **kw):
    wals_data = Path(apics.__file__).parent.joinpath(
        'static', 'wals', '%sA.json' % context.parameter.wals_id)
    if not wals_data.exists():
        raise HTTPNotFound()

    wals_data = jsonlib.load(wals_data)
    value_map = {}

    for layer in wals_data['layers']:
        for feature in layer['features']:
            feature['properties']['icon'] = request.registry.getUtility(
                IIcon, name=feature['properties']['icon']).url(request)
            feature['properties']['popup'] = external_link(
                'http://wals.info/languoid/lect/wals_code_'
                + feature['properties']['language']['id'],
                label=feature['properties']['language']['name'])
        value_map[layer['properties']['number']] = {
            'icon': layer['features'][0]['properties']['icon'],
            'name': layer['properties']['name'],
            'number': layer['properties']['number'],
        }

    return {
        'wals_data': wals_data,
        'wals_map': WalsMap(
            context.parameter, request, data=wals_data, value_map=value_map),
        'apics_map': ApicsWalsMap(
            context.parameter, request, data=wals_data, value_map=value_map)}
Example #10
    def create(self, path, metadata, filter_=filter_hidden, object_class=None):
        """
        Create objects in CDSTAR and register them in the catalog.

        Note that we guess the mimetype based on the filename extension, using
        `mimetypes.guess_type`. Thus, it is the caller's responsibility to add custom or
        otherwise uncommon types to the list of known types using `mimetypes.add_type`.

        :param path:
        :param metadata:
        :param filter_:
        :return:
        """
        path = Path(path)
        if path.is_file():
            fnames = [path]
        elif path.is_dir():
            fnames = list(walk(path, mode='files'))
        else:
            raise ValueError(
                'path must be a file or directory')  # pragma: no cover
        for fname in fnames:
            if not filter_ or filter_(fname):
                created, obj = self._create(fname,
                                            metadata,
                                            object_class=object_class)
                yield fname, created, obj
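Because mimetypes are guessed from the filename extension, uncommon extensions should be registered up front; a sketch (assuming `catalog` is an instance of the surrounding class):

import mimetypes

mimetypes.add_type('text/tab-separated-values', '.tab')  # register an uncommon suffix
for fname, created, obj in catalog.create('data/', {'title': 'My dataset'}):
    print(fname, 'created' if created else 'already present')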
Example #11
def langsearch(args):
    """Search Glottolog languoids

    glottolog --repos=. langsearch "QUERY"
    """
    def highlight(text):
        res, i = '', 0
        for m in re.finditer(r'\[\[(?P<m>[^\]]+)\]\]', text):
            res += text[i:m.start()]
            res += colored(m.group('m'), 'red', attrs=['bold'])
            i = m.end()
        res += text[i:]
        return res + '\n'

    count, results = fts.search_langs(args.repos, args.args[0])
    cwd = os.getcwd()
    print('{} matches'.format(count))
    for res in results:
        try:
            p = Path(res.fname).relative_to(Path(cwd))
        except ValueError:
            p = res.fname
        sprint('{0.name} [{0.id}] {0.level}'.format(res),
               color=None,
               attrs=['bold'])
        sprint(p, color='green')
        sprint(highlight(res.highlights) if res.highlights else '')
    print('{} matches'.format(count))
Example #12
def iso2codes(args):
    """
    Map ISO codes to the list of all Glottolog languages and dialects subsumed "under" it.
    """
    nodes = list(args.repos.languoids())

    res = {}
    for node in nodes:
        if node.iso:
            res[node.id] = (node.iso, set())

    for node in nodes:
        if node.level == args.repos.languoid_levels.family or node.id in res:
            continue
        for nid in res:
            matched = False
            for l in node.lineage:
                if l[1] == nid:
                    res[nid][1].add(node.id)
                    matched = True
                    break
            if matched:
                break

    outdir = Path('.') if not args.args else Path(args.args[0])
    with UnicodeWriter(outdir / 'iso2glottocodes.csv') as writer:
        writer.writerow(['iso', 'glottocodes'])
        for gc, (iso, gcs) in res.items():
            writer.writerow([iso, ';'.join([gc] + list(gcs))])
Example #13
def download_tables(outdir=None):
    match = ZIP_NAME_PATTERN.search(urlopen(BASE_URL + 'download.asp').read())
    if not match:
        raise ValueError('no matching zip file name found')  # pragma: no cover
    target = Path(outdir or '.').joinpath(match.group('name'))
    urlretrieve(BASE_URL + match.group('name'), target.as_posix())
    return target
Example #14
def get_dataset(fname=None):
    """Load a CLDF dataset.

    Load the file as `json` CLDF metadata description file, or as metadata-free
    dataset contained in a single csv file.

    The distinction is made depending on the file extension: `.json` files are
    loaded as metadata descriptions, all other files are matched against the
    CLDF module specifications. Directories are checked for the presence of
    any CLDF datasets in undefined order of the dataset types.

    Parameters
    ----------
    fname : str or Path
        Path to a CLDF dataset

    Returns
    -------
    pycldf.Dataset
    """
    if fname is None:
        fname = repository
    else:
        fname = Path(fname)
    if not fname.exists():
        raise FileNotFoundError('{:} does not exist'.format(fname))
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
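The dispatch in practice (file names hypothetical):

ds = get_dataset('Wordlist-metadata.json')  # .json -> Dataset.from_metadata
ds = get_dataset('forms.csv')               # other suffixes -> Dataset.from_data
ds = get_dataset()                          # defaults to the module-level `repository`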
Example #15
    def __init__(self, fname):
        """
        A `Database` instance is initialized with a file path.

        :param fname: Path to a file in the file system where the db is to be stored.
        """
        self.fname = Path(fname)
Example #16
def test_data_url_from_string():
    from clldutils.path import Path

    assert data_url('ü') == 'data:application/octet-stream;base64,w7w='
    assert data_url(Path(__file__)).startswith('data:')
    assert data_url(Path(__file__),
                    mimetype='text/plain').startswith('data:text/plain')
Example #17
def stats(args):
    """
    cldf stats <DATASET>

    Print basic stats for CLDF dataset <DATASET>, where <DATASET> may be the path to
    - a CLDF metadata file
    - a CLDF core data file
    - a CLDF zip archive
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.zip':
        ds = Dataset.from_zip(fname)
    elif fname.name.endswith(MD_SUFFIX):
        ds = Dataset.from_metadata(fname)
    else:
        ds = Dataset.from_file(fname)
    print(fname)
    stats_ = ds.stats
    print("""
Name: %s
Different languages: %s
Different parameters: %s
Rows: %s
""" % (
        ds.name,
        len(stats_['languages']),
        len(stats_['parameters']),
        stats_['rowcount']
    ))
Example #18
    def write(self, outdir='.', suffix='.csv', cited_sources_only=False, archive=False):
        outdir = Path(outdir)
        if not outdir.exists():
            raise ValueError(outdir.as_posix())

        close = False
        if archive:
            if isinstance(archive, Archive):
                container = archive
            else:
                container = Archive(outdir.joinpath(self.name + '.zip'), mode='w')
                close = True
        else:
            container = outdir

        fname = outdir.joinpath(self.name + suffix)
        if fname.suffix in TAB_SUFFIXES:
            self.table.dialect.delimiter = '\t'

        with UnicodeWriter(
                None if isinstance(container, Archive) else fname,
                delimiter=self.table.dialect.delimiter) as writer:
            writer.writerow(self.fields)
            for row in self.rows:
                writer.writerow(row.to_list())

        if isinstance(container, Archive):
            container.write_text(writer.read(), fname.name)
        self.table.url = fname.name

        self.metadata.write(Dataset.filename(fname, 'metadata'), container)
        ids = self._cited_sources if cited_sources_only else None
        self.sources.write(Dataset.filename(fname, 'sources'), container, ids=ids)
        if close:
            container.close()
Example #19
class Cache(object):
    def __init__(self, dir_=None):
        self._dir = Path(dir_ or CACHE_DIR)
        if not self._dir.exists():
            self._dir.mkdir(parents=True)  # pragma: no cover

    def _path(self, key):
        return self._dir.joinpath(path_component(key))

    def __len__(self):
        return len(list(self.keys()))

    def __getitem__(self, item):
        with self._path(item).open('rb') as fp:
            return pickle.load(fp)

    def __setitem__(self, key, value):
        with self._path(key).open('wb') as fp:
            pickle.dump(value, fp)

    def __delitem__(self, key):
        remove(self._path(key))

    def __contains__(self, item):
        return self._path(item).exists()

    def keys(self):
        for p in self._dir.iterdir():
            yield as_unicode(p.name)

    def clear(self):
        for key in self.keys():
            remove(self._path(key))
Example #20
def cldf(args):
    """
    Create CLDF datasets from the raw data for a dataset.

    lexibank --glottolog-repos PATH --concepticon-repos PATH cldf [DATASET_ID]
    """
    if not args.glottolog_repos or not Path(args.glottolog_repos).exists():
        raise ParserError('Invalid glottolog repository path given')

    if not args.concepticon_repos or not Path(args.concepticon_repos).exists():
        raise ParserError('Invalid concepticon repository path given')

    # FIXME: get dict of all glottolog langs right here, and attach to datasets!
    try:
        languoids = load('glottolog')
    except ValueError:
        languoids = {
            l.id: l
            for l in Glottolog(args.glottolog_repos).languoids()
        }
        dump(languoids, 'glottolog')

    def _cldf(ds, **kw):
        ds.glottolog_languoids = languoids
        ds.cldf(**kw)
        ds.write_cognates()

    with_dataset(args, _cldf)
Example #21
def download_tables(outdir=None):
    match = ZIP_NAME_PATTERN.search(_open('code_tables/download_tables').read().decode('utf-8-sig'))
    if not match:
        raise ValueError('no matching zip file name found')  # pragma: no cover
    target = Path(outdir or '.').joinpath(match.group('name').split('/')[-1])
    with target.open('wb') as fp:
        fp.write(_open(match.group('name')).read())
    return target
Example #22
 def in_dir(cls, d, empty_tables=False):
     fname = Path(d)
     if not fname.exists():
         fname.mkdir()
     assert fname.is_dir()
     res = cls.from_metadata(fname)
     if empty_tables:
         del res.tables[:]
     return res
Example #23
 def test_init3(self):  # with kw check=True
     bad_file = Path(test_data('bad_file.tsv'))
     assert_raises(ValueError, LexStat, bad_file.as_posix())
     ls = self._make_one(bad_file.as_posix(), check=True, apply_checks=True)
     assert hasattr(ls, 'errors')
     cleaned = bad_file.parent.joinpath(bad_file.name + '_cleaned.tsv')
     self.assertTrue(cleaned.exists())
     os.remove(cleaned.as_posix())
     assert_raises(ValueError, LexStat, {0: ['concept', 'language', 'ipa']})
Example #24
def jsondump(obj, fname, log=None):
    fname = Path(fname)
    if fname.exists():
        d = jsonlib.load(fname)
        d.update(obj)
        obj = d
    jsonlib.dump(sorted_obj(obj), fname, indent=4)
    log_dump(fname, log=log)
    return obj
Example #25
def _get_dataset(args):
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
Example #26
def copy_downloads(app, pattern='*'):
    dl_dir = app.src.joinpath(app.name, 'static', 'download')
    require.files.directory(dl_dir, use_sudo=True, mode="777")
    local_dl_dir = Path(import_module(app.name).__file__).parent.joinpath('static', 'download')
    for f in local_dl_dir.glob(pattern):
        target = dl_dir.joinpath(f.name)
        create_file_as_root(target, open(f.as_posix()).read())
        sudo('chown %s:%s %s' % (app.name, app.name, target))
    require.files.directory(dl_dir, use_sudo=True, mode="755")
Example #27
@contextmanager  # from contextlib: the yield below only works under contextmanager
def update(path, default=None, load_kw=None, **kw):
    path = Path(path)
    if not path.exists():
        if default is None:
            raise ValueError('path does not exist')
        res = default
    else:
        res = load(path, **(load_kw or {}))
    yield res
    dump(res, path, **kw)
Example #28
def write_data_file(comment_text, overwrite):
    lines = comment_text.split("\n")
    filename = Path(lines[0].split(":", 1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()
    with filename.open("w", encoding='utf8') as fp:
        fp.write("\n".join(lines[1:]))
    return "Wrote embedded data file %s.\n" % filename
Example #29
def create_archive(args):
    rels = get_release_config()
    for section in rels.sections():
        _load_sql_dump(rels[section], args.log)
    out = Path('archive')
    if args.args:
        out = Path(args.args[0])
    static_archive.create(
        [rels.get(sec, 'version') for sec in rels.sections()], out)
    args.log.info('static archive created in {0}'.format(out))
Example #30
 def format_data_file(self, filename):
     """
     Return an ElementTree node corresponding to a comment containing
     the text of the specified data file.
     """
     header = "BEASTling embedded data file: %s" % filename
     with Path(filename).open("r") as fp:
         data_block = "\n".join([header, fp.read()])
     return ET.Comment(data_block)
Example #31
 def from_file(cls, path, **keywords):
     """
     Function loads a concept list outside the Concepticon collection.
     """
     path = Path(path)
     assert path.exists()
     attrs = {f: keywords.get(f, '') for f in Conceptlist.public_fields()}
     attrs.update(id=path.stem,
                  items=keywords.get('items', len(read_dicts(path))),
                  year=keywords.get('year', 0))
     return cls(api=path, **attrs)
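A usage sketch (file name and keyword are hypothetical):

cl = Conceptlist.from_file('my-concepts.tsv', year=2021)
# cl.id == 'my-concepts' (the file stem); items defaults to the row count of the file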
Example #32
def test_json_data(tmpdir):
    tmp_ = create_repos(tmpdir)

    with JsonData('test.json', repos=Path(tmp_)) as jdat:
        jdat['a'] = 1

    assert (data_file('test.json', repos=Path(tmp_)).exists() is True)

    with JsonData('test.json', repos=Path(tmp_)) as jdat:
        assert (len(jdat) == 1)
        assert (jdat['a'] == 1)
Example #33
def load_normalized(_path):
    """Normalization for quasi-identical strings which are often confused."""
    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)
    norms = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
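                # eval() decodes backslash escapes (e.g. \u0361) embedded in the file;
                # the replacement is evaluated as a raw string, keeping backslashes literal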
                norms[eval('"' + source + '"')] = eval('r"' + target + '"')
    return norms
Example #34
class TemporaryPath(object):
    def __init__(self, suffix=''):
        fp = NamedTemporaryFile(suffix=suffix)
        self.name = Path(fp.name)
        fp.close()

    def __enter__(self):
        return self.name.as_posix()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.name.exists():
            remove(self.name)
Example #35
@contextmanager  # from contextlib: the yield below only works under contextmanager
def safe_overwrite(fname):
    fname = Path(fname)
    if not fname.parent.exists():
        fname.parent.mkdir()
    assert fname.parent.exists()
    tmp = fname.parent
    while tmp.exists():
        tmp = fname.parent.joinpath('%s.%s' % (fname.name, random_string(6)))
    yield tmp
    if fname.exists():
        remove(fname)
    move(tmp, fname)
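Callers use the decorated generator as a context manager: they write to the yielded temporary path, and the target is only replaced after the block exits cleanly (Example #3 above is a real call site). A sketch with a hypothetical target:

with safe_overwrite('downloads/data.zip') as tmp:
    with tmp.open('wb') as fp:
        fp.write(b'...')
# downloads/data.zip has now been replaced by the temp file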
Example #36
def curate(args):  # pragma: no cover
    datasets = {ds.id: ds for ds in args.cfg.datasets}

    class TheCompleter(Completer):
        def get_completions(self, document, complete_event):
            word_before_cursor = document.get_word_before_cursor(WORD=True)
            words = document.text_before_cursor.split()
            if words and words[0] in commands:
                for ds in fuzzyfinder(word_before_cursor, datasets):
                    yield Completion(ds,
                                     start_position=-len(word_before_cursor))
            else:  # elif word_before_cursor:
                for c in fuzzyfinder(word_before_cursor, commands):
                    yield Completion(c,
                                     start_position=-len(word_before_cursor))

    user_input = []
    appdir = Path(user_data_dir('lexibank'))
    if not appdir.exists():
        appdir.mkdir(parents=True)

    while not user_input or user_input[0] != 'quit':
        try:
            user_input = prompt(
                u'lexibank-curator> ',
                history=FileHistory(str(appdir / 'history.txt')),
                auto_suggest=AutoSuggestFromHistory(),
                completer=TheCompleter(),
            ).split()
        except EOFError:
            break
        except KeyboardInterrupt:
            break

        if len(user_input) == 0:
            continue  # ignore empty commands
        if user_input[0] not in commands:
            print(colored('Invalid command!', 'red'))
            continue
        if len(user_input) > 1 and user_input[1] not in datasets:
            print(colored('Invalid dataset!', 'red'))
            continue

        args.args = user_input[1:]
        try:
            s = time()
            commands[user_input[0]](args)
            print('[{0:.3f}]'.format(time() - s))
        except Exception as e:
            traceback.print_exc()
            print(colored('{0}: {1}'.format(e.__class__.__name__, e), 'red'))

    print('see ya!')
Example #37
def main():  # pragma: no cover
    pkg_dir = Path(glottolog3.__file__).parent
    parser = ArgumentParserWithLogging('glottolog3')
    parser.add_argument(
        '--repos',
        help="path to glottolog data repository",
        type=Glottolog,
        default=Glottolog(
            Path(glottolog3.__file__).parent.parent.parent.joinpath(
                'glottolog')))
    parser.add_argument('--pkg-dir', help=argparse.SUPPRESS, default=pkg_dir)
    sys.exit(parser.main())
Example #38
def upgrade():
    mappings = Path(
        __file__).parent.joinpath('..', '..', 'data', 'apics_phoible.json').as_posix()
    with open(mappings) as fp:
        mappings = json.load(fp)

    conn = op.get_bind()
    for k, v in mappings.items():
        d = conn.execute("select jsondata from parameter where id = %s", (k,)).fetchone()
        d = json.loads(d[0])
        d.update(phoible=v)
        conn.execute(
            "update parameter set jsondata = %s where id = %s", (json.dumps(d), k))
Example #39
    def create(self, dir_, content):
        """Write ``content`` to a file using ``dir_`` as file-system directory.

        :return: File-system path of the file that was created.
        """
        p = Path(dir_).joinpath(self.relpath)
        if not p.parent.exists():
            p.parent.mkdir(parents=True)
        with open(p.as_posix(), 'wb') as fp:
            if isinstance(content, text_type):
                content = content.encode('utf8')
            fp.write(content)
        return p.as_posix()
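A usage sketch (assuming the instance's `relpath` is, say, 'images/x.jpg'):

fs_path = obj.create('/tmp/repos', 'some text')  # text content gets encoded to utf8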
Example #40
    def create(self, dir_, content):
        """Write ``content`` to a file using ``dir_`` as file-system directory.

        :return: File-system path of the file that was created.
        """
        if not isinstance(dir_, Path):
            dir_ = Path(dir_)
        p = dir_.joinpath(self.relpath)
        if not p.parent.exists():
            p.parent.mkdir(parents=True)
        with open(p.as_posix(), 'wb') as fp:
            fp.write(content)
        return p.as_posix()
Example #41
 def test_generate_extract(self):
     xml = self.tmp_path('test.xml')
     self._run_main('-v -o {0} {1}'.format(xml.as_posix(), config_path('basic')))
     self.assertTrue(xml.exists())
     # Overwriting existing files must be specified explicitly:
     self._run_main('-o {0} {1}'.format(
         xml.as_posix(), config_path('basic')), status=4)
     self._run_main('--overwrite -o {0} {1}'.format(
         xml.as_posix(), config_path('basic')), status=0)
     tcfg = Path('beastling_test.conf')
     self._run_main('--extract {0}'.format(xml.as_posix()))
     self.assertTrue(tcfg.exists())
     remove(tcfg)
Example #42
def dependencies_graph(imps):
    deps = {(f1, f2): v for (v, f1, f2) in imps if v > 0.0}
    V = {f for fs in deps for f in fs}  # Python 3: iterate the dict directly
    G = {k: v for k, v in deps.items() if v > 0.0}
    MSTs = [mst(G, x) for x in V]
    # pick the MST with the largest total weight; avoids comparing dicts on ties
    H = max(MSTs, key=lambda t: sum(t.values()))
    #W = dict([(y, 1.0-v) for ((x, y), v) in H.items()])
    #sav(dot(H, V), 'grambank_mst.gv')
    path = Path(grambank.__file__).parent.joinpath('static', 'dependencies.gv')
    with open(path.as_posix(), 'w') as fp:
        fp.write(dot(H, V))

    return (H, V) #dot(H, V)
Example #43
 def write_info(self, outdir=None):
     outdir = outdir or self.id
     if not isinstance(outdir, Path):
         outdir = Path(outdir)
     if not outdir.exists():
         outdir.mkdir()
     fname = outdir.joinpath(self.fname('.ini'))
     self.cfg.write(fname)
     if os.linesep == '\n':
         with fname.open(encoding='utf8') as fp:
             text = fp.read()
         with fname.open('w', encoding='utf8') as fp:
             fp.write(text.replace('\n', '\r\n'))
     return fname
Example #44
def link(args):
    """\
Complete linking of concepts to concept sets. If either CONCEPTICON_GLOSS or
CONCEPTICON_ID is given, the other is added.

concepticon link <concept-list>
"""
    conceptlist = Path(args.args[0])
    if not conceptlist.exists() or not conceptlist.is_file():
        conceptlist = data_path('conceptlists', args.args[0])
        if not conceptlist.exists() or not conceptlist.is_file():
            raise ParserError('no file %s found' % args.args[0])

    rewrite(conceptlist, Linker(conceptlist.stem))
Example #45
def downloads(req):
    mod = importlib.import_module(req.registry.settings['clld.pkg'])
    dls = Path(mod.__file__).parent.joinpath('static', 'downloads.json')
    print(dls)

    def bitstream_link(oid, spec):
        url = SERVICE_URL.path(
            '{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    dls = load(dls) if dls.exists() else {}
    for rel, spec in sorted(dls.items()):
        yield rel, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
Example #46
    def from_file(cls, bibFile, encoding='utf8', lowercase=False):
        """Create bibtex database from a bib-file.

        @param bibFile: path of the bibtex-database-file to be read.
        """
        if not isinstance(bibFile, Path):
            bibFile = Path(bibFile)
        if bibFile.exists():
            with bibFile.open(encoding=encoding) as fp:
                content = fp.read()
        else:
            content = ''

        return cls((Record.from_string('@' + m, lowercase=lowercase)
                    for m in re.split(r'^\s*@', content, flags=re.MULTILINE)))
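A usage sketch (assuming the surrounding class is named `Database`; the file name is hypothetical):

db = Database.from_file('sources.bib', lowercase=True)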
Example #47
def load_alias(_path):
    """
    Alias are one-character sequences which we can convert on a step-by step
    basis by applying them successively to all subsegments of a segment.
    """
    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)

    alias = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
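                # eval() decodes backslash escapes (e.g. \u0361) embedded in the file;
                # the target is evaluated as a raw string, keeping backslashes literal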
                alias[eval('"' + source + '"')] = eval('r"' + target + '"')
    return alias
Example #48
    def test_Matrix(self):
        from wals3.adapters import Matrix

        p = Path(mktemp())
        assert not p.exists()

        class TestMatrix(Matrix):
            def abspath(self, req):
                return p

            def query(self, req):
                return Matrix.query(self, req).filter(Language.pk < 100)

        m = TestMatrix(Language, "wals3", description="Feature values CSV")
        m.create(self.env["request"], verbose=False)
        assert p.exists()
        remove(p)
Example #49
def write_config(comment_text, overwrite):
    lines = comment_text.split("\n")
    assert lines[1] in (_config_file_str, _proggen_str)
    if lines[1] == _proggen_str:
        return "Original configuration was generated programmatically, no configuration to extract."
    config_text = "\n".join(lines[2:])
    p = INI()
    p.read_string(config_text)
    filename = p.get("admin", "basename") \
        if p.has_option("admin", "basename") else 'beastling'
    filename = Path(filename + '.conf')
    if filename.exists() and not overwrite:
        return "BEASTling configuration file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()

    p.write(filename)
    return "Wrote BEASTling configuration file %s.\n" % filename
Example #50
def main(args):
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(os.path.expanduser('~')).joinpath('venvs/lexirumah/lexirumah-data')

    with transaction.manager:
        dataset = common.Dataset(
            id=lexirumah.__name__,
            name="lexirumah",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexirumah.model-ling.eu',
            contact='*****@*****.**',
            jsondata={
                'license_icon': 'cc-by.png',
                'license_name': 'Creative Commons Attribution 4.0 International License'})
        DBSession.add(dataset)

    glottolog_repos = Path(
        lexirumah.__file__).parent.parent.parent.parent.joinpath('glottolog3', 'glottolog')
    languoids = {l.id: l for l in Glottolog(glottolog_repos).languoids()}
    concepticon = Concepticon(
        Path(lexirumah.__file__).parent.parent.parent.parent.joinpath('concepticon', 'concepticon-data'))
    conceptsets = {c.id: c for c in concepticon.conceptsets.values()}

    skip = True
    for dname in sorted(repos.joinpath('datasets').iterdir(), key=lambda p: p.name):
        #if dname.name == 'benuecongo':
        #    skip = False
        #if skip:
        #    continue
        if dname.is_dir() and dname.name != '_template':
            mdpath = dname.joinpath('cldf', 'metadata.json')
            if mdpath.exists():
                print(dname.name)
                import_cldf(dname, load(mdpath), languoids, conceptsets)

    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexiRumahLanguage),
            glottolog_repos=glottolog_repos,
            isolates_icon='tcccccc')
Example #51
    def create(self, req, filename=None, verbose=True):
        p = self.abspath(req)
        if not p.parent.exists():  # pragma: no cover
            p.parent.mkdir()
        tmp = Path('%s.tmp' % p.as_posix())

        if self.rdf:
            # we do not create archives with a readme for rdf downloads, because each
            # RDF entity points to the dataset and the void description of the dataset
            # covers all relevant metadata.
            #
            # TODO: write test for the file name things!?
            #
            with closing(GzipFile(
                    filename=Path(tmp.stem).stem, fileobj=tmp.open('wb')
            )) as fp:
                self.before(req, fp)
                for i, item in enumerate(page_query(self.query(req), verbose=verbose)):
                    self.dump(req, fp, item, i)
                self.after(req, fp)
        else:
            with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
                if not filename:
                    fp = self.get_stream()
                    self.before(req, fp)
                    for i, item in enumerate(
                            page_query(self.query(req), verbose=verbose)):
                        self.dump(req, fp, item, i)
                    self.after(req, fp)
                    zipfile.writestr(self.name, self.read_stream(fp))
                else:  # pragma: no cover
                    zipfile.write(filename, self.name)
                zipfile.writestr(
                    'README.txt',
                    README.format(
                        req.dataset.name,
                        '=' * (
                            len(req.dataset.name)
                            + len(' data download')),
                        req.dataset.license,
                        TxtCitation(None).render(req.dataset, req)).encode('utf8'))
        if p.exists():  # pragma: no cover
            remove(p)
        move(tmp, p)
Example #52
    def test_extractor(self):
        config = self.make_cfg(
            [config_path(f).as_posix() for f in ("admin", "mk", "embed_data")])
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        self.assertTrue(bool(self._extract(xmlfile)))

        config = self.make_cfg({
            'admin': {'basename': 'abcdefg'},
            'model': {
                'model': 'mk',
                'data': data_path('basic.csv').as_posix()}})
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        beastling.extractor.extract(xmlfile)
        p = Path('abcdefg.conf')
        self.assertTrue(p.exists())
        cfg = INI(interpolation=None)
        cfg.read(p.as_posix())
        remove(p)
        self.assertEqual(cfg['admin']['basename'], 'abcdefg')
        self.assertEqual(cfg['model']['model'], 'mk')

        fname = self.tmp.joinpath('test.xml')
        datafile = self.tmp.joinpath('test.csv')
        self.assertFalse(datafile.exists())
        with fname.open('w', encoding='utf8') as fp:
            fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model]
-->
  <!--%s:%s-->
</r>
""" % (beastling.extractor._generated_str,
       beastling.extractor._config_file_str,
       beastling.extractor._data_file_str,
       datafile.as_posix()))
        res = self._extract(fname)
        self.assertIn(datafile.name, ''.join(res))
Example #53
def _str_path(path, mkdir=False):
    """Get a file-system path as text_type, suitable for passing into io.open.

    Parameters
    ----------
    path : {text_type, Path}
        A fs path either as Path instance or as text_type.
    mkdir : bool (default=False)
        If True, create the directories within the path.

    Returns
    -------
    path : text_type
        The path as text_type.
    """
    res = Path(path_component(path))
    if mkdir and res.parent and not res.parent.exists():
        res.parent.mkdir(parents=True)
    return res.as_posix()
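For instance (hypothetical path), the mkdir flag lets callers create parent directories right before opening a file for writing:

import io

with io.open(_str_path('out/sub/data.txt', mkdir=True), 'w', encoding='utf8') as fp:
    fp.write('...')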