示例#1
0
def test_non_ascii():
    """Check that non-ASCII path components convert to text via ``as_unicode``."""
    from clldutils.path import Path, path_component, as_unicode

    # A bytes component is expected to compare equal to its text form.
    assert path_component(b'abc') == 'abc'

    parent = Path(path_component('äöü'))
    nested = parent.joinpath(path_component('äöü'))
    # Both the full path and its last component must come back as text.
    for candidate in (nested, nested.name):
        assert isinstance(as_unicode(candidate), text_type)
示例#2
0
    def from_path(cls, path):
        """
        Build an instance from the model directory `path`.

        The converter (read from ``path/converter``) and the scorer (read from
        `path`) are looked up in a `Cache` under keys of the form
        ``SoundClassModel.<directory name>.<suffix>`` and only read from disk when
        the key is missing.  Metadata is parsed from the ``INFO`` file in `path`:
        every line starting with ``@key: value`` sets ``info[key]``.

        :param path: directory containing the model files; must satisfy ``is_dir()``.
        :return: ``cls(name, converter, scorer, vowels, tones, info)``.
        """
        assert path.is_dir()
        cache = Cache()

        def cache_key(suffix):
            return '.'.join(['SoundClassModel', as_unicode(path.name), suffix])

        if cache_key('converter') not in cache:
            cache[cache_key('converter')] = _read_converter(path.joinpath('converter'))
        converter = cache[cache_key('converter')]

        if cache_key('scorer') not in cache:
            cache[cache_key('scorer')] = _read_scorer(path)
        scorer = cache[cache_key('scorer')]

        # read information from the info-file; every known key defaults to ''
        info = {k: ''
                for k in ['description', 'compiler', 'source', 'date', 'vowels', 'tones']}
        # Raw string: '\s' in a plain literal is an invalid string escape that newer
        # Python versions warn about.  The compiled pattern is unchanged.
        meta_pattern = re.compile(r'@(?P<key>[^:]+):\s*(?P<value>.*)')
        for line in read_lines(path.joinpath('INFO')):
            match = meta_pattern.match(line)
            if match:
                info[match.group('key')] = match.group('value')

        return cls(
            as_unicode(path.name), converter, scorer, info['vowels'], info['tones'], info)
示例#3
0
def upload_sources(args):
    """
    concepticon upload_sources path/to/cdstar/catalog

    For every ``*.pdf`` in the ``sources`` data directory whose stem is not yet
    known to the local ``cdstar.json`` catalog, an object is created through the
    CDSTAR catalog given as first argument and registered locally.  Afterwards a
    table of contents listing all local catalog items is handed to ``readme``.

    The environment variables CDSTAR_URL, CDSTAR_USER and CDSTAR_PWD must be set;
    a missing one raises ``KeyError``.
    """
    toc = ['# Sources\n']
    api = Concepticon(args.data)
    with SourcesCatalog(api.data_path('sources', 'cdstar.json')) as lcat:
        with Catalog(args.args[0],
                     cdstar_url=os.environ['CDSTAR_URL'],
                     cdstar_user=os.environ['CDSTAR_USER'],
                     cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
            for fname in sorted(api.data_path('sources').glob('*.pdf'),
                                key=lambda f: f.stem):
                clid = as_unicode(fname.stem)
                if not lcat.get(clid):
                    # Only the object of the first result is needed; list() still
                    # consumes everything cat.create yields.
                    _, _, obj = list(
                        cat.create(fname, {'collection': 'concepticon'}))[0]
                    # The return value was previously bound to an unused local.
                    lcat.add(clid, obj)

        for key in sorted(lcat.items):
            spec = lcat.get(key)
            toc.append('- [{0} [PDF {1}]]({2})'.format(
                key, format_size(spec['size']), spec['url']))

    readme(api.data_path('sources'), toc)
示例#4
0
def run_and_dump(name, obj, __checksum__=None, **kw):
    """
    Run the operation `name` adapted from `obj` and dump its result to CACHE_DIR.

    If a truthy `__checksum__` is passed and the corresponding file exists in
    CACHE_DIR, the stored result is loaded and returned instead of re-running.

    :return: pair ``(result, checksum)``; for a fresh run the checksum is the stem
        of the dumped file.
    """
    operation = get_adapter(obj, interfaces.IOperation, name=name)

    if __checksum__:
        stored = path_from_checksum(__checksum__, outdir=CACHE_DIR)
        if stored.exists():
            return load(stored, cls=operation.returns), __checksum__

    result = operation(**kw)
    dumped = dump(result, outdir=CACHE_DIR)
    return result, as_unicode(dumped.stem)
示例#5
0
 def _create(self, path, metadata, object_class=None):
     """
     Create an object for the file at `path` unless one with the same md5 is known.

     The wrapper class is `object_class` if given, otherwise the attribute of
     ``media`` named after the capitalized main MIME type, falling back to
     ``media.File``.

     :return: ``(True, <added object>)`` for a new file, otherwise
         ``(False, <first entry stored for that md5>)``.
     """
     guessed = mimetypes.guess_type(path.as_posix(), strict=False)[0]
     if not guessed:
         # Unknown extension: treat as opaque binary data.
         guessed = 'application/octet-stream'
     maintype, _subtype = guessed.split('/')

     if object_class:
         cls = object_class
     else:
         cls = getattr(media, maintype.capitalize(), media.File)
     file_ = cls(as_unicode(path.as_posix()))

     if file_.md5 in self.md5_to_object:
         return False, self.md5_to_object[file_.md5][0]

     obj, md, _bitstreams = file_.create_object(self.api, metadata)
     return True, self.add(obj, metadata=md)
示例#6
0
def upload_mediafiles(args):
    """
    Uploads media files from the passed directory to the CDSTAR server.

    Each entry of the directory ``args.args[0]`` whose (lower-cased) suffix is a
    supported image, PDF or movie format is passed to ``cat.create`` together with
    metadata derived from the file name; other entries are skipped with a message.
    The process exits with status 1 if no existing path was passed.

    NOTE(review): this docstring used to state that an existing object with the
    same metadata 'name' is deleted first.  Nothing in this function deletes
    objects, so that behaviour is not implemented here.
    """
    # Local import: the ``exit`` builtin is injected by the ``site`` module and is
    # meant for interactive use; ``sys.exit`` is always available.
    import sys

    supported_types = {
        'imagefile': ['png', 'gif', 'jpg', 'jpeg', 'tif', 'tiff'],
        'pdffile': ['pdf'],
        'moviefile': ['mp4']
    }

    if not args.args or not Path(args.args[0]).exists():
        print("Error: Upload path does not exist")
        sys.exit(1)

    with get_catalog(args) as cat:
        # NOTE(review): name_map is built but never consulted below.  It is kept
        # because building it iterates the catalog and requires every object to
        # have a 'name' in its metadata; confirm whether it can be dropped.
        name_map = {obj.metadata['name']: obj for obj in cat}

        for ifn in sorted(Path(args.args[0]).iterdir()):
            print(ifn.name)

            # Suffix without the leading dot; '' for entries without a suffix.
            fmt = ifn.suffix[1:].lower()
            meta_type = next(
                (t for t, suffixes in supported_types.items() if fmt in suffixes),
                None)
            if meta_type is None:
                print('No supported media format - skipping {0}'.format(fmt))
                continue

            md = {
                'collection': 'amsd',
                'name': as_unicode(ifn.stem),
                'type': meta_type,
                'path': as_unicode(ifn.name)
            }

            # Create the new object
            for (fname, created, obj) in cat.create(str(ifn), md):
                args.log.info('{0} -> {1} object {2.id}'.format(
                    fname, 'new' if created else 'existing', obj))
示例#7
0
def _load_sql_dump(rel, log):
    """
    Create a database from the SQL dump ``glottolog-<version>.sql`` of `rel`.

    The database is named after the stem of the dump file.  If ``psql -l`` already
    lists a database of that name, only a warning is logged.  Otherwise the dump is
    downloaded when it does not exist locally, the database is created with
    ``createdb`` and the dump is loaded with ``psql -f``.

    :param rel: mapping with a ``'version'`` key.
    :param log: logger used for status messages.
    :raises subprocess.CalledProcessError: if one of the external commands fails.
    """
    dump = Path('glottolog-{0}.sql'.format(rel['version']))
    dbname = as_unicode(dump.stem)
    # `psql -l -t -A` prints one '|'-separated row per database; the name is the
    # first field.
    dbs = [
        l.split(b'|')[0].decode('utf8') for l in
        subprocess.check_output(['psql', '-l', '-t', '-A']).splitlines()]
    if dbname in dbs:
        # Logger.warn is a deprecated alias; warning() is the documented method.
        log.warning('db {0} exists! Drop first to recreate.'.format(dump.name))
    else:
        if not dump.exists():
            _download_sql_dump(rel, log)
        subprocess.check_call(['createdb', dbname])
        subprocess.check_call(['psql', '-d', dbname, '-f', str(dump)])
        log.info('db {0} created'.format(dbname))
示例#8
0
def _load_sql_dump(rel, log):
    """
    Load the SQL dump of the release `rel` into a new database.

    The dump file is ``glottolog-<rel['version']>.sql`` and the database takes the
    file's stem as name.  When ``psql -l`` already reports that name, a warning is
    logged and nothing else happens; otherwise a missing dump is downloaded first,
    then ``createdb`` and ``psql -f`` are run.

    :param rel: mapping providing the ``'version'`` of the release.
    :param log: logger receiving the status messages.
    :raises subprocess.CalledProcessError: when an external command exits non-zero.
    """
    dump = Path('glottolog-{0}.sql'.format(rel['version']))
    dbname = as_unicode(dump.stem)
    # First '|'-separated field of each output row is the database name.
    dbs = [
        l.split(b'|')[0].decode('utf8') for l in subprocess.check_output(
            ['psql', '-l', '-t', '-A']).splitlines()
    ]
    if dbname in dbs:
        # warning() replaces the deprecated Logger.warn alias.
        log.warning('db {0} exists! Drop first to recreate.'.format(dump.name))
    else:
        if not dump.exists():
            _download_sql_dump(rel, log)
        subprocess.check_call(['createdb', dbname])
        subprocess.check_call(['psql', '-d', dbname, '-f', str(dump)])
        log.info('db {0} created'.format(dbname))
示例#9
0
def build_langs_index(api, log):
    """
    Recreate the languoid index and add one document per languoid of `api`.

    `log` is accepted but not used in this function.
    """
    index_writer = get_langs_index(api, recreate=True).writer()
    for languoid in api.languoids():
        document = dict(
            id=languoid.id,
            name=languoid.name,
            fname=as_unicode(languoid.fname),
            iso=languoid.iso,
            # On Python 2 the level name is decoded to text first.
            level=languoid.level.name.decode() if PY2 else languoid.level.name,
            macroarea=' '.join(map('{0}'.format, languoid.macroareas)),
            country=' '.join(map('{0}'.format, languoid.countries)),
            latitude=languoid.latitude,
            longitude=languoid.longitude,
            ini=languoid.cfg.write_string(),
        )
        index_writer.add_document(**document)
    index_writer.commit()
示例#10
0
文件: cli.py 项目: lingpy/lingpy3
 def __call__(self, args):
     """
     Run an operation on an object read from the command line and dump the result.

     ``args.name`` is parsed into the operation name and its keyword arguments,
     ``args.object`` into ``(reader name, interface name, input)`` plus keyword
     arguments for reading.  The result is dumped into ``args.output``.

     :return: path of the dumped result.
     """
     opargs, opkw = _args_kw(args.name)
     readargs, readkw = _args_kw(args.object)
     oname, if_, input_ = readargs
     input_ = text_type(input_)

     # We heuristically interpret the input as filename, if a file with that name
     # exists.
     as_path = Path(path_component(input_))
     if as_path.exists():
         input_ = as_path

     operation = opargs[0]
     target = read(oname, getattr(interfaces, if_), input_, **readkw)
     res = run(operation, target, **opkw)

     p = jsonlib.dump(res, outdir=Path(args.output))
     print('Result written to <%s>' % as_unicode(p))
     return p
示例#11
0
def upload_sources(args):
    """
    Compile sources and upload the result to GWDG CDSTAR instance.

    Notes
    -----
    CDSTAR authorisation information should be supplied in the form of
    environment variables:
        - CDSTAR_URL
        - CDSTAR_USER
        - CDSTAR_PWD

    The catalog path is the first positional argument or, when none is given,
    the value of the environment variable CDSTAR_CATALOG.

    Examples
    --------
    $ concepticon upload_sources path/to/cdstar/catalog
    """
    if args.args:
        catalog_path = args.args[0]
    else:
        catalog_path = os.environ["CDSTAR_CATALOG"]
    toc = ["# Sources\n"]
    api = Concepticon(args.repos)
    with SourcesCatalog(api.data_path("sources", "cdstar.json")) as lcat:
        with Catalog(
                catalog_path,
                cdstar_url=os.environ["CDSTAR_URL"],
                cdstar_user=os.environ["CDSTAR_USER"],
                cdstar_pwd=os.environ["CDSTAR_PWD"],
        ) as cat:
            pdfs = sorted(api.data_path("sources").glob("*.pdf"),
                          key=lambda f: f.stem)
            for pdf in pdfs:
                clid = as_unicode(pdf.stem)
                if lcat.get(clid):
                    # Already present in the local catalog.
                    continue
                created = list(cat.create(pdf, {"collection": "concepticon"}))
                _, _, obj = created[0]
                lcat.add(clid, obj)

        # One table-of-contents entry per item of the local catalog.
        for key in sorted(lcat.items):
            entry = lcat.get(key)
            line = "- [{0} [PDF {1}]]({2})".format(
                key, format_size(entry["size"]), entry["url"])
            toc.append(line)

    readme(api.data_path("sources"), toc)
    print(catalog_path)
示例#12
0
def upload_sources(args):
    """
    Compile sources and upload the result to GWDG CDSTAR instance.

    Notes
    -----
    CDSTAR authorisation information should be supplied in the form of
    environment variables:
        - CDSTAR_URL
        - CDSTAR_USER
        - CDSTAR_PWD

    When no positional argument is passed, the catalog path is read from the
    environment variable CDSTAR_CATALOG.

    Examples
    --------
    $ concepticon upload_sources path/to/cdstar/catalog
    """
    catalog_path = args.args[0] if args.args else os.environ['CDSTAR_CATALOG']
    toc = ['# Sources\n']
    api = Concepticon(args.repos)
    with SourcesCatalog(api.data_path('sources', 'cdstar.json')) as lcat:
        with Catalog(
                catalog_path,
                cdstar_url=os.environ['CDSTAR_URL'],
                cdstar_user=os.environ['CDSTAR_USER'],
                cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
            for fname in sorted(
                    api.data_path('sources').glob('*.pdf'), key=lambda f: f.stem):
                clid = as_unicode(fname.stem)
                if not lcat.get(clid):
                    # Only the object of the first result is needed; list() still
                    # consumes everything cat.create yields.
                    _, _, obj = list(cat.create(fname, {'collection': 'concepticon'}))[0]
                    # The return value was previously bound to an unused local.
                    lcat.add(clid, obj)

        for key in sorted(lcat.items):
            spec = lcat.get(key)
            toc.append('- [{0} [PDF {1}]]({2})'.format(
                key, format_size(spec['size']), spec['url']))

    readme(api.data_path('sources'), toc)
    print(catalog_path)
示例#13
0
 def from_path(cls, path):
     """
     Build an instance from `path`, reading its data through the cache.

     The result of ``read_dvt(path)`` is stored in a `Cache` under the key
     ``DiacriticsVowelsTones.<name of path>`` and unpacked as the remaining
     constructor arguments after the name.
     """
     store = Cache()
     name = as_unicode(path.name)
     key = 'DiacriticsVowelsTones.{0}'.format(name)
     if key not in store:
         store[key] = read_dvt(path)
     return cls(name, *store[key])
示例#14
0
 def cache_key(suffix):
     """Return the dot-joined cache key for `suffix`, using the name of ``path``."""
     parts = ('SoundClassModel', as_unicode(path.name), suffix)
     return '.'.join(parts)
示例#15
0
    def test_non_ascii(self):
        """Non-ASCII paths and path names must convert to text via ``as_unicode``."""
        from clldutils.path import Path, path_component, as_unicode

        parent = Path(path_component('äöü'))
        nested = parent.joinpath(path_component('äöü'))
        # Check the whole path first, then only its last component.
        for candidate in (nested, nested.name):
            self.assertIsInstance(as_unicode(candidate), text_type)
示例#16
0
文件: log.py 项目: lingpy/lingpy3
def file_written(fname, logger=None):
    """
    Log at INFO level that a file was created at `fname`.

    :param fname: path of the file, converted with ``as_unicode`` for the message.
    :param logger: logger to use; falls back to ``get_logger()`` when falsy.
    """
    if not logger:
        logger = get_logger()
    message = "File created at <{0}>.".format(as_unicode(fname))
    logger.info(message)
示例#17
0
def filter_hidden(fname):
    """Return True unless the stem of `fname` starts with a dot."""
    stem = as_unicode(fname.stem)
    return not stem.startswith('.')
示例#18
0
文件: cache.py 项目: xrotwang/lingpy3
 def keys(self):
     """Yield the name of every entry in ``self._dir``, converted with ``as_unicode``."""
     for entry in self._dir.iterdir():
         yield as_unicode(entry.name)