def __init__(self, name, dir_=None, default=None, **kw):
    """Initialization.

    :param name: Basename for the config file (suffix .ini will be appended).
    :param dir_: Directory hosting the config file; falls back to CONFIG_DIR.
    :param default: Default content of the config file.
    :param kw: Remaining keyword arguments, handed to `INI.__init__`.
    """
    # NOTE(review): kw is passed to INI.__init__ positionally, i.e. as the
    # parser's `defaults` mapping rather than expanded as **kw — confirm intended.
    INI.__init__(self, kw, allow_no_value=True)
    self.name = name
    config_dir = Path(dir_ or CONFIG_DIR)
    if default:
        if isinstance(default, text_type):
            self.read_string(default)
        #elif isinstance(default, (dict, OrderedDict)):
        #    self.read_dict(default)
    cfg_path = config_dir.joinpath(name + '.ini')
    if cfg_path.exists():
        assert cfg_path.is_file()
        # An existing config file overrides the defaults read above.
        self.read(cfg_path.as_posix())
    else:
        if not config_dir.exists():
            try:
                config_dir.mkdir()
            except OSError:  # pragma: no cover
                # this happens when run on travis-ci, by a system user.
                pass
        if config_dir.exists():
            # Persist the defaults so subsequent runs read the same file.
            self.write(cfg_path.as_posix())
    self.path = cfg_path
def __init__(self, name, default=None, **kw):
    """Initialization.

    :param name: Basename for the config file (suffix .ini will be appended).
    :param default: Default content of the config file.
    """
    self.name = name
    self.default = default
    config_dir = Path(kw.pop('config_dir', None) or DIR)
    # NOTE(review): the remaining kw dict is passed positionally, i.e. as the
    # parser's `defaults` mapping, not expanded as **kw — confirm intended.
    RawConfigParser.__init__(self, kw, allow_no_value=True)
    if self.default:
        # readfp expects a text stream on PY3 but a byte stream on PY2.
        if PY3:
            fp = io.StringIO(self.default)
        else:
            fp = io.BytesIO(self.default.encode('utf8'))
        self.readfp(fp)
    cfg_path = config_dir.joinpath(name + '.ini')
    if cfg_path.exists():
        assert cfg_path.is_file()
        # An existing config file overrides the defaults read above.
        self.read(cfg_path.as_posix())
    else:
        if not config_dir.exists():
            try:
                config_dir.mkdir()
            except OSError:  # pragma: no cover
                # this happens when run on travis-ci, by a system user.
                pass
        if config_dir.exists():
            # Persist the defaults so subsequent runs read the same file.
            with open(cfg_path.as_posix(), 'w') as fp:
                self.write(fp)
    self.path = cfg_path
def get_ini(fname, **kw):
    """Read an INI file, falling back to config data shipped with pyglottolog."""
    fname = Path(fname)
    if not fname.exists():
        # For old-style (<=3.4) repository layout we ship the config data
        # with pyglottolog:
        name = 'document_types.ini' if fname.name == 'hhtype.ini' else fname.name
        fname = Path(__file__).parent / name
        assert fname.exists()
    return INI.from_file(fname, **kw)
def new_dataset(args):
    """
    lexibank new-dataset OUTDIR [ID]

    Create a new dataset skeleton below OUTDIR, prompting interactively for
    metadata and instantiating the bundled dataset template.
    """
    if not args.args:
        raise ParserError('you must specify an existing directory')
    outdir = Path(args.args.pop(0))
    if not outdir.exists():
        raise ParserError('you must specify an existing directory')

    id_pattern = re.compile('[a-z_0-9]+$')
    md = {}
    if args.args:
        md['id'] = args.args.pop(0)
    else:
        md['id'] = input('Dataset ID: ')
    while not id_pattern.match(md['id']):
        print(
            'dataset id must only consist of lowercase ascii letters, digits and _ (underscore)!'
        )
        md['id'] = input('Dataset ID: ')

    outdir = outdir / md['id']
    if not outdir.exists():
        outdir.mkdir()

    for key in ['title', 'url', 'license', 'conceptlist', 'citation']:
        md[key] = input('Dataset {0}: '.format(key))

    # check license!
    # check conceptlist!

    for path in Path(
            pylexibank.__file__).parent.joinpath('dataset_template').iterdir():
        if path.is_file():
            if path.suffix in ['.pyc']:
                continue  # pragma: no cover
            target = path.name
            content = read_text(path)
            if '+' in path.name:
                # FIX: raw string — '\+' in a plain literal is an invalid
                # escape sequence (DeprecationWarning since Python 3.6).
                target = re.sub(
                    r'\+([a-z]+)\+',
                    lambda m: '{' + m.groups()[0] + '}',
                    path.name).format(**md)
            if target.endswith('_tmpl'):
                target = target[:-5]
                content = content.format(**md)
            write_text(outdir / target, content)
        else:
            target = outdir / path.name
            if target.exists():
                shutil.rmtree(str(target))
            shutil.copytree(str(path), str(target))
    del md['id']
    jsonlib.dump(md, outdir / 'metadata.json', indent=4)
def link(args):
    """\
    Complete linking of concepts to concept sets. If either CONCEPTICON_GLOSS or CONCEPTICON_ID is given, the other is added.

    concepticon link <concept-list>
    """
    conceptlist = Path(args.args[0])
    if not (conceptlist.exists() and conceptlist.is_file()):
        # Fall back to looking the list up in the repository data.
        conceptlist = data_path('conceptlists', args.args[0])
        if not (conceptlist.exists() and conceptlist.is_file()):
            raise ParserError('no file %s found' % args.args[0])
    rewrite(conceptlist, Linker(conceptlist.stem))
def link(args):
    """
    Complete linking of concepts to concept sets. If either CONCEPTICON_GLOSS or CONCEPTICON_ID is given, the other is added.

    concepticon link <concept-list>
    """
    api = Concepticon(args.data)
    conceptlist = Path(args.args[0])
    if not (conceptlist.exists() and conceptlist.is_file()):
        # Fall back to looking the list up in the repository data.
        conceptlist = api.data_path('conceptlists', args.args[0])
        if not (conceptlist.exists() and conceptlist.is_file()):
            raise ParserError('no file %s found' % args.args[0])
    rewrite(conceptlist, Linker(conceptlist.stem, api.conceptsets.values()))
def lff2tree(tree=TREE, outdir=None, builddir=None, lffs=None):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    # FIXME: instead of removing trees, we should just move the current one
    # from outdir to build, and then recreate in outdir.
    builddir = Path(builddir) if builddir else build_path('tree')
    old_tree = {l.id: l for l in walk_tree(tree)} if tree else {}
    out = Path(outdir or tree)
    if not out.parent.exists():
        out.parent.mkdir()

    if out.exists():
        if builddir.exists():
            try:
                rmtree(builddir)
            except OSError:  # pragma: no cover
                # FIX: was a bare `except:` which would also swallow
                # KeyboardInterrupt/SystemExit; rmtree failures are OSErrors.
                pass
            if builddir.exists():  # pragma: no cover
                raise ValueError('please remove %s before proceeding' % builddir)
        # move the old tree out of the way
        shutil.move(out.as_posix(), builddir.as_posix())
    out.mkdir()

    lffs = lffs or {}
    languages = {}
    for lang in read_lff(Level.language, fp=lffs.get(Level.language)):
        languages[lang.id] = lang
        lang2tree(lang, lang.lineage, out, old_tree)

    for lang in read_lff(Level.dialect, fp=lffs.get(Level.dialect)):
        if not lang.lineage or lang.lineage[0][1] not in languages:
            raise ValueError('unattached dialect')  # pragma: no cover
        lang2tree(
            lang, languages[lang.lineage[0][1]].lineage + lang.lineage,
            out, old_tree)
class Cache(object):
    """A file-system backed key-value store with a dict-like interface.

    Each value is pickled into one file per key below the cache directory.
    """

    def __init__(self, dir_=None):
        self._dir = Path(dir_ or CACHE_DIR)
        if not self._dir.exists():
            self._dir.mkdir(parents=True)  # pragma: no cover

    def _path(self, key):
        # Map a key to its backing file, sanitized via path_component.
        return self._dir.joinpath(path_component(key))

    def __len__(self):
        # FIX: count lazily instead of materializing a throwaway list.
        return sum(1 for _ in self.keys())

    def __getitem__(self, item):
        with self._path(item).open('rb') as fp:
            return pickle.load(fp)

    def __setitem__(self, key, value):
        with self._path(key).open('wb') as fp:
            pickle.dump(value, fp)

    def __delitem__(self, key):
        remove(self._path(key))

    def __contains__(self, item):
        return self._path(item).exists()

    def keys(self):
        for p in self._dir.iterdir():
            yield as_unicode(p.name)

    def clear(self):
        # FIX: snapshot the keys first — deleting entries while iterating
        # the directory is undefined behavior on some platforms.
        for key in list(self.keys()):
            remove(self._path(key))
def stats(args):
    """
    cldf stats <DATASET>

    Print basic stats for CLDF dataset <DATASET>, where <DATASET> may be the path to
    - a CLDF metadata file
    - a CLDF core data file
    - a CLDF zip archive
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        # FIX: the check is for an existing *file*, but the message claimed
        # "directory".
        raise ParserError('%s is not an existing file' % fname)
    # Dispatch on the filename to the appropriate Dataset constructor.
    if fname.suffix == '.zip':
        ds = Dataset.from_zip(fname)
    elif fname.name.endswith(MD_SUFFIX):
        ds = Dataset.from_metadata(fname)
    else:
        ds = Dataset.from_file(fname)
    print(fname)
    stats_ = ds.stats
    print("""
Name: %s
Different languages: %s
Different parameters: %s
Rows: %s
""" % (
        ds.name,
        len(stats_['languages']),
        len(stats_['parameters']),
        stats_['rowcount']
    ))
class SourcesCatalog(object):
    """Context manager maintaining a JSON catalog of CDSTAR source objects.

    The catalog is read from ``path`` on instantiation (if present) and
    written back — with keys sorted — when the context exits.
    """

    def __init__(self, path):
        self.path = Path(path)
        self.items = jsonlib.load(self.path) if self.path.exists() else {}

    def __contains__(self, item):
        return item in self.items

    def get(self, item):
        return self.items.get(item)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        ordered = OrderedDict(
            (key, OrderedDict(sorted(value.items())))
            for key, value in sorted(self.items.items()))
        jsonlib.dump(ordered, self.path, indent=4)

    def add(self, key, obj):
        """Register catalog entry ``key`` for CDSTAR object ``obj`` and return it."""
        bitstream = obj.bitstreams[0]
        self.items[key] = OrderedDict([
            ('url', 'https://cdstar.shh.mpg.de/bitstreams/{0}/{1}'.format(
                obj.id, bitstream.id)),
            ('objid', obj.id),
            ('original', bitstream.id),
            ('size', bitstream.size),
            ('mimetype', bitstream.mimetype),
        ])
        return self.items[key]
def to_cldf(self, dest, mdname='cldf-metadata.json'):
    """
    Write the data from the db to a CLDF dataset according to the metadata in `self.dataset`.

    :param dest: Destination directory (created if missing).
    :param mdname: Filename for the CLDF metadata file.
    :return: path of the metadata file
    """
    dest = Path(dest)
    if not dest.exists():
        dest.mkdir()

    data = self.read()

    if data[self.source_table_name]:
        # Re-assemble the bibliography from the stored source rows.
        sources = Sources()
        for src in data[self.source_table_name]:
            sources.add(Source(
                src['genre'],
                src['id'],
                **{k: v for k, v in src.items() if k not in ['id', 'genre']}))
        sources.write(dest / self.dataset.properties.get('dc:source', 'sources.bib'))

    for table_type, items in data.items():
        try:
            table = self.dataset[table_type]
            table.common_props['dc:extent'] = table.write(
                [self.retranslate(table, item) for item in items],
                base=dest)
        except KeyError:
            # Only the sources pseudo-table is expected to be missing.
            assert table_type == self.source_table_name, table_type
    # FIX: honour the `mdname` parameter — the metadata filename was
    # previously hard-coded to 'cldf-metadata.json'.
    return self.dataset.write_metadata(dest / mdname)
def write(self, outdir='.', suffix='.csv', cited_sources_only=False, archive=False):
    """Write the dataset (data table, metadata and sources) to disk.

    :param outdir: Existing directory to write into.
    :param suffix: Suffix for the data file; TAB_SUFFIXES switch the dialect \
to tab-separated.
    :param cited_sources_only: If True, only sources cited by rows are written.
    :param archive: If truthy, write into a zip archive instead of plain \
files; may be an existing `Archive` (left open) or True (a fresh \
`<name>.zip` is created and closed at the end).
    """
    outdir = Path(outdir)
    if not outdir.exists():
        raise ValueError(outdir.as_posix())

    close = False
    if archive:
        if isinstance(archive, Archive):
            container = archive
        else:
            # We created the archive ourselves, so we must close it below.
            container = Archive(outdir.joinpath(self.name + '.zip'), mode='w')
            close = True
    else:
        container = outdir

    fname = Path(outdir).joinpath(self.name + suffix)
    if fname.suffix in TAB_SUFFIXES:
        self.table.dialect.delimiter = '\t'

    # When writing to an archive the writer works in-memory (target None),
    # and its buffered content is dumped into the archive afterwards.
    with UnicodeWriter(
            None if isinstance(container, Archive) else fname,
            delimiter=self.table.dialect.delimiter) as writer:
        writer.writerow(self.fields)
        for row in self.rows:
            writer.writerow(row.to_list())
        if isinstance(container, Archive):
            container.write_text(writer.read(), fname.name)

    self.table.url = fname.name

    self.metadata.write(Dataset.filename(fname, 'metadata'), container)
    ids = self._cited_sources if cited_sources_only else None
    self.sources.write(Dataset.filename(fname, 'sources'), container, ids=ids)
    if close:
        container.close()
def __call__(self, parser, namespace, values, option_string=None):
    """argparse action callback: require ``values`` to name an existing directory."""
    candidate = Path(values)
    if not candidate.exists():
        raise argparse.ArgumentError(self, "path does not exist")
    if not candidate.is_dir():
        raise argparse.ArgumentError(self, "path is no directory")
    setattr(namespace, self.dest, candidate)
def __call__(self, parser, namespace, values, option_string=None):
    """argparse action callback: require ``values`` to name an existing directory."""
    target = Path(values)
    if not target.exists():
        raise argparse.ArgumentError(self, 'path does not exist')
    if not target.is_dir():
        raise argparse.ArgumentError(self, 'path is no directory')
    setattr(namespace, self.dest, target)
def wals_detail_html(context=None, request=None, **kw):
    """View assembling map data for the WALS feature matching an APiCS parameter.

    Loads the packaged WALS GeoJSON for ``context.parameter.wals_id``, rewrites
    each feature's icon into a request-resolvable URL and attaches a popup
    linking to the corresponding WALS languoid page.

    :raises HTTPNotFound: if no WALS data is packaged for this parameter.
    """
    wals_data = Path(apics.__file__).parent.joinpath(
        'static', 'wals', '%sA.json' % context.parameter.wals_id)
    if not wals_data.exists():
        raise HTTPNotFound()
    wals_data = jsonlib.load(wals_data)
    value_map = {}

    for layer in wals_data['layers']:
        for feature in layer['features']:
            # Resolve the icon name to a request-specific icon URL.
            feature['properties']['icon'] = request.registry.getUtility(
                IIcon, name=feature['properties']['icon']).url(request)
            feature['properties']['popup'] = external_link(
                'http://wals.info/languoid/lect/wals_code_'
                + feature['properties']['language']['id'],
                label=feature['properties']['language']['name'])
        # One legend entry per layer; the icon of the first feature stands in
        # for the whole layer.
        value_map[layer['properties']['number']] = {
            'icon': layer['features'][0]['properties']['icon'],
            'name': layer['properties']['name'],
            'number': layer['properties']['number'],
        }

    return {
        'wals_data': wals_data,
        'wals_map': WalsMap(
            context.parameter, request, data=wals_data, value_map=value_map),
        'apics_map': ApicsWalsMap(
            context.parameter, request, data=wals_data, value_map=value_map)}
def get_dataset(fname=None):
    """Load a CLDF dataset.

    Load the file as `json` CLDF metadata description file, or as metadata-free
    dataset contained in a single csv file. The distinction is made depending on
    the file extension: `.json` files are loaded as metadata descriptions, all
    other files are matched against the CLDF module specifications. Directories
    are checked for the presence of any CLDF datasets in undefined order of the
    dataset types.

    Parameters
    ----------
    fname : str or Path
        Path to a CLDF dataset

    Returns
    -------
    pycldf.Dataset
    """
    if fname is None:
        fname = repository
    else:
        fname = Path(fname)
        if not fname.exists():
            raise FileNotFoundError('{:} does not exist'.format(fname))
    loader = Dataset.from_metadata if fname.suffix == '.json' else Dataset.from_data
    return loader(fname)
def test_Matrix(self):
    """Smoke test: the Matrix adapter writes its output file to disk."""
    from wals3.adapters import Matrix

    p = Path(mktemp())
    assert not p.exists()

    class TestMatrix(Matrix):
        # Redirect output to the temporary path so creation can be asserted.
        def abspath(self, req):
            return p

        # Restrict the query to keep the test fast.
        def query(self, req):
            return Matrix.query(self, req).filter(Language.pk < 100)

    m = TestMatrix(Language, "wals3", description="Feature values CSV")
    m.create(self.env["request"], verbose=False)
    assert p.exists()
    remove(p)
def test_Matrix(self):
    """Smoke test: the Matrix adapter writes its output file to disk."""
    from wals3.adapters import Matrix

    p = Path(mktemp())
    assert not p.exists()

    class TestMatrix(Matrix):
        # Redirect output to the temporary path so creation can be asserted.
        def abspath(self, req):
            return p

        # Restrict the query to keep the test fast.
        def query(self, req):
            return Matrix.query(self, req).filter(Language.pk < 100)

    m = TestMatrix(Language, 'wals3', description="Feature values CSV")
    m.create(self.env['request'], verbose=False)
    assert p.exists()
    remove(p)
def lff2tree(tree=TREE, outdir=None, builddir=None, lffs=None):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    # FIXME: instead of removing trees, we should just move the current one
    # from outdir to build, and then recreate in outdir.
    builddir = Path(builddir) if builddir else build_path("tree")
    old_tree = {l.id: l for l in walk_tree(tree)} if tree else {}
    out = Path(outdir or tree)
    if not out.parent.exists():
        out.parent.mkdir()

    if out.exists():
        if builddir.exists():
            try:
                rmtree(builddir)
            except OSError:  # pragma: no cover
                # FIX: was a bare `except:` which would also swallow
                # KeyboardInterrupt/SystemExit; rmtree failures are OSErrors.
                pass
            if builddir.exists():  # pragma: no cover
                raise ValueError("please remove %s before proceeding" % builddir)
        # move the old tree out of the way
        shutil.move(out.as_posix(), builddir.as_posix())
    out.mkdir()

    lffs = lffs or {}
    languages = {}
    for lang in read_lff(Level.language, fp=lffs.get(Level.language)):
        languages[lang.id] = lang
        lang2tree(lang, lang.lineage, out, old_tree)

    for lang in read_lff(Level.dialect, fp=lffs.get(Level.dialect)):
        if not lang.lineage or lang.lineage[0][1] not in languages:
            raise ValueError("unattached dialect")  # pragma: no cover
        lang2tree(
            lang, languages[lang.lineage[0][1]].lineage + lang.lineage,
            out, old_tree)
def in_dir(cls, d, empty_tables=False):
    """Instantiate a dataset rooted in directory ``d``, creating it if needed.

    :param empty_tables: If True, drop the default tables from the metadata.
    """
    directory = Path(d)
    if not directory.exists():
        directory.mkdir()
    assert directory.is_dir()
    res = cls.from_metadata(directory)
    if empty_tables:
        del res.tables[:]
    return res
def _get_dataset(args):
    """Return the Dataset denoted by the first command line argument.

    :raises ParserError: if no argument was given or it does not name an \
existing file.
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        # FIX: the check is for an existing *file*; the message said "directory".
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
def jsondump(obj, fname, log=None):
    """Merge ``obj`` into the JSON file ``fname`` and dump it sorted.

    Existing file content (if any) is updated with ``obj`` — new keys win.
    Returns the merged object.
    """
    fname = Path(fname)
    if fname.exists():
        merged = jsonlib.load(fname)
        merged.update(obj)
        obj = merged
    jsonlib.dump(sorted_obj(obj), fname, indent=4)
    log_dump(fname, log=log)
    return obj
def update(path, default=None, load_kw=None, **kw):
    """Context manager yielding a file's deserialized content for in-place editing.

    The (possibly mutated) object is dumped back to ``path`` on exit.

    :param default: Object yielded when the file does not exist yet; if None, \
a missing file raises ValueError.
    """
    path = Path(path)
    if path.exists():
        res = load(path, **(load_kw or {}))
    else:
        if default is None:
            raise ValueError('path does not exist')
        res = default
    yield res
    dump(res, path, **kw)
def write_data_file(comment_text, overwrite):
    """Extract an embedded data file from a BEAST XML comment and write it.

    The first line of ``comment_text`` carries the target filename after a
    colon; the remaining lines are the file content. Returns a status message.
    """
    header, _, body = comment_text.partition("\n")
    filename = Path(header.split(":", 1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists! Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()
    with filename.open("w", encoding='utf8') as fp:
        fp.write(body)
    return "Wrote embedded data file %s.\n" % filename
def write_data_file(comment_text, overwrite):
    """Extract an embedded data file from a BEAST XML comment and write it.

    The first line of ``comment_text`` carries the target filename after a
    colon; the remaining lines are the file content. Returns a status message.
    """
    header, _, body = comment_text.partition("\n")
    filename = Path(header.split(":", 1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists! Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()
    with filename.open("w", encoding='utf8') as fp:
        fp.write(body)
    return "Wrote embedded data file %s.\n" % filename
def link(args):
    """
    Link concepts to concept sets for a given concept list.

    Notes
    -----
    If either CONCEPTICON_GLOSS or CONCEPTICON_ID is given, the other is added.

    Examples
    --------
    $ concepticon link path_to_conceptlist.tsv
    """
    api = Concepticon(args.repos)

    def is_existing_file(p):
        return p.exists() and p.is_file()

    conceptlist = Path(args.args[0])
    if not is_existing_file(conceptlist):
        # Fall back to looking the list up in the repository data.
        conceptlist = api.data_path('conceptlists', args.args[0])
        if not is_existing_file(conceptlist):
            raise ParserError('no file %s found' % args.args[0])
    rewrite(conceptlist, Linker(conceptlist.stem, api.conceptsets.values()))
def from_file(cls, path, **keywords):
    """
    Function loads a concept list outside the Concepticon collection.
    """
    path = Path(path)
    assert path.exists()
    attrs = {field: keywords.get(field, '') for field in Conceptlist.public_fields()}
    attrs['id'] = path.stem
    attrs['items'] = keywords.get('items', len(read_dicts(path)))
    attrs['year'] = keywords.get('year', 0)
    return cls(api=path, **attrs)
def curate(args):  # pragma: no cover
    """Interactive REPL for curating lexibank datasets.

    Reads commands of the form ``<command> [dataset]``, with tab-completion
    for commands and dataset ids plus persistent history, until 'quit' is
    entered or EOF/interrupt is received.
    """
    datasets = {ds.id: ds for ds in args.cfg.datasets}

    class TheCompleter(Completer):
        # Complete dataset ids after a known command, command names otherwise.
        def get_completions(self, document, complete_event):
            word_before_cursor = document.get_word_before_cursor(WORD=True)
            words = document.text_before_cursor.split()
            if words and words[0] in commands:
                for ds in fuzzyfinder(word_before_cursor, datasets):
                    yield Completion(ds, start_position=-len(word_before_cursor))
            else:  # elif word_before_cursor:
                for c in fuzzyfinder(word_before_cursor, commands):
                    yield Completion(c, start_position=-len(word_before_cursor))

    user_input = []
    appdir = Path(user_data_dir('lexibank'))
    if not appdir.exists():
        appdir.mkdir(parents=True)

    while not user_input or user_input[0] != 'quit':
        try:
            user_input = prompt(
                u'lexibank-curator> ',
                history=FileHistory(str(appdir / 'history.txt')),
                auto_suggest=AutoSuggestFromHistory(),
                completer=TheCompleter(),
            ).split()
        except EOFError:
            break
        except KeyboardInterrupt:
            break

        if len(user_input) == 0:
            continue  # ignore empty commands
        if user_input[0] not in commands:
            print(colored('Invalid command!', 'red'))
            continue
        if len(user_input) > 1 and user_input[1] not in datasets:
            print(colored('Invalid dataset!', 'red'))
            continue

        args.args = user_input[1:]
        try:
            # Time each command and report the elapsed seconds.
            s = time()
            commands[user_input[0]](args)
            print('[{0:.3f}]'.format(time() - s))
        except Exception as e:
            # Keep the REPL alive on command failure: show the traceback and
            # a colored one-line summary.
            traceback.print_exc()
            print(colored('{0}: {1}'.format(e.__class__.__name__, e), 'red'))

    print('see ya!')
class TemporaryPath(object):
    """Context manager yielding a fresh temporary file path as a posix string.

    The temp file is created and immediately closed on construction; whatever
    is left at the path is deleted on exit.
    """

    def __init__(self, suffix=''):
        tmp = NamedTemporaryFile(suffix=suffix)
        self.name = Path(tmp.name)
        tmp.close()

    def __enter__(self):
        return self.name.as_posix()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.name.exists():
            remove(self.name)
def safe_overwrite(fname):
    """Context manager yielding a unique sibling path of ``fname``.

    The caller writes to the yielded path; on exit the original file (if any)
    is removed and the temporary file is moved into its place.
    """
    fname = Path(fname)
    target_dir = fname.parent
    if not target_dir.exists():
        target_dir.mkdir()
    assert target_dir.exists()
    candidate = target_dir
    while candidate.exists():
        candidate = target_dir.joinpath('%s.%s' % (fname.name, random_string(6)))
    yield candidate
    if fname.exists():
        remove(fname)
    move(candidate, fname)
def test_generate_extract(self):
    """Round-trip: generate a BEAST XML, then extract the config back out."""
    xml = self.tmp_path('test.xml')
    self._run_main('-v -o {0} {1}'.format(xml.as_posix(), config_path('basic')))
    self.assertTrue(xml.exists())
    # Overwriting existing files must be specified explicitely:
    self._run_main('-o {0} {1}'.format(
        xml.as_posix(), config_path('basic')), status=4)
    self._run_main('--overwrite -o {0} {1}'.format(
        xml.as_posix(), config_path('basic')), status=0)
    # Extraction should recreate the embedded configuration file.
    tcfg = Path('beastling_test.conf')
    self._run_main('--extract {0}'.format(xml.as_posix()))
    self.assertTrue(tcfg.exists())
    remove(tcfg)
def from_file(cls, path, **keywords):
    """
    Function loads a concept list outside the Concepticon collection.

    @todo: add a uniqueness check here, see function read_dicts
    """
    path = Path(path)
    assert path.exists()
    attrs = {field: keywords.get(field, '') for field in Conceptlist.public_fields()}
    attrs['id'] = path.stem
    attrs['items'] = keywords.get('items', len(read_dicts(path)))
    attrs['year'] = keywords.get('year', 0)
    attrs['local'] = True
    return cls(api=path, **attrs)
def write_info(self, outdir=None):
    """Write this languoid's INI file into ``outdir`` and return its path.

    :param outdir: Target directory (defaults to a directory named after \
``self.id``); created if missing.
    """
    outdir = outdir or self.id
    if not isinstance(outdir, Path):
        outdir = Path(outdir)
    if not outdir.exists():
        outdir.mkdir()
    fname = outdir.joinpath(self.fname('.ini'))
    self.cfg.write(fname)
    if os.linesep == '\n':
        # NOTE(review): on platforms whose native line separator is '\n' the
        # file is rewritten with '\r\n' endings — presumably to keep INI
        # files with Windows-style line endings regardless of the platform
        # that wrote them; confirm this is intended.
        with fname.open(encoding='utf8') as fp:
            text = fp.read()
        with fname.open('w', encoding='utf8') as fp:
            fp.write(text.replace('\n', '\r\n'))
    return fname
def downloads(req):
    """Yield (release, [links]) pairs read from the app's downloads.json."""
    mod = importlib.import_module(req.registry.settings['clld.pkg'])
    manifest_path = Path(mod.__file__).parent.joinpath('static', 'downloads.json')

    def bitstream_link(oid, spec):
        bsid, size = spec['bitstreamid'], spec['filesize']
        url = SERVICE_URL.path('/bitstreams/{0}/{1}'.format(oid, bsid)).as_string()
        return HTML.a('{0} [{1}]'.format(bsid, format_size(size)), href=url)

    manifest = load(manifest_path) if manifest_path.exists() else {}
    for release, spec in sorted(manifest.items()):
        yield release, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
def downloads(req):
    """Yield (release, [links]) pairs read from the app's downloads.json."""
    mod = importlib.import_module(req.registry.settings['clld.pkg'])
    dls = Path(mod.__file__).parent.joinpath('static', 'downloads.json')
    # FIX: removed a stray debugging `print(dls)` left in production code.

    def bitstream_link(oid, spec):
        url = SERVICE_URL.path(
            '{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    dls = load(dls) if dls.exists() else {}
    for rel, spec in sorted(dls.items()):
        yield rel, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
def from_file(cls, bibFile, encoding='utf8', lowercase=False):
    """Create bibtex database from a bib-file.

    @param bibFile: path of the bibtex-database-file to be read.
    """
    if not isinstance(bibFile, Path):
        bibFile = Path(bibFile)
    if bibFile.exists():
        with bibFile.open(encoding=encoding) as fp:
            content = fp.read()
    else:
        content = ''
    # FIX: use a raw string for the regex — '\s' in a plain literal is an
    # invalid escape sequence (DeprecationWarning since Python 3.6) — and
    # pass the flags by keyword instead of relying on the positional
    # maxsplit=0 slot.
    return cls((Record.from_string('@' + m, lowercase=lowercase)
                for m in re.split(r'^\s*@', content, flags=re.MULTILINE)))
def test_generate_extract(self):
    """Round-trip: generate a BEAST XML, then extract the config back out."""
    xml = self.tmp_path('test.xml')
    self._run_main('-v -o {0} {1}'.format(xml.as_posix(), config_path('basic')))
    self.assertTrue(xml.exists())
    # Overwriting existing files must be specified explicitely:
    self._run_main('-o {0} {1}'.format(xml.as_posix(), config_path('basic')),
                   status=4)
    self._run_main('--overwrite -o {0} {1}'.format(xml.as_posix(),
                                                   config_path('basic')),
                   status=0)
    # Extraction should recreate the embedded configuration file.
    tcfg = Path('beastling_test.conf')
    self._run_main('--extract {0}'.format(xml.as_posix()))
    self.assertTrue(tcfg.exists())
    remove(tcfg)
def test_extractor(self):
    """The extractor recovers config and data files embedded in BEAST XML."""
    config = self.make_cfg(
        [config_path(f).as_posix() for f in ("admin", "mk", "embed_data")])
    xml = beastling.beastxml.BeastXml(config)
    xmlfile = self.tmp.joinpath("beastling.xml")
    xml.write_file(xmlfile.as_posix())
    self.assertTrue(bool(self._extract(xmlfile)))

    # Extracting from a generated XML must recreate the configuration file
    # named after the configured basename.
    config = self.make_cfg({
        'admin': {
            'basename': 'abcdefg'
        },
        'model': {
            'model': 'mk',
            'data': data_path('basic.csv').as_posix()
        }
    })
    xml = beastling.beastxml.BeastXml(config)
    xmlfile = self.tmp.joinpath("beastling.xml")
    xml.write_file(xmlfile.as_posix())
    beastling.extractor.extract(xmlfile)
    p = Path('abcdefg.conf')
    self.assertTrue(p.exists())
    cfg = INI(interpolation=None)
    cfg.read(p.as_posix())
    remove(p)
    self.assertEqual(cfg['admin']['basename'], 'abcdefg')
    self.assertEqual(cfg['model']['model'], 'mk')

    # A hand-crafted XML with an embedded data file comment must yield the
    # data file on extraction.
    fname = self.tmp.joinpath('test.xml')
    datafile = self.tmp.joinpath(('test.csv'))
    self.assertFalse(datafile.exists())
    with fname.open('w', encoding='utf8') as fp:
        fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model]
-->
  <!--%s:%s-->
</r>
""" % (beastling.extractor._generated_str,
       beastling.extractor._config_file_str,
       beastling.extractor._data_file_str,
       datafile.as_posix()))
    res = self._extract(fname)
    self.assertIn(datafile.name, ''.join(res))
def write_config(comment_text, overwrite):
    """Recreate a BEASTling configuration file embedded in a BEAST XML comment.

    Returns a status message; refuses to clobber an existing file unless
    ``overwrite`` is set.
    """
    lines = comment_text.split("\n")
    assert lines[1] in (_config_file_str, _proggen_str)
    if lines[1] == _proggen_str:
        return "Original configuration was generated programmatically, no configuration to extract."
    parser = INI()
    parser.read_string("\n".join(lines[2:]))
    if parser.has_option("admin", "basename"):
        basename = parser.get("admin", "basename")
    else:
        basename = 'beastling'
    filename = Path(basename + '.conf')
    if filename.exists() and not overwrite:
        return "BEASTling configuration file %s already exists! Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()
    parser.write(filename)
    return "Wrote BEASTling configuration file %s.\n" % filename
def test_extractor(self):
    """The extractor recovers config and data files embedded in BEAST XML."""
    config = self.make_cfg(
        [config_path(f).as_posix() for f in ("admin", "mk", "embed_data")])
    xml = beastling.beastxml.BeastXml(config)
    xmlfile = self.tmp.joinpath("beastling.xml")
    xml.write_file(xmlfile.as_posix())
    self.assertTrue(bool(self._extract(xmlfile)))

    # Extracting from a generated XML must recreate the configuration file
    # named after the configured basename.
    config = self.make_cfg({
        'admin': {'basename': 'abcdefg'},
        'model': {
            'model': 'mk',
            'data': data_path('basic.csv').as_posix()}})
    xml = beastling.beastxml.BeastXml(config)
    xmlfile = self.tmp.joinpath("beastling.xml")
    xml.write_file(xmlfile.as_posix())
    beastling.extractor.extract(xmlfile)
    p = Path('abcdefg.conf')
    self.assertTrue(p.exists())
    cfg = INI(interpolation=None)
    cfg.read(p.as_posix())
    remove(p)
    self.assertEqual(cfg['admin']['basename'], 'abcdefg')
    self.assertEqual(cfg['model']['model'], 'mk')

    # A hand-crafted XML with an embedded data file comment must yield the
    # data file on extraction.
    fname = self.tmp.joinpath('test.xml')
    datafile = self.tmp.joinpath(('test.csv'))
    self.assertFalse(datafile.exists())
    with fname.open('w', encoding='utf8') as fp:
        fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model]
-->
  <!--%s:%s-->
</r>
""" % (beastling.extractor._generated_str,
       beastling.extractor._config_file_str,
       beastling.extractor._data_file_str,
       datafile.as_posix()))
    res = self._extract(fname)
    self.assertIn(datafile.name, ''.join(res))
def test_extractor(config_factory, tmppath, data_dir):
    """The extractor recovers config and data files embedded in BEAST XML."""
    config = config_factory("admin", "mk", "embed_data")
    xml = beastling.beastxml.BeastXml(config)
    xmlfile = str(tmppath / "beastling.xml")
    xml.write_file(xmlfile)
    assert bool(_extract(xmlfile))

    # Extracting from a generated XML must recreate the configuration file
    # named after the configured basename.
    config = config_factory({
        'admin': {'basename': 'abcdefg'},
        'model model': {
            'model': 'mk',
            'data': str(data_dir / 'basic.csv')}})
    xml = beastling.beastxml.BeastXml(config)
    xmlfile = str(tmppath / "beastling.xml")
    xml.write_file(xmlfile)
    beastling.extractor.extract(xmlfile)
    p = Path('abcdefg.conf')
    assert p.exists()
    cfg = INI(interpolation=None)
    cfg.read(p.as_posix())
    remove(p)
    assert cfg['admin']['basename'] == 'abcdefg'
    assert cfg['model model']['model'] == 'mk'

    # A hand-crafted XML with an embedded data file comment must yield the
    # data file on extraction.
    fname = tmppath / 'test.xml'
    datafile = tmppath / 'test.csv'
    assert not datafile.exists()
    with fname.open('w', encoding='utf8') as fp:
        fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model model]
-->
  <!--%s:%s-->
</r>
""" % (beastling.extractor._generated_str,
       beastling.extractor._config_file_str,
       beastling.extractor._data_file_str,
       datafile.as_posix()))
    res = _extract(fname)
    assert datafile.name in ''.join(res)
def configure(cfgpath=None):
    """
    Configure lexibank.

    :param cfgpath: Explicit path to the config file; defaults to \
``<user config dir>/config.ini``.
    :return: the lexibank `Config` object (the previous docstring claimed a \
(config, logger) pair, but only the config is returned).
    """
    cfgpath = Path(cfgpath) \
        if cfgpath else Path(user_config_dir(pylexibank.__name__)) / 'config.ini'
    if not cfgpath.exists():
        # First run: write a fresh config file with default repository paths.
        print("""
{0}

You seem to be running lexibank for the first time.
Your system configuration will now be written to a config file to be used
whenever lexibank is run lateron.
""".format(colored('Welcome to lexibank!', 'blue', attrs=['bold', 'reverse'])))
        if not cfgpath.parent.exists():
            cfgpath.parent.mkdir(parents=True)
        cfg = Config()
        cfg['paths'] = {k: get_path(src) for k, src in REPOS}
        cfg.write(cfgpath)
        print("""
Configuration has been written to:
{0}
You may edit this file to adapt to changes in your system or to reconfigure settings
such as the logging level.""".format(cfgpath.resolve()))
    else:
        cfg = Config.from_file(cfgpath)

    # Sanity-check the configured repository paths before proceeding.
    try:
        cfg.glottolog
    except (FileNotFoundError, ValueError):
        raise ParserError(
            'Misconfigured Glottolog path in {0}'.format(cfgpath))
    if not Path(cfg['paths']['concepticon']).exists():
        raise ParserError(
            'Misconfigured Concepticon path in {0}'.format(cfgpath))

    # Print the configuration directory for reference:
    print("Using configuration file at:")
    print(str(cfgpath) + '\n')

    return cfg
class API(UnicodeMixin): """An API base class to provide programmatic access to data in a git repository.""" # A light-weight way to specifiy a default repository location (without having to # overwrite __init__) __repos_path__ = None def __init__(self, repos=None): self.repos = Path(repos or self.__repos_path__) def __unicode__(self): name = self.repos.resolve().name if self.repos.exists( ) else self.repos.name return '<{0} repository {1} at {2}>'.format(name, git_describe(self.repos), self.repos) def path(self, *comps): return self.repos.joinpath(*comps) @property def appdir(self): return self.path('app') @property def appdatadir(self): return self.appdir.joinpath('data') @classmethod def app_wrapper(cls, func): @wraps(func) def wrapper(args): api = cls(args.repos) if not api.appdatadir.exists() or '--recreate' in args.args: api.appdatadir.mkdir(exist_ok=True) args.api = api func(args) index = api.appdir / 'index.html' if index.exists(): webbrowser.open(index.resolve().as_uri()) return wrapper
def datasets(args):
    """
    cldf datasets <DIR> [ATTRS]

    List all CLDF datasets in directory <DIR>
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    d = Path(args.args[0])
    if not d.exists() or not d.is_dir():
        raise ParserError('%s is not an existing directory' % d)
    attrs = args.args[1:]
    for fname in sorted(d.glob('*' + MD_SUFFIX), key=lambda p: p.name):
        md = Metadata(load(fname))
        data = fname.parent.joinpath(
            md.get_table().url or fname.name[:-len(MD_SUFFIX)])
        if not data.exists():
            continue
        print(data)
        if attrs:
            width = max(len(a) for a in attrs)
            for attr in attrs:
                if md.get(attr):
                    print(' %s %s' % ((attr + ':').ljust(width + 1), md[attr]))
def lff2tree(tree=TREE, outdir=None, test=False):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    out = Path(outdir or build_path('tree'))
    if not out.parent.exists():
        out.parent.mkdir()
    if out.exists():
        rmtree(out)
    out.mkdir()
    old_tree = {l.id: l for l in walk_tree(tree)} if tree else {}

    languages = {}
    for lang in read_lff('language'):
        languages[lang.id] = lang
        lang2tree(lang, lang.lineage, out, old_tree)

    for lang in read_lff('dialect'):
        # Every dialect must hang off a language read above.
        if not lang.lineage or lang.lineage[0][1] not in languages:
            raise ValueError('unattached dialect')
        lang2tree(
            lang,
            languages[lang.lineage[0][1]].lineage + lang.lineage,
            out,
            old_tree)

    if not test:
        # Replace the live tree with the newly assembled one.
        rmtree(TREE, ignore_errors=True)
        copytree(out, TREE)
def _existing_file(fname):
    """Coerce ``fname`` to a Path, asserting it is an existing regular file."""
    path = Path(fname)
    assert path.exists() and path.is_file()
    return path
def __call__(self, parser, namespace, values, option_string=None):
    """argparse action callback: the part of ``values`` before any '#' must
    name an existing file; the full value (with fragment) is stored."""
    candidate = Path(values.split("#")[0])
    if not candidate.exists():
        raise argparse.ArgumentError(self, "file does not exist")
    setattr(namespace, self.dest, values)
def validate(self, log=None, validators=None):
    """Validate the dataset against its CLDF module specification.

    Checks that required tables and columns are present, that all
    `dc:conformsTo` and column property URIs are valid CLDF URIs, runs the
    registered per-column validators over the data, and verifies primary
    keys and referential integrity.

    :param log: If given, problems are logged; otherwise the first problem \
raises (behavior of `log_or_raise`).
    :param validators: Optional list of (table, column, validator) triples \
applied in addition to the default VALIDATORS.
    :return: True if no problems were found, False otherwise.
    """
    validators = validators or []
    validators.extend(VALIDATORS)
    success = True
    default_tg = TableGroup.from_file(
        pkg_path('modules', '{0}{1}'.format(self.module, MD_SUFFIX)))
    for default_table in default_tg.tables:
        dtable_uri = default_table.common_props['dc:conformsTo']
        try:
            table = self[dtable_uri]
        except KeyError:
            log_or_raise('{0} requires {1}'.format(self.module, dtable_uri), log=log)
            success = False
            table = None

        if table:
            # All columns required by the module spec must be present.
            default_cols = {
                c.propertyUrl.uri for c in default_table.tableSchema.columns
                if c.required or c.common_props.get('dc:isRequiredBy')}
            cols = {
                c.propertyUrl.uri for c in table.tableSchema.columns
                if c.propertyUrl}
            table_uri = table.common_props['dc:conformsTo']
            for col in default_cols - cols:
                log_or_raise('{0} requires column {1}'.format(table_uri, col), log=log)
                success = False

    for table in self.tables:
        type_uri = table.common_props.get('dc:conformsTo')
        if type_uri:
            try:
                TERMS.is_cldf_uri(type_uri)
            except ValueError:
                success = False
                log_or_raise('invalid CLDF URI: {0}'.format(type_uri), log=log)

        # FIXME: check whether table.common_props['dc:conformsTo'] is in validators!
        # Collect the validators applicable to this table's columns.
        validators_ = []
        for col in table.tableSchema.columns:
            if col.propertyUrl:
                col_uri = col.propertyUrl.uri
                try:
                    TERMS.is_cldf_uri(col_uri)
                except ValueError:
                    success = False
                    log_or_raise('invalid CLDF URI: {0}'.format(col_uri), log=log)
            for table_, col_, v_ in validators:
                if (not table_ or table is self.get(table_)) and col is self.get((table, col_)):
                    validators_.append((col, v_))

        fname = Path(table.url.resolve(table._parent.base))
        if fname.exists():
            for fname, lineno, row in table.iterdicts(log=log, with_metadata=True):
                for col, validate in validators_:
                    try:
                        validate(self, table, col, row)
                    except ValueError as e:
                        log_or_raise(
                            '{0}:{1}:{2} {3}'.format(fname.name, lineno, col.name, e),
                            log=log)
                        success = False
            if not table.check_primary_key(log=log):
                success = False
        else:
            log_or_raise('{0} does not exist'.format(fname), log=log)
            success = False

    if not self.tablegroup.check_referential_integrity(log=log):
        success = False

    return success
)) codes[denumber] = { 'ID': '{0}-{1}'.format(fid, denumber), 'Name': dename, 'Parameter_ID': fid, } fname, fauthors, aname = list(db.execute(SQL_FEATURE.format(fid)))[0] ds.write( ValueTable=values, LanguageTable=languages, ParameterTable=[{ 'ID': fid, 'Name': fname, 'Area': aname, 'Authors': fauthors, 'Url': 'http://wals.info/feature/' + fid}], CodeTable=codes.values(), ) if __name__ == '__main__': import sys db = create_engine(sys.argv[1]) feature = sys.argv[2] out = Path('wals_{0}_cldf'.format(feature)) if not out.exists(): out.mkdir() make_cldf(db, out, feature)
def from_file(cls, fname):
    """Alternate constructor: read configuration from ``fname`` if it exists."""
    instance = cls()
    fname = Path(fname)
    if fname.exists():
        instance.read(fname)
    return instance