def __init__(self, name, default=None, **kw):
    """Initialization.

    :param name: Basename for the config file (suffix .ini will be appended).
    :param default: Default content of the config file.
    """
    self.name = name
    self.default = default
    cfg_dir = Path(kw.pop('config_dir', None) or DIR)
    # Any remaining keyword arguments are passed on as parser defaults.
    RawConfigParser.__init__(self, kw, allow_no_value=True)
    if self.default:
        # Seed the parser from the default content before reading any file,
        # so values read from disk take precedence.
        if PY3:
            stream = io.StringIO(self.default)
        else:
            stream = io.BytesIO(self.default.encode('utf8'))
        self.readfp(stream)

    path = cfg_dir.joinpath(name + '.ini')
    if path.exists():
        assert path.is_file()
        self.read(path.as_posix())
    else:
        if not cfg_dir.exists():
            try:
                cfg_dir.mkdir()
            except OSError:  # pragma: no cover
                # this happens when run on travis-ci, by a system user.
                pass
        if cfg_dir.exists():
            # Persist the (default) configuration for future runs.
            with open(path.as_posix(), 'w') as outfile:
                self.write(outfile)
    self.path = path
def __init__(self, name, dir_=None, default=None, **kw):
    """Initialization.

    :param name: Basename for the config file (suffix .ini will be appended).
    :param default: Default content of the config file.
    """
    # Remaining keyword arguments are forwarded as parser defaults.
    INI.__init__(self, kw, allow_no_value=True)
    self.name = name
    config_dir = Path(dir_ or CONFIG_DIR)
    # Seed the parser from the default content (text only) before reading
    # the file on disk, so on-disk values win.
    if default and isinstance(default, text_type):
        self.read_string(default)

    cfg_path = config_dir.joinpath(name + '.ini')
    if cfg_path.exists():
        assert cfg_path.is_file()
        self.read(cfg_path.as_posix())
    else:
        if not config_dir.exists():
            try:
                config_dir.mkdir()
            except OSError:  # pragma: no cover
                # this happens when run on travis-ci, by a system user.
                pass
        if config_dir.exists():
            # Persist the (default) configuration for future runs.
            self.write(cfg_path.as_posix())
    self.path = cfg_path
def to_cldf(self, dest, mdname='cldf-metadata.json'):
    """
    Write the data from the db to a CLDF dataset according to the metadata
    in `self.dataset`.

    :param dest: output directory (created if it does not exist).
    :param mdname: filename to use for the dataset's metadata file.
    :return: path of the metadata file
    """
    dest = Path(dest)
    if not dest.exists():
        dest.mkdir()

    data = self.read()

    if data[self.source_table_name]:
        # Collect all sources into a BibTeX file alongside the tables.
        sources = Sources()
        for src in data[self.source_table_name]:
            sources.add(Source(
                src['genre'],
                src['id'],
                **{k: v for k, v in src.items() if k not in ['id', 'genre']}))
        sources.write(dest / self.dataset.properties.get('dc:source', 'sources.bib'))

    for table_type, items in data.items():
        try:
            table = self.dataset[table_type]
            table.common_props['dc:extent'] = table.write(
                [self.retranslate(table, item) for item in items],
                base=dest)
        except KeyError:
            # Only the source table is expected to have no CLDF table.
            assert table_type == self.source_table_name, table_type
    # BUG FIX: honor the `mdname` parameter instead of the hard-coded
    # 'cldf-metadata.json'. The default value is unchanged, so existing
    # callers are unaffected.
    return self.dataset.write_metadata(dest / mdname)
class Cache(object):
    """Dict-like cache that persists pickled values as files in a directory."""

    def __init__(self, dir_=None):
        self._dir = Path(dir_ or CACHE_DIR)
        if not self._dir.exists():
            self._dir.mkdir(parents=True)  # pragma: no cover

    def _path(self, key):
        # File that holds the pickled value for `key`.
        return self._dir.joinpath(path_component(key))

    def __len__(self):
        return sum(1 for _ in self.keys())

    def __getitem__(self, item):
        with self._path(item).open('rb') as stream:
            return pickle.load(stream)

    def __setitem__(self, key, value):
        with self._path(key).open('wb') as stream:
            pickle.dump(value, stream)

    def __delitem__(self, key):
        remove(self._path(key))

    def __contains__(self, item):
        return self._path(item).exists()

    def keys(self):
        """Yield the cached keys — one per file in the cache directory."""
        for entry in self._dir.iterdir():
            yield as_unicode(entry.name)

    def clear(self):
        """Remove every cached entry."""
        for key in self.keys():
            remove(self._path(key))
def in_dir(cls, d, empty_tables=False):
    """Instantiate a dataset from the metadata found in directory `d`.

    :param d: target directory (created if missing).
    :param empty_tables: if True, drop all tables from the result.
    :return: the object created by `cls.from_metadata`.
    """
    target = Path(d)
    if not target.exists():
        target.mkdir()
    assert target.is_dir()
    res = cls.from_metadata(target)
    if empty_tables:
        del res.tables[:]
    return res
def new_dataset(args):
    """
    lexibank new-dataset OUTDIR [ID]

    Create the skeleton for a new lexibank dataset below OUTDIR, prompting
    interactively for metadata and instantiating the files from pylexibank's
    `dataset_template` directory.

    :raises ParserError: if no existing output directory was specified.
    """
    if not args.args:
        raise ParserError('you must specify an existing directory')
    outdir = Path(args.args.pop(0))
    if not outdir.exists():
        raise ParserError('you must specify an existing directory')

    # Dataset ids are restricted to lowercase ASCII letters, digits and "_".
    # BUG FIX: regex patterns are now raw strings; the previous
    # '\+([a-z]+)\+' below contained invalid escape sequences (a
    # SyntaxWarning/DeprecationWarning on modern Python).
    id_pattern = re.compile(r'[a-z_0-9]+$')
    md = {}
    if args.args:
        md['id'] = args.args.pop(0)
    else:
        md['id'] = input('Dataset ID: ')
    while not id_pattern.match(md['id']):
        print(
            'dataset id must only consist of lowercase ascii letters, digits and _ (underscore)!'
        )
        md['id'] = input('Dataset ID: ')

    outdir = outdir / md['id']
    if not outdir.exists():
        outdir.mkdir()

    for key in ['title', 'url', 'license', 'conceptlist', 'citation']:
        md[key] = input('Dataset {0}: '.format(key))

    # check license!
    # check conceptlist!

    for path in Path(
            pylexibank.__file__).parent.joinpath('dataset_template').iterdir():
        if path.is_file():
            if path.suffix in ['.pyc']:
                continue  # pragma: no cover
            target = path.name
            content = read_text(path)
            if '+' in path.name:
                # "+key+" placeholders in file names become "{key}" and are
                # filled in from the collected metadata.
                target = re.sub(
                    r'\+([a-z]+)\+',
                    lambda m: '{' + m.groups()[0] + '}',
                    path.name).format(**md)
            if target.endswith('_tmpl'):
                # "*_tmpl" files are format-string templates for the content.
                target = target[:-5]
                content = content.format(**md)
            write_text(outdir / target, content)
        else:
            # Directories are copied wholesale, replacing any existing copy.
            target = outdir / path.name
            if target.exists():
                shutil.rmtree(str(target))
            shutil.copytree(str(path), str(target))
    del md['id']
    jsonlib.dump(md, outdir / 'metadata.json', indent=4)
def curate(args):  # pragma: no cover
    """Interactive "curator" shell for running lexibank commands.

    Reads lines of the form '<command> [dataset]' in a prompt-toolkit loop
    until 'quit', EOF or Ctrl-C; commands are validated against the
    module-level `commands` mapping, dataset ids against the configuration.
    """
    # Known datasets keyed by id, used for validation and tab completion.
    datasets = {ds.id: ds for ds in args.cfg.datasets}

    class TheCompleter(Completer):
        # prompt-toolkit completer: once a valid command has been typed,
        # complete dataset ids; otherwise complete command names.
        def get_completions(self, document, complete_event):
            word_before_cursor = document.get_word_before_cursor(WORD=True)
            words = document.text_before_cursor.split()
            if words and words[0] in commands:
                for ds in fuzzyfinder(word_before_cursor, datasets):
                    yield Completion(ds, start_position=-len(word_before_cursor))
            else:  # elif word_before_cursor:
                for c in fuzzyfinder(word_before_cursor, commands):
                    yield Completion(c, start_position=-len(word_before_cursor))

    user_input = []
    # Prompt history is persisted in the per-user application data directory.
    appdir = Path(user_data_dir('lexibank'))
    if not appdir.exists():
        appdir.mkdir(parents=True)

    while not user_input or user_input[0] != 'quit':
        try:
            user_input = prompt(
                u'lexibank-curator> ',
                history=FileHistory(str(appdir / 'history.txt')),
                auto_suggest=AutoSuggestFromHistory(),
                completer=TheCompleter(),
            ).split()
        except EOFError:
            break
        except KeyboardInterrupt:
            break
        if len(user_input) == 0:
            continue  # ignore empty commands
        if user_input[0] not in commands:
            print(colored('Invalid command!', 'red'))
            continue
        if len(user_input) > 1 and user_input[1] not in datasets:
            print(colored('Invalid dataset!', 'red'))
            continue
        # Everything after the command name is passed through as arguments.
        args.args = user_input[1:]
        try:
            # Time the command and report elapsed seconds.
            s = time()
            commands[user_input[0]](args)
            print('[{0:.3f}]'.format(time() - s))
        except Exception as e:
            # Keep the shell alive on command failure; show the traceback.
            traceback.print_exc()
            print(colored('{0}: {1}'.format(e.__class__.__name__, e), 'red'))

    print('see ya!')
def write_info(self, outdir=None):
    """Write this object's config as an INI file into `outdir`.

    :param outdir: target directory; defaults to `self.id`.
    :return: path of the written .ini file.
    """
    outdir = outdir or self.id
    if not isinstance(outdir, Path):
        outdir = Path(outdir)
    if not outdir.exists():
        outdir.mkdir()
    fname = outdir.joinpath(self.fname('.ini'))
    self.cfg.write(fname)
    if os.linesep == '\n':
        # On platforms whose native line ending is LF, rewrite the file with
        # CRLF — presumably to keep the files byte-identical across
        # platforms; TODO confirm against the project's conventions.
        with fname.open(encoding='utf8') as handle:
            content = handle.read()
        with fname.open('w', encoding='utf8') as handle:
            handle.write(content.replace('\n', '\r\n'))
    return fname
def lff2tree(tree=TREE, outdir=None, builddir=None, lffs=None):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    # FIXME: instead of removing trees, we should just move the current one
    # from outdir to build, and then recreate in outdir.
    builddir = Path(builddir) if builddir else build_path('tree')
    old_tree = {l.id: l for l in walk_tree(tree)} if tree else {}
    out = Path(outdir or tree)
    if not out.parent.exists():
        out.parent.mkdir()

    if out.exists():
        if builddir.exists():
            try:
                rmtree(builddir)
            # BUG FIX: narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt and SystemExit. Filesystem removal failures
            # raise OSError (and subclasses).
            except OSError:  # pragma: no cover
                # this happens when run on travis-ci, by a system user.
                pass
        if builddir.exists():  # pragma: no cover
            raise ValueError('please remove %s before proceeding' % builddir)
        # move the old tree out of the way
        shutil.move(out.as_posix(), builddir.as_posix())
    out.mkdir()

    lffs = lffs or {}
    languages = {}
    for lang in read_lff(Level.language, fp=lffs.get(Level.language)):
        languages[lang.id] = lang
        lang2tree(lang, lang.lineage, out, old_tree)

    for lang in read_lff(Level.dialect, fp=lffs.get(Level.dialect)):
        if not lang.lineage or lang.lineage[0][1] not in languages:
            raise ValueError('unattached dialect')  # pragma: no cover
        # Dialects are rooted below their language's full lineage.
        lang2tree(
            lang, languages[lang.lineage[0][1]].lineage + lang.lineage, out, old_tree)
def lff2tree(tree=TREE, outdir=None, builddir=None, lffs=None):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    # FIXME: instead of removing trees, we should just move the current one
    # from outdir to build, and then recreate in outdir.
    builddir = Path(builddir) if builddir else build_path("tree")
    old_tree = {l.id: l for l in walk_tree(tree)} if tree else {}
    out = Path(outdir or tree)
    if not out.parent.exists():
        out.parent.mkdir()

    if out.exists():
        if builddir.exists():
            try:
                rmtree(builddir)
            # BUG FIX: narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt and SystemExit. Filesystem removal failures
            # raise OSError (and subclasses).
            except OSError:  # pragma: no cover
                pass
        if builddir.exists():  # pragma: no cover
            raise ValueError("please remove %s before proceeding" % builddir)
        # move the old tree out of the way
        shutil.move(out.as_posix(), builddir.as_posix())
    out.mkdir()

    lffs = lffs or {}
    languages = {}
    for lang in read_lff(Level.language, fp=lffs.get(Level.language)):
        languages[lang.id] = lang
        lang2tree(lang, lang.lineage, out, old_tree)

    for lang in read_lff(Level.dialect, fp=lffs.get(Level.dialect)):
        if not lang.lineage or lang.lineage[0][1] not in languages:
            raise ValueError("unattached dialect")  # pragma: no cover
        # Dialects are rooted below their language's full lineage.
        lang2tree(
            lang, languages[lang.lineage[0][1]].lineage + lang.lineage, out, old_tree)
def lff2tree(tree=TREE, outdir=None, test=False):
    """
    - get mapping glottocode -> Languoid from old tree
    - assemble new directory tree
      - for each path component in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
      - for each language/dialect in lff/dff:
        - create new dir
        - copy info file from old tree (possibly updating the name) or
        - create info file
    - rm old tree
    - copy new tree
    """
    target = Path(outdir or build_path('tree'))
    if not target.parent.exists():
        target.parent.mkdir()
    if target.exists():
        rmtree(target)
    target.mkdir()

    # Map glottocode -> Languoid for everything in the old tree.
    lookup = {l.id: l for l in walk_tree(tree)} if tree else {}

    by_id = {}
    for languoid in read_lff('language'):
        by_id[languoid.id] = languoid
        lang2tree(languoid, languoid.lineage, target, lookup)

    for languoid in read_lff('dialect'):
        if not languoid.lineage or languoid.lineage[0][1] not in by_id:
            raise ValueError('unattached dialect')
        # Dialects are rooted below their language's full lineage.
        lang2tree(
            languoid,
            by_id[languoid.lineage[0][1]].lineage + languoid.lineage,
            target,
            lookup)

    if not test:
        # Swap the freshly built tree in for the canonical one.
        rmtree(TREE, ignore_errors=True)
        copytree(target, TREE)
# NOTE(review): this chunk looks like the tail of a WALS feature-export
# routine — the names `codes`, `denumber`, `dename`, `fid`, `db`, `ds`,
# `values` and `languages` are bound earlier, outside the visible excerpt.
codes[denumber] = {
    'ID': '{0}-{1}'.format(fid, denumber),
    'Name': dename,
    'Parameter_ID': fid,
}

# Fetch the feature's name, authors and area; exactly one row is expected.
fname, fauthors, aname = list(db.execute(SQL_FEATURE.format(fid)))[0]

ds.write(
    ValueTable=values,
    LanguageTable=languages,
    ParameterTable=[{
        'ID': fid,
        'Name': fname,
        'Area': aname,
        'Authors': fauthors,
        'Url': 'http://wals.info/feature/' + fid}],
    CodeTable=codes.values(),
)


if __name__ == '__main__':
    # Usage: <script> <db-url> <feature-id>
    import sys
    db = create_engine(sys.argv[1])
    feature = sys.argv[2]
    out = Path('wals_{0}_cldf'.format(feature))
    if not out.exists():
        out.mkdir()
    make_cldf(db, out, feature)
def lff2tree(lff, tree=None, outdir='fromlff'):
    """Recreate a languoid directory tree from lff/dff text files.

    :param lff: path of the language file; the dialect file name is derived \
by replacing 'lff' with 'dff'.
    :param tree: root of the old languoid tree, or None.
    :param outdir: directory into which the new tree is written.
    """
    out = Path(outdir)
    # NOTE(review): mkdir raises if `outdir` already exists — confirm intended.
    out.mkdir()
    # Map glottocode -> Languoid for everything in the old tree.
    old_tree = {l.id: l for l in languoids_from_tree(tree)} if tree else {}
    nodes = set()  # ids encountered while rebuilding the tree
    languages = {}  # language id -> parsed record; anchors the dialects below

    for lang in read_lff(lff, 'language'):
        groupdir = out
        languages[lang.id] = lang
        # Create/refresh one directory level per lineage component.
        for name, id_, level in lang.lineage:
            groupdir = groupdir.joinpath('%s.%s' % (slug(name), id_))
            if not groupdir.exists():
                groupdir.mkdir()
                if id_ in old_tree:
                    group = old_tree[id_]
                    assert group.level == level
                    if name != group.name:
                        # rename a subgroup!
                        group.name = name
                    group.write_info(groupdir)
                else:
                    # TODO: create Languoid, write info file!
                    pass
            assert id_ in old_tree
            nodes.add(id_)
        assert lang.id in old_tree
        nodes.add(lang.id)
        old_lang = old_tree[lang.id]
        assert old_lang.level == lang.level
        if old_lang.name != lang.name:
            old_lang.name = lang.name
        langdir = groupdir.joinpath(lang.fname())
        langdir.mkdir()
        old_lang.write_info(langdir)

    for lang in read_lff(lff.replace('lff', 'dff'), 'dialect'):
        groupdir = out
        if not lang.lineage:
            # TODO: handle error of un-attached dialects!
            continue
        # Dialects are rooted below their language's full lineage.
        for name, id_, level in languages[lang.lineage[0][1]].lineage + lang.lineage:
            groupdir = groupdir.joinpath('%s.%s' % (slug(name), id_))
            if not groupdir.exists():
                groupdir.mkdir()
                if id_ in old_tree:
                    group = old_tree[id_]
                    assert group.level == level
                    if name != group.name:
                        # rename a subgroup!
                        group.name = name
                    group.write_info(groupdir)
                else:
                    # TODO: create Languoid, write info file!
                    pass
            assert id_ in old_tree
            nodes.add(id_)
        assert lang.id in old_tree
        nodes.add(lang.id)
        old_lang = old_tree[lang.id]
        assert old_lang.level == lang.level
        if old_lang.name != lang.name:
            old_lang.name = lang.name
        langdir = groupdir.joinpath(lang.fname())
        langdir.mkdir()
        old_lang.write_info(langdir)

    # Python 2 print statement — this variant of the module targets py2.
    print len(nodes)
)) codes[denumber] = { 'ID': '{0}-{1}'.format(fid, denumber), 'Name': dename, 'Parameter_ID': fid, } fname, fauthors, aname = list(db.execute(SQL_FEATURE.format(fid)))[0] ds.write( ValueTable=values, LanguageTable=languages, ParameterTable=[{ 'ID': fid, 'Name': fname, 'Area': aname, 'Authors': fauthors, 'Url': 'http://wals.info/feature/' + fid}], CodeTable=codes.values(), ) if __name__ == '__main__': import sys db = create_engine(sys.argv[1]) feature = sys.argv[2] out = Path('wals_{0}_cldf'.format(feature)) if not out.exists(): out.mkdir() make_cldf(db, out, feature)
def lff2tree(lff, tree=None, outdir='fromlff'):
    """Recreate a languoid directory tree from lff/dff text files.

    :param lff: path of the language file; the dialect file name is derived \
by replacing 'lff' with 'dff'.
    :param tree: root of the old languoid tree, or None.
    :param outdir: directory into which the new tree is written.
    """
    out = Path(outdir)
    # NOTE(review): mkdir raises if `outdir` already exists — confirm intended.
    out.mkdir()
    # Map glottocode -> Languoid for everything in the old tree.
    old_tree = {l.id: l for l in languoids_from_tree(tree)} if tree else {}
    nodes = set()  # ids encountered while rebuilding the tree
    languages = {}  # language id -> parsed record; anchors the dialects below

    for lang in read_lff(lff, 'language'):
        groupdir = out
        languages[lang.id] = lang
        # Create/refresh one directory level per lineage component.
        for name, id_, level in lang.lineage:
            groupdir = groupdir.joinpath('%s.%s' % (slug(name), id_))
            if not groupdir.exists():
                groupdir.mkdir()
                if id_ in old_tree:
                    group = old_tree[id_]
                    assert group.level == level
                    if name != group.name:
                        # rename a subgroup!
                        group.name = name
                    group.write_info(groupdir)
                else:
                    # TODO: create Languoid, write info file!
                    pass
            assert id_ in old_tree
            nodes.add(id_)
        assert lang.id in old_tree
        nodes.add(lang.id)
        old_lang = old_tree[lang.id]
        assert old_lang.level == lang.level
        if old_lang.name != lang.name:
            old_lang.name = lang.name
        langdir = groupdir.joinpath(lang.fname())
        langdir.mkdir()
        old_lang.write_info(langdir)

    for lang in read_lff(lff.replace('lff', 'dff'), 'dialect'):
        groupdir = out
        if not lang.lineage:
            # TODO: handle error of un-attached dialects!
            continue
        # Dialects are rooted below their language's full lineage.
        for name, id_, level in languages[lang.lineage[0]
                                          [1]].lineage + lang.lineage:
            groupdir = groupdir.joinpath('%s.%s' % (slug(name), id_))
            if not groupdir.exists():
                groupdir.mkdir()
                if id_ in old_tree:
                    group = old_tree[id_]
                    assert group.level == level
                    if name != group.name:
                        # rename a subgroup!
                        group.name = name
                    group.write_info(groupdir)
                else:
                    # TODO: create Languoid, write info file!
                    pass
            assert id_ in old_tree
            nodes.add(id_)
        assert lang.id in old_tree
        nodes.add(lang.id)
        old_lang = old_tree[lang.id]
        assert old_lang.level == lang.level
        if old_lang.name != lang.name:
            old_lang.name = lang.name
        langdir = groupdir.joinpath(lang.fname())
        langdir.mkdir()
        old_lang.write_info(langdir)

    # Python 2 print statement — this variant of the module targets py2.
    print len(nodes)