def get_config(p):
    """Read a config file.

    Option values are coerced based on the suffix of the option name (the
    part after the last underscore): ``_int``, ``_boolean``, ``_float`` and
    ``_list`` select the corresponding typed getter; any other option is
    returned as a plain string.

    :param p: path of the config file (`Path` or path-like string).
    :return: dict of ('section.option', value) pairs.
    """
    if not isinstance(p, Path):
        p = Path(p)
    cfg = {}
    parser = ConfigParser()
    # read_file replaces readfp, which was deprecated since Python 3.2 and
    # removed in 3.12; the context manager also closes the file handle,
    # which the old code leaked.
    with p.open(encoding='utf8') as fp:
        parser.read_file(fp)
    for section in parser.sections():
        # rebuilt per section because the getters are bound to `section`
        getters = {
            'int': partial(parser.getint, section),
            'boolean': partial(parser.getboolean, section),
            'float': partial(parser.getfloat, section),
            'list': lambda option: parser.get(section, option).split(),
        }
        default = partial(parser.get, section)
        for option in parser.options(section):
            # the suffix after the last '_' selects the typed getter, if any
            type_ = option.rpartition('_')[2] if '_' in option else None
            value = getters.get(type_, default)(option)
            cfg['{0}.{1}'.format(section, option)] = value
    return cfg
def download_tables(outdir=None):
    """Download the code-tables zip archive and store it locally.

    The remote listing page is scanned for a matching zip file name; the
    archive is then fetched and written next to ``outdir`` (or the current
    directory).

    :param outdir: directory to store the downloaded file in; defaults to '.'.
    :return: `Path` of the downloaded file.
    """
    listing = _open('code_tables/download_tables').read().decode('utf-8-sig')
    match = ZIP_NAME_PATTERN.search(listing)
    if not match:
        raise ValueError('no matching zip file name found')  # pragma: no cover
    remote_name = match.group('name')
    # keep only the basename of the remote path for the local file name
    target = Path(outdir or '.') / remote_name.split('/')[-1]
    with target.open('wb') as fp:
        fp.write(_open(remote_name).read())
    return target
def write_data_file(comment_text, overwrite):
    """Extract an embedded data file from a comment block and write it out.

    The first line of ``comment_text`` must look like ``<label>:<path>``;
    the remaining lines are the file's content.

    :param comment_text: comment text holding the embedded data file.
    :param overwrite: if True, an existing file at the target path is replaced.
    :return: human-readable status message.
    """
    lines = comment_text.split("\n")
    filename = Path(lines[0].split(":", 1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists! Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        # create intermediate directories too, so deeply nested targets work
        filename.parent.mkdir(parents=True)
    with filename.open("w", encoding='utf8') as fp:
        fp.write("\n".join(lines[1:]))
    return "Wrote embedded data file %s.\n" % filename
def write_data_file(comment_text, overwrite):
    """Write out a data file that is embedded in a comment block.

    :param comment_text: comment whose first line names the target path
        (``label: path``) and whose remaining lines are the file body.
    :param overwrite: replace an existing file when True.
    :return: status message describing what happened.
    """
    header, _, body = comment_text.partition("\n")
    filename = Path(header.split(":", 1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists! Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()
    with filename.open("w", encoding='utf8') as fp:
        fp.write(body)
    return "Wrote embedded data file %s.\n" % filename
def load_normalized(_path):
    """Normalization for quasi-identical strings which are often confused.

    Each non-comment, non-blank line of the file holds a tab-separated
    ``source<TAB>target`` pair; escape sequences in the source column
    (e.g. ``\\u0041``) are decoded.

    :param _path: path of the normalization table; if it does not point to
        an existing file it is resolved via ``local_path``.
    :return: dict mapping source strings to their normalization targets.
    """
    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)
    norms = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
                # NOTE(security): eval() of file content decodes escape
                # sequences in the source column; only trusted data files
                # should be loaded through this function.
                # The target column was previously eval'd as a raw string
                # literal, which evaluates to the literal text itself, so
                # it is now used directly.
                norms[eval('"' + source + '"')] = target
    return norms
def from_file(cls, bibFile, encoding='utf8', lowercase=False):
    """Create bibtex database from a bib-file.

    @param bibFile: path of the bibtex-database-file to be read.
    @param encoding: text encoding used to read the file.
    @param lowercase: passed through to ``Record.from_string``.
    """
    if not isinstance(bibFile, Path):
        bibFile = Path(bibFile)
    if bibFile.exists():
        with bibFile.open(encoding=encoding) as fp:
            content = fp.read()
    else:
        # a missing file yields an empty database rather than an error
        content = ''
    # raw string for the regex (non-raw '\s' is a SyntaxWarning on modern
    # Python) and keyword args for maxsplit/flags (positional use is
    # deprecated since 3.13); splitting on leading '@' yields one chunk
    # per record, with the pre-'@' prefix as the (empty) first element.
    return cls(
        Record.from_string('@' + m, lowercase=lowercase)
        for m in re.split(r'^\s*@', content, maxsplit=0, flags=re.MULTILINE))
def load_alias(_path):
    """
    Alias are one-character sequences which we can convert on a step-by
    step basis by applying them successively to all subsegments of a
    segment.

    Each non-comment, non-blank line of the file holds a tab-separated
    ``source<TAB>target`` pair; escape sequences in the source column
    (e.g. ``\\u0041``) are decoded.

    :param _path: path of the alias table; if it does not point to an
        existing file it is resolved via ``local_path``.
    :return: dict mapping source strings to their alias targets.
    """
    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)
    alias = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
                # NOTE(security): eval() of file content decodes escape
                # sequences in the source column; only trusted data files
                # should be loaded through this function.
                # The target column was previously eval'd as a raw string
                # literal, which evaluates to the literal text itself, so
                # it is now used directly.
                alias[eval('"' + source + '"')] = target
    return alias
def create(self, req, filename=None, verbose=True):
    """Build the download archive for a dataset and move it into place.

    Serializes the items returned by ``self.query(req)`` either into a
    gzip file (RDF downloads) or into a zip archive with a README
    (all other downloads).

    :param req: current request; used to locate the dataset and to render \
    the README citation.
    :param filename: optional pre-built file to package into the zip \
    instead of serializing the query results.
    :param verbose: passed through to ``page_query`` — presumably controls \
    progress output; confirm against its definition.
    """
    p = self.abspath(req)
    if not p.parent.exists():  # pragma: no cover
        p.parent.mkdir()
    # Build under a temporary name and move into place at the end, so a
    # partially written archive is never visible at the final path.
    tmp = Path('%s.tmp' % p.as_posix())
    if self.rdf:
        # we do not create archives with a readme for rdf downloads, because each
        # RDF entity points to the dataset and the void description of the dataset
        # covers all relevant metadata.
        #
        # TODO: write test for the file name things!?
        #
        # Path(tmp.stem).stem strips both the '.tmp' and the preceding
        # extension to recover the inner file name stored in the gzip header.
        # NOTE(review): closing() closes the GzipFile but not the underlying
        # file object from tmp.open('wb') — consider closing it explicitly.
        with closing(
                GzipFile(
                    filename=Path(tmp.stem).stem, fileobj=tmp.open('wb'))) as fp:
            self.before(req, fp)
            for i, item in enumerate(
                    page_query(self.query(req), verbose=verbose)):
                self.dump(req, fp, item, i)
            self.after(req, fp)
    else:
        with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
            if not filename:
                # serialize the query results into an in-memory stream,
                # then store the whole stream as one zip member
                fp = self.get_stream()
                self.before(req, fp)
                for i, item in enumerate(
                        page_query(self.query(req), verbose=verbose)):
                    self.dump(req, fp, item, i)
                self.after(req, fp)
                zipfile.writestr(self.name, self.read_stream(fp))
            else:  # pragma: no cover
                zipfile.write(filename, self.name)
            # every zip download ships a README with license and citation info
            zipfile.writestr(
                'README.txt',
                README.format(
                    req.dataset.name,
                    '=' * (len(req.dataset.name) + len(' data download')),
                    req.dataset.license,
                    TxtCitation(None).render(req.dataset, req)).encode('utf8'))
    if p.exists():  # pragma: no cover
        remove(p)
    move(tmp, p)
def create(self, req, filename=None, verbose=True):
    """Build the download archive for a dataset and move it into place.

    Serializes the items returned by ``self.query(req)`` either into a
    gzip file (RDF downloads) or into a zip archive with a README
    (all other downloads).

    :param req: current request; used to locate the dataset and to render \
    the README citation.
    :param filename: optional pre-built file to package into the zip \
    instead of serializing the query results.
    :param verbose: passed through to ``page_query`` — presumably controls \
    progress output; confirm against its definition.
    """
    p = self.abspath(req)
    if not p.parent.exists():  # pragma: no cover
        p.parent.mkdir()
    # Build under a temporary name and move into place at the end, so a
    # partially written archive is never visible at the final path.
    tmp = Path('%s.tmp' % p.as_posix())
    if self.rdf:
        # we do not create archives with a readme for rdf downloads, because each
        # RDF entity points to the dataset and the void description of the dataset
        # covers all relevant metadata.
        #
        # TODO: write test for the file name things!?
        #
        # Path(tmp.stem).stem strips both the '.tmp' and the preceding
        # extension to recover the inner file name stored in the gzip header.
        # NOTE(review): closing() closes the GzipFile but not the underlying
        # file object from tmp.open('wb') — consider closing it explicitly.
        with closing(GzipFile(
            filename=Path(tmp.stem).stem, fileobj=tmp.open('wb')
        )) as fp:
            self.before(req, fp)
            for i, item in enumerate(page_query(self.query(req), verbose=verbose)):
                self.dump(req, fp, item, i)
            self.after(req, fp)
    else:
        with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
            if not filename:
                # serialize the query results into an in-memory stream,
                # then store the whole stream as one zip member
                fp = self.get_stream()
                self.before(req, fp)
                for i, item in enumerate(
                        page_query(self.query(req), verbose=verbose)):
                    self.dump(req, fp, item, i)
                self.after(req, fp)
                zipfile.writestr(self.name, self.read_stream(fp))
            else:  # pragma: no cover
                zipfile.write(filename, self.name)
            # every zip download ships a README with license and citation info
            zipfile.writestr(
                'README.txt',
                README.format(
                    req.dataset.name,
                    '=' * (
                        len(req.dataset.name) + len(' data download')),
                    req.dataset.license,
                    TxtCitation(None).render(req.dataset, req)).encode('utf8'))
    if p.exists():  # pragma: no cover
        remove(p)
    move(tmp, p)
def write(self, fname, **kw):
    """Serialize this object and write the result to ``fname``.

    :param fname: target path (`Path` or path-like string).
    :param kw: keyword arguments passed on to ``write_string``.
    """
    path = fname if isinstance(fname, Path) else Path(fname)
    content = self.write_string(**kw)
    with path.open('w', encoding='utf8') as handle:
        handle.write(content)
def write(self, fname, **kw):
    """Serialize this object and write the result to ``fname``.

    :param fname: target path (`Path` or path-like string).
    :param kw: keyword arguments passed on to ``write_string``.
    """
    target = Path(fname) if not isinstance(fname, Path) else fname
    with target.open('w', encoding='utf-8') as out:
        out.write(self.write_string(**kw))