Пример #1
0
def get_config(p):
    """Read a config file into a flat dictionary.

    The conversion applied to an option value is chosen from the part of the
    option name after its last underscore: ``int``, ``boolean`` and ``float``
    use the matching ``ConfigParser`` getter, ``list`` splits the raw value on
    whitespace.  Options without an underscore, or with any other suffix, are
    returned as plain strings.

    :param p: path of the config file; a ``Path`` or anything ``Path`` accepts.
    :return: dict of ('section.option', value) pairs.
    """
    if not isinstance(p, Path):
        p = Path(p)
    cfg = {}

    parser = ConfigParser()
    # `readfp` was deprecated in Python 3.2 and removed in 3.12; prefer
    # `read_file` and only fall back to `readfp` on parsers that lack it.
    read_file = getattr(parser, 'read_file', None) or parser.readfp
    # The context manager makes sure the file handle is closed again.
    with p.open(encoding='utf8') as fp:
        read_file(fp)

    for section in parser.sections():
        # Typed getters for this section, keyed by option-name suffix.
        getters = {
            'int': partial(parser.getint, section),
            'boolean': partial(parser.getboolean, section),
            'float': partial(parser.getfloat, section),
            'list': lambda option, section=section: parser.get(section, option).split(),
        }
        default = partial(parser.get, section)
        for option in parser.options(section):
            type_ = option.rpartition('_')[2] if '_' in option else None
            value = getters.get(type_, default)(option)
            cfg['{0}.{1}'.format(section, option)] = value

    return cfg
Пример #2
0
def download_tables(outdir=None):
    """Fetch the zip file referenced by the tables download page.

    The resource ``code_tables/download_tables`` is read through the module's
    ``_open`` helper, decoded as UTF-8 (BOM tolerated) and searched with
    ``ZIP_NAME_PATTERN``.  The resource named by the ``name`` group of the
    match is then read through ``_open`` as well and stored under its base
    name.

    :param outdir: directory to store the file in; the current directory is \
    used when this is empty or ``None``.
    :return: ``Path`` of the written file.
    :raises ValueError: if the page does not contain a matching name.
    """
    page = _open('code_tables/download_tables').read().decode('utf-8-sig')
    found = ZIP_NAME_PATTERN.search(page)
    if not found:
        raise ValueError('no matching zip file name found')  # pragma: no cover

    remote_name = found.group('name')
    basename = remote_name.split('/')[-1]
    target = Path(outdir or '.') / basename
    # The target is opened before the payload is requested, so a failing
    # request still leaves an (empty) file behind - same as it always did.
    with target.open('wb') as out:
        payload = _open(remote_name).read()
        out.write(payload)
    return target
Пример #3
0
def write_data_file(comment_text, overwrite):
    """Write a data file embedded in a comment to disk.

    The first line of ``comment_text`` must contain a colon; everything after
    the first colon (stripped) is used as the path of the file.  All following
    lines make up the file content, which is written UTF-8 encoded.

    :param comment_text: the comment holding the path line and the content.
    :param overwrite: when false, an already existing file is left untouched.
    :return: a status message (terminated by a newline) describing the outcome.
    :raises IndexError: if the first line does not contain a colon.
    """
    lines = comment_text.split("\n")
    filename = Path(lines[0].split(":",1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        # parents=True: the embedded path may point several directory levels
        # below anything that exists yet; a plain mkdir() would fail then.
        filename.parent.mkdir(parents=True)
    with filename.open("w", encoding='utf8') as fp:
        fp.write("\n".join(lines[1:]))
    return "Wrote embedded data file %s.\n" % filename
Пример #4
0
def write_data_file(comment_text, overwrite):
    """Extract an embedded data file from a comment and save it.

    ``comment_text`` starts with a line containing a colon; the text after the
    first colon, stripped of surrounding whitespace, is the target path.  The
    rest of the text is written to that path as UTF-8.

    :param comment_text: comment consisting of the path line plus the content.
    :param overwrite: whether an existing file may be replaced.
    :return: a newline-terminated message saying what was done.
    :raises IndexError: if the first line has no colon.
    """
    lines = comment_text.split("\n")
    filename = Path(lines[0].split(":", 1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        # Create every missing ancestor, not just the direct parent, so that
        # nested target paths do not fail with "No such file or directory".
        filename.parent.mkdir(parents=True)
    with filename.open("w", encoding='utf8') as fp:
        fp.write("\n".join(lines[1:]))
    return "Wrote embedded data file %s.\n" % filename
Пример #5
0
def load_normalized(_path):
    """Normalization for quasi-identical strings which are often confused.

    Reads a table with one tab-separated ``source``/``target`` pair per line.
    Blank lines and lines starting with ``#`` are skipped.  The source column
    is interpreted like the body of a Python string literal (so escape
    sequences are decoded), the target column like the body of a raw string
    literal.

    :param _path: path of the table; when it is not an existing file it is \
    resolved with ``local_path``.
    :return: dict mapping each decoded source string to its target string.
    :raises ValueError: if a data line does not consist of exactly two \
    columns, or a column is not a plain string literal body.
    """
    # Imported locally so the function does not rely on a module-level import.
    import ast

    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)
    norms = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
                # NOTE(security): this used to be eval(), which executes any
                # expression smuggled into the table.  literal_eval decodes
                # the same string literals but refuses everything else.
                key = ast.literal_eval('"' + source + '"')
                norms[key] = ast.literal_eval('r"' + target + '"')
    return norms
Пример #6
0
def load_normalized(_path):
    """Normalization for quasi-identical strings which are often confused.

    The table holds one ``source<TAB>target`` pair per line; empty lines and
    lines beginning with ``#`` are ignored.  Escape sequences in the source
    column are decoded as in a Python string literal, while the target column
    is taken as a raw string literal.

    :param _path: location of the table; ``local_path`` is consulted when it \
    does not name an existing file.
    :return: dict of decoded source string -> target string.
    :raises ValueError: for data lines without exactly two columns and for \
    columns that are not the body of a plain string literal.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)
    norms = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if line.startswith('#') or not line.strip():
                continue
            source, target = line.strip().split('\t')
            # NOTE(security): eval() was used here before and would run
            # arbitrary code found in the table.  ast.literal_eval parses the
            # same string literals and rejects any other expression.
            norms[ast.literal_eval('"' + source + '"')] = \
                ast.literal_eval('r"' + target + '"')
    return norms
Пример #7
0
    def from_file(cls, bibFile, encoding='utf8', lowercase=False):
        """Create bibtex database from a bib-file.

        A file that does not exist is treated like an empty one.

        @param bibFile: path of the bibtex-database-file to be read.
        @param encoding: encoding used to decode the file content.
        @param lowercase: passed on to C{Record.from_string} for each record.
        @return: the result of calling C{cls} with a generator of records.
        """
        if not isinstance(bibFile, Path):
            bibFile = Path(bibFile)
        if bibFile.exists():
            with bibFile.open(encoding=encoding) as fp:
                content = fp.read()
        else:
            content = ''

        # Raw string: '\s' in a plain literal is an invalid escape sequence
        # (SyntaxWarning on recent Pythons).  maxsplit/flags are passed by
        # keyword because passing them positionally is deprecated.
        # Every chunk - including the text before the first '@' - gets its
        # '@' back before parsing; filtering of unusable records is
        # presumably left to cls (not visible here).
        chunks = re.split(r'^\s*@', content, maxsplit=0, flags=re.MULTILINE)
        return cls((Record.from_string('@' + m, lowercase=lowercase)
                    for m in chunks))
Пример #8
0
def load_alias(_path):
    """
    Alias are one-character sequences which we can convert on a step-by step
    basis by applying them successively to all subsegments of a segment.

    The table is read as tab-separated ``source``/``target`` pairs, one per
    line; blank lines and lines starting with ``#`` are skipped.  The source
    column is decoded like the body of a Python string literal, the target
    column like the body of a raw string literal.

    :param _path: path of the table; resolved with ``local_path`` when it is \
    not an existing file.
    :return: dict mapping decoded source strings to target strings.
    :raises ValueError: if a data line does not have exactly two columns, or \
    a column is not a plain string literal body.
    """
    # Imported locally so the function does not rely on a module-level import.
    import ast

    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)

    alias = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
                # NOTE(security): this used to be eval(), which executes any
                # expression smuggled into the table.  literal_eval decodes
                # the same string literals but refuses everything else.
                key = ast.literal_eval('"' + source + '"')
                alias[key] = ast.literal_eval('r"' + target + '"')
    return alias
Пример #9
0
def load_alias(_path):
    """
    Alias are one-character sequences which we can convert on a step-by step
    basis by applying them successively to all subsegments of a segment.

    Each data line of the table is a ``source<TAB>target`` pair; empty lines
    and lines beginning with ``#`` are ignored.  Escape sequences in the
    source column are decoded as in a Python string literal, the target
    column is taken as a raw string literal.

    :param _path: location of the table; ``local_path`` is consulted when it \
    does not name an existing file.
    :return: dict of decoded source string -> target string.
    :raises ValueError: for data lines without exactly two columns and for \
    columns that are not the body of a plain string literal.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)

    alias = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if line.startswith('#') or not line.strip():
                continue
            source, target = line.strip().split('\t')
            # NOTE(security): eval() was used here before and would run
            # arbitrary code found in the table.  ast.literal_eval parses the
            # same string literals and rejects any other expression.
            alias[ast.literal_eval('"' + source + '"')] = \
                ast.literal_eval('r"' + target + '"')
    return alias
Пример #10
0
    def create(self, req, filename=None, verbose=True):
        """Create the download file for ``req``.

        The content is first written to a temporary file next to the final
        location (``self.abspath(req)`` plus a ``.tmp`` suffix) and moved into
        place at the end, replacing any existing file.

        When ``self.rdf`` is set, the dump is written as a gzip stream;
        otherwise a zip archive with the dump (named ``self.name``) and a
        ``README.txt`` is created.

        :param req: request object; passed to the hooks and used for the \
        dataset metadata in the README.
        :param filename: zip case only - path of an existing file to add to \
        the archive instead of dumping the query result.
        :param verbose: passed on to ``page_query``.
        """
        p = self.abspath(req)
        if not p.parent.exists():  # pragma: no cover
            p.parent.mkdir()
        tmp = Path('%s.tmp' % p.as_posix())

        if self.rdf:
            # we do not create archives with a readme for rdf downloads, because each
            # RDF entity points to the dataset and the void description of the dataset
            # covers all relevant metadata.
            #
            # TODO: write test for the file name things!?
            #
            # GzipFile.close() does not close a file object passed in as
            # `fileobj`, so the raw file is managed by its own `with` block.
            # That guarantees it is flushed and closed before tmp is moved.
            with tmp.open('wb') as raw:
                # tmp.stem drops '.tmp'; the second .stem drops one more
                # suffix (presumably the compression extension).
                with closing(
                        GzipFile(filename=Path(tmp.stem).stem,
                                 fileobj=raw)) as fp:
                    self.before(req, fp)
                    for i, item in enumerate(
                            page_query(self.query(req), verbose=verbose)):
                        self.dump(req, fp, item, i)
                    self.after(req, fp)
        else:
            with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
                if not filename:
                    fp = self.get_stream()
                    self.before(req, fp)
                    for i, item in enumerate(
                            page_query(self.query(req), verbose=verbose)):
                        self.dump(req, fp, item, i)
                    self.after(req, fp)
                    zipfile.writestr(self.name, self.read_stream(fp))
                else:  # pragma: no cover
                    zipfile.write(filename, self.name)
                zipfile.writestr(
                    'README.txt',
                    README.format(
                        req.dataset.name,
                        # underline matching the length of the title line
                        '=' * (len(req.dataset.name) + len(' data download')),
                        req.dataset.license,
                        TxtCitation(None).render(req.dataset,
                                                 req)).encode('utf8'))
        if p.exists():  # pragma: no cover
            remove(p)
        move(tmp, p)
Пример #11
0
    def create(self, req, filename=None, verbose=True):
        """Write the download for ``req`` and move it to its final location.

        Output goes to a temporary file (``self.abspath(req)`` with ``.tmp``
        appended) which replaces the target once it is complete.  With
        ``self.rdf`` set a gzip stream is produced, otherwise a zip archive
        holding the dump under ``self.name`` plus a ``README.txt``.

        :param req: request object handed to the ``before``/``dump``/``after`` \
        hooks; its ``dataset`` supplies the README metadata.
        :param filename: zip case only - an existing file to store in the \
        archive instead of dumping the query result.
        :param verbose: forwarded to ``page_query``.
        """
        p = self.abspath(req)
        if not p.parent.exists():  # pragma: no cover
            p.parent.mkdir()
        tmp = Path('%s.tmp' % p.as_posix())

        if self.rdf:
            # we do not create archives with a readme for rdf downloads, because each
            # RDF entity points to the dataset and the void description of the dataset
            # covers all relevant metadata.
            #
            # TODO: write test for the file name things!?
            #
            # The raw file gets its own context manager: closing the GzipFile
            # leaves a caller-supplied `fileobj` open, and the handle must be
            # flushed and closed before tmp is moved below.
            with tmp.open('wb') as raw:
                with closing(GzipFile(
                        filename=Path(tmp.stem).stem, fileobj=raw
                )) as fp:
                    self.before(req, fp)
                    for i, item in enumerate(page_query(self.query(req), verbose=verbose)):
                        self.dump(req, fp, item, i)
                    self.after(req, fp)
        else:
            with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
                if not filename:
                    fp = self.get_stream()
                    self.before(req, fp)
                    for i, item in enumerate(
                            page_query(self.query(req), verbose=verbose)):
                        self.dump(req, fp, item, i)
                    self.after(req, fp)
                    zipfile.writestr(self.name, self.read_stream(fp))
                else:  # pragma: no cover
                    zipfile.write(filename, self.name)
                zipfile.writestr(
                    'README.txt',
                    README.format(
                        req.dataset.name,
                        # underline as long as the title line
                        '=' * (
                            len(req.dataset.name)
                            + len(' data download')),
                        req.dataset.license,
                        TxtCitation(None).render(req.dataset, req)).encode('utf8'))
        if p.exists():  # pragma: no cover
            remove(p)
        move(tmp, p)
Пример #12
0
 def write(self, fname, **kw):
     """Serialize this object with ``write_string`` and save it as UTF-8 text.

     :param fname: target path; a ``Path`` or anything ``Path`` accepts.
     :param kw: keyword arguments passed on to ``write_string``.
     """
     if not isinstance(fname, Path):
         fname = Path(fname)
     # Serialize first: opening with mode 'w' truncates the target, so a
     # failure inside write_string must happen before the file is touched.
     content = self.write_string(**kw)
     with fname.open('w', encoding='utf8') as fp:
         fp.write(content)
Пример #13
0
 def write(self, fname, **kw):
     """Write the result of ``write_string`` to a UTF-8 encoded text file.

     :param fname: path of the file to write, as ``Path`` or plain string.
     :param kw: keyword arguments handed to ``write_string`` unchanged.
     """
     if not isinstance(fname, Path):
         fname = Path(fname)
     # Build the text before opening the file: mode 'w' truncates at open
     # time, and an error while serializing should not wipe an existing file.
     text = self.write_string(**kw)
     with fname.open('w', encoding='utf-8') as fp:
         fp.write(text)