Пример #1
0
def send_to_dw(doc):
    """Publish a Metatab document as a data.world dataset.

    Builds the dataset key from ``username`` plus the slugified Root.Title,
    then replaces the dataset if it already exists, or creates it when the
    initial fetch raises ``RestApiError``.

    :param doc: Metatab document supplying title, description and resources.
    :return: None; works through the data.world API client for side effects.
    """

    client = dw.api_client()

    username = '******'

    title = doc.find_first_value("Root.Title")
    key = join(username, slugify(title))

    d = dict(title=title,  # reuse the title fetched above
             description=doc.find_first_value("Root.Description"),
             summary=doc.markdown,
             visibility='OPEN',
             files=get_resource_urls(doc))

    try:

        ds = client.get_dataset(
            key)  # Raise an error if the dataset does not exist

        ds = client.replace_dataset(key, **d)

        ds = client.get_dataset(key)

    except RestApiError:

        # BUG FIX: previously created under the hard-coded 'ericbusboom'
        # account, which disagreed with the key built from `username`, so
        # the following get_dataset(key) would look in the wrong place.
        ds = client.create_dataset(username, **d)

        ds = client.get_dataset(key)
Пример #2
0
def add_single_resource(doc, ref, cache, seen_names):
    """Add a Root.Datafile term for *ref* to *doc*, replacing any existing one.

    Intuits the file's encoding, start line and header lines where possible,
    de-duplicates the resource name against *seen_names*, and falls back to a
    hash-derived name when none could be extracted from the path.

    :param doc: Metatab document to modify.
    :param ref: URL or path of the resource.
    :param cache: cache passed through to run_row_intuit.
    :param seen_names: set of names already in use; mutated in place.
    :return: the new term created in the document's 'Resources' section.
    """
    from metatab.util import slugify

    t = doc.find_first('Root.Datafile', value=ref)

    if t:
        prt("Datafile exists for '{}', deleting".format(ref))
        doc.remove_term(t)

    term_name = classify_url(ref)

    path, name = extract_path_name(ref)

    # If the name already exists, try to create a new one.
    # 20 attempts ought to be enough.
    if name in seen_names:
        base_name = re.sub(r'-?\d+$', '', name)

        for i in range(1, 20):
            name = "{}-{}".format(base_name, i)
            if name not in seen_names:
                break

    seen_names.add(name)

    encoding = start_line = None
    header_lines = []

    try:
        encoding, ri = run_row_intuit(path, cache)
        prt("Added resource for '{}', name = '{}' ".format(ref, name))
        start_line = ri.start_line
        header_lines = ri.header_lines
    except RowIntuitError as e:
        warn("Failed to intuit '{}'; {}".format(path, e))

    except SourceError as e:
        warn("Source Error: '{}'; {}".format(path, e))

    except Exception as e:
        warn("Error: '{}'; {}".format(path, e))

    if not name:
        from hashlib import sha1
        name = sha1(slugify(path).encode('ascii')).hexdigest()[:12]

        # xlrd gets grouchy if the name doesn't start with a char,
        # so shift a leading digit out of the way.
        try:
            int(name[0])
            name = 'a' + name[1:]
        # BUG FIX: was a bare `except:`, which would also swallow
        # KeyboardInterrupt etc.; only a non-digit first char is expected.
        except ValueError:
            pass

    return doc['Resources'].new_term(term_name, ref, name=name,
                                     startline=start_line,
                                     headerlines=','.join(str(e) for e in header_lines),
                                     encoding=encoding)
Пример #3
0
    def cleanse(self):
        """Clean up some terms, like ensuring that the name is a slug"""
        from .util import slugify

        self.ensure_identifier()

        try:
            self.update_name()
        except MetatabError:
            # update_name() failed, so repair Root.Name by hand.
            root = self['Root']

            identifier = root.find_first('Root.Identifier')
            name = root.find_first('Root.Name')

            if not name:
                # No Name term at all: create one from the identifier.
                root.get_or_new_term('Root.Name').value = slugify(identifier.value)
            elif name.value:
                # Name present: just make sure it is a slug.
                name.value = slugify(name.value)
            else:
                # Name term exists but is empty: derive it from the identifier.
                name.value = slugify(identifier.value)
Пример #4
0
def package_info(doc):
    """Print the data.world dataset record for *doc* as indented JSON."""

    client = dw.api_client()
    username = '******'

    key = join(username, slugify(doc.find_first_value("Root.Title")))

    try:
        dataset = client.get_dataset(key)
    except RestApiError as e:
        err(e)
    else:
        prt(json.dumps(dataset, indent=4))
Пример #5
0
    def _load_documentation_files(self):
        """Load the package's documentation, rendering notebook docs to HTML.

        Root.Documentation terms that point at local ``.ipynb`` files are
        collected and removed from the document before the normal handling
        runs, then exported directly into the package's ``docs`` directory.
        """
        from metapack_jupyter.exporters import DocumentationExporter

        notebook_docs = []

        # First find and remove notebooks from the docs. These will get
        # processed to create normal documents.
        try:
            # list() snapshots the terms so remove_term() below can't
            # disturb the iteration.
            for term in list(
                    self.doc['Documentation'].find('Root.Documentation')):

                u = parse_app_url(term.value)
                if u is not None and u.target_format == 'ipynb' and u.proto == 'file':
                    notebook_docs.append(term)
                    self.doc.remove_term(term)
        except KeyError:
            # No 'Documentation' section in the document.
            self.warn("No documentation defined in metadata")

        # Process all of the normal files
        super()._load_documentation_files()

        fw = FilesWriter()
        fw.build_directory = join(self.package_path.path, 'docs')

        # Now, generate the notebook documents directly into the filesystem package
        for term in notebook_docs:

            de = DocumentationExporter(
                base_name=term.name or slugify(term.title))

            u = parse_app_url(term.value)

            nb_path = join(self.source_dir,
                           u.path)  # Only works if the path is relative.

            try:
                output, resources = de.from_filename(nb_path)
                fw.write(output,
                         resources,
                         notebook_name=de.base_name +
                         '_full')  # Write notebook html with inputs

                de.update_metatab(self.doc, resources)
            except Exception as e:
                # Best-effort: a failed conversion is logged, not fatal.
                from metapack.cli.core import warn
                warn("Failed to convert document for {}: {}".format(
                    term.name, e))
Пример #6
0
    def add_single_resource(self, ref, **properties):
        """ Add a single resource, without trying to enumerate its contents
        :param ref: URL or path of the resource.
        :param properties: extra term properties; a 'name' key overrides the
            derived resource name.
        :return: the new term in the resources section, or None if row
            intuiting failed.
        """

        t = self.doc.find_first('Root.Datafile', value=ref)

        if t:
            self.prt("Datafile exists for '{}', deleting".format(ref))
            self.doc.remove_term(t)

        term_name = self.classify_url(ref)

        ref, path, name = self.extract_path_name(ref)

        self.prt("Adding resource for '{}'".format(ref))

        try:
            encoding, ri = self.run_row_intuit(path, self._cache)
        except Exception as e:
            # Best-effort: log and skip resources we can't intuit.
            self.warn("Failed to intuit '{}'; {}".format(path, e))
            return None

        if not name:
            from hashlib import sha1
            name = sha1(slugify(path).encode('ascii')).hexdigest()[:12]

            # xlrd gets grouchy if the name doesn't start with a char,
            # so shift a leading digit out of the way.
            try:
                int(name[0])
                name = 'a' + name[1:]
            # BUG FIX: was a bare `except:`; only a non-digit first
            # character (ValueError) is the expected case here.
            except ValueError:
                pass

        # An explicit name passed by the caller wins over the derived one.
        name = properties.pop('name', name)

        return self.sections.resources.new_term(
            term_name,
            ref,
            name=name,
            startline=ri.start_line,
            headerlines=','.join(str(e) for e in ri.header_lines),
            encoding=encoding,
            **properties)
Пример #7
0
    def _load_documentation_files(self):
        """Copy all of the Datafile entries into the Excel file.

        Downloads each Root.Documentation reference, resolves its real file
        name (preferring a slugified title so files from different releases
        don't collide), and loads the content into the package.
        """
        from rowgenerators.generators import get_dflo, download_and_cache
        from rowgenerators import SourceSpec
        from rowgenerators.exceptions import DownloadError
        from os.path import basename, splitext

        for doc in self.doc.find('Root.Documentation'):

            ss = SourceSpec(doc.value)

            try:
                d = download_and_cache(ss, self._cache)
            except DownloadError:
                self.warn("Failed to load documentation for '{}'".format(
                    doc.value))
                continue

            dflo = get_dflo(ss, d['sys_path'])

            f = dflo.open('rb')

            # BUG FIX: close the file even when name resolution or
            # _load_documentation raises, so the handle isn't leaked.
            try:
                try:
                    # For Zip files, the target_file may actually be a regex, so we
                    # have to resolve the regex before using it as a filename
                    real_name = basename(
                        dflo.memo[1].name
                    )  # Internal detail of how Zip files are accessed
                except (AttributeError, TypeError):

                    real_name = basename(ss.target_file)

                # Prefer the slugified title to the base name, because in cases of collections
                # of many data releases, like annual datasets, documentation files may all have the same name,
                # but the titles should be different.
                real_name_base, ext = splitext(real_name)
                name = doc.get_value('title') if doc.get_value(
                    'title') else real_name_base

                real_name = slugify(name) + ext

                self._load_documentation(doc, f.read(), real_name)
            finally:
                f.close()
Пример #8
0
    def _generate_identity_name(self):
        """Build the identity name by joining the slugified name components."""

        name_t = self.find_first('Root.Name', section='Root')

        name = name_t.value

        # Each component reads the Name term's child first and falls back
        # to the matching Root-level term.
        def component(child, root_term):
            return name_t.get_value(child, self.get_value(root_term))

        fields = (
            component('Name.Origin', 'Root.Origin'),
            component('Name.Dataset', 'Root.Dataset'),
            component('Name.Time', 'Root.Time'),
            component('Name.Space', 'Root.Space'),
            component('Name.Grain', 'Root.Grain'),
            component('Name.Version', 'Root.Version'),
        )

        parts = []
        for field in fields:
            # Skip missing or whitespace-only components.
            if field and str(field).strip():
                parts.append(slugify(field.replace('-', '_')))

        return '-'.join(parts)
Пример #9
0
    def save(self, path=None):
        """Zip up the filesystem package and return the package path."""

        self.check_is_ready()

        # All archive entries live under a directory named for the package.
        root_dir = slugify(self.doc.find_first_value('Root.Name'))

        self.prt(
            "Creating ZIP Package at '{}' from filesystem package at '{}'".
            format(self.package_path, self.source_dir))

        self.zf = zipfile.ZipFile(self.package_path.path, 'w',
                                  zipfile.ZIP_DEFLATED)

        for dirpath, _dirs, filenames in walk(self.source_dir):
            for filename in filenames:
                src = join(dirpath, filename)
                # Archive name is the path relative to the source dir,
                # re-rooted under root_dir.
                relative = src.replace(self.source_dir, '').strip('/')
                self.zf.write(src, join(root_dir, relative))

        self.zf.close()

        return self.package_path
Пример #10
0
    def _generate_identity_name(self, mod_version=False):

        """Build the identity name, optionally adjusting the version part.

           mod_version == False -> don't change the version
           mod_version == None -> remove the version
           mod_version == something else -> change the version to something else
           ('+N' / '-N' strings increment or decrement the current integer
           version; any other value replaces it.)"""

        datasetname = self.find_first_value('Root.Dataset', section='Root')
        origin = self.find_first_value('Root.Origin', section='Root')
        time = self.find_first_value('Root.Time', section='Root')
        space = self.find_first_value('Root.Space', section='Root')
        grain = self.find_first_value('Root.Grain', section='Root')
        variant = self.find_first_value('Root.Variant', section='Root')

        self.update_version()

        # mv is the modifiable version component; set_ver writes it back to
        # whichever term it came from.
        if self._has_semver():
            # Modifyable part is the patch
            mv = self['Root'].get_value('Version.Patch')

            def set_ver(mv):
                self['Root'].find_first('Version.Patch').value = mv

        else:
            # Modifyable part is the whole version
            mv = self['Root'].get_value('Version')

            def set_ver(mv):
                self['Root'].find_first('Version').value = mv


        if mod_version is False:
            # Don't change the version
            pass
        elif mod_version is None:
            # Set the version to nothing -- the nonver name
            mv = None

        elif str(mod_version)[0] == '+' or str(mod_version)[0] == '-':

            # Increment or decrement the version

            try:
                int(mv)
            except ValueError:
                raise MetatabError(
                    "When specifying version math, version value in Root.Version or Version.Patch  term must be an integer")

            # Excel likes to make integers into floats
            try:
                if int(mv) == float(mv):
                    mv = int(mv)

            except (ValueError, TypeError):
                pass

            # Bare '+' / '-' means step by 1; '+N' / '-N' steps by N.
            if mod_version[0] == '+':
                mv = str(int(mv) + int(mod_version[1:] if mod_version[1:] else 1))
            else:
                mv = str(int(mv) - int(mod_version[1:] if mod_version[1:] else 1))

        else:
            # Set the version to a specific string
            mv = mod_version

        if mv is None:
            version = None
        else:
            # Persist the new version, then re-derive the canonical value.
            set_ver(mv)
            version = self.update_version()

        parts = [slugify(str(e).replace('-', '_')) for e in (
            origin, datasetname, time, space, grain, variant, version)
                 if e and str(e).strip()]

        return '-'.join(parts)