示例#1
0
def add_resource(mt_file, ref, cache):
    """Add resource entries for the data files found at ``ref`` and save the document.

    The reference is parsed as an application URL and listed; every listed
    entry is handed to ``add_single_resource``. Handling of entries that
    share a name or reference is presumably done inside that helper (it
    receives the ``seen_names`` set) -- confirm there.

    :param mt_file: A ``MetapackDoc``, or a reference that ``MetapackDoc`` can open.
    :param ref: URL of the file, archive or directory to take resources from.
    :param cache: Cache object passed through to ``add_single_resource``.
    """

    doc = mt_file if isinstance(mt_file, MetapackDoc) else MetapackDoc(mt_file)

    if 'Resources' not in doc:
        doc.new_section('Resources')

    # Merge the required property headers into the section arguments.
    # dict.fromkeys() removes duplicates while keeping first-seen order, so
    # the header columns come out in the same order on every run; a set
    # would reorder them arbitrarily under string-hash randomization.
    required_args = ['Name', 'StartLine', 'HeaderLines', 'Encoding']
    doc['Resources'].args = [
        e for e in dict.fromkeys(doc['Resources'].args + required_args) if e
    ]

    seen_names = set()

    u = parse_app_url(ref)

    # The web and file URLs don't list the same: file URLs list their
    # entries directly, other protocols need a second level of listing.
    if u.proto == 'file':
        entries = u.list()
    else:
        entries = [ssu for su in u.list() for ssu in su.list()]

    for e in entries:
        add_single_resource(doc, e, cache=cache, seen_names=seen_names)

    write_doc(doc, mt_file)
示例#2
0
def process_schemas(mt_file, resource=None, cache=None, clean=False, report_found=True, force=False, min_rows=5000,
                    allow_codes=True):
    """Create or refresh schema entries for the resources of a Metapack document.

    A resource is skipped when it already has a schema term, every one of its
    columns has a datatype, and ``force`` is ``False``. Otherwise it is
    handed to the processing helper that matches its resolved URL type.

    :param mt_file: A ``MetapackDoc``, or a reference that ``MetapackDoc`` can open.
        The document is written back only in the second case, and only if at
        least one resource was processed.
    :param resource: If given, only the resource with this name is processed.
    :param cache: Not used by this function.
    :param clean: If true, call ``clean()`` on an existing 'Schema' section first.
    :param report_found: If true, print a message for each resource that is skipped.
    :param force: Process resources even when their schema looks complete; also
        passed through to the processing helpers.
    :param min_rows: Passed to ``_process_normal_resource`` as ``skip_start``.
    :param allow_codes: Passed to ``_process_normal_resource``.
    """
    from metapack import MetapackDoc, MetapackResourceUrl, MetapackDocumentUrl

    if isinstance(mt_file, MetapackDoc):
        doc = mt_file
        write_doc_to_file = False
    else:
        doc = MetapackDoc(mt_file)
        write_doc_to_file = True

    try:
        if clean:
            doc['Schema'].clean()
        else:
            doc['Schema']  # Lookup only; raises KeyError when the section is missing

    except KeyError:
        doc.new_section('Schema', ['DataType', 'AltName', 'Description'])

    schemas_processed = 0

    for r in doc['Resources'].find('Root.Resource'):

        if resource and r.name != resource:
            continue

        schema_term = r.schema_term

        # Walk the columns once and derive both counts from the same list.
        columns = list(r.columns())
        col_count = len(columns)
        datatype_count = sum(1 for c in columns if c['datatype'])

        if schema_term and col_count == datatype_count and force is False:
            if report_found:
                prt("Found table for '{}'; skipping".format(r.schema_name))
            continue

        if col_count != datatype_count:
            prt("Found table for '{}'; but {} columns don't have datatypes"
                .format(r.schema_name, col_count - datatype_count))

        schemas_processed += 1

        rr = r.resolved_url

        rmtree(get_materialized_data_cache(doc), ignore_errors=True)

        if isinstance(rr, MetapackDocumentUrl):
            # The message must be formatted before it is passed on; handing the
            # name over as a second argument leaves the '{}' placeholder unfilled.
            warn('{} is a MetapackDocumentUrl; skipping'.format(r.name))
        elif isinstance(rr, MetapackResourceUrl):
            _process_metapack_resource(doc, r, force)
        else:
            _process_normal_resource(doc, r, force, skip_start=min_rows, allow_codes=allow_codes)

    if write_doc_to_file and schemas_processed:
        write_doc(doc, mt_file)
示例#3
0
def run_update(args):
    """Run the ``update`` sub-command.

    Each update step is guarded by its own command-line option, and the
    steps run in the fixed order in which they appear below.
    """
    m = MetapackCliMemo(args, downloader)

    m.doc  # Trigger an error if the doc can't be found

    opts = m.args

    if opts.schemas:
        update_schemas(m)
    if opts.schema_properties:
        update_schema_props(m)
    if opts.clean_properties:
        clean_properties(m)
    if opts.alt_name:
        move_alt_names(m)
    if opts.categories:
        update_categories(m)
    if opts.descriptions:
        update_descriptions(m)

    if opts.giturl:
        from metapack.cli.core import add_giturl
        add_giturl(m.doc, force=True)
        write_doc(m.doc)

    if opts.promote:
        update_promote(m)
    if opts.custom_update:
        update_custom(m)
    if opts.files:
        add_files(m)

    # Renaming only applies to metadata files referenced by a file: URL.
    if m.mtfile_url.scheme == 'file' and opts.name:
        # An explicit version wins; otherwise '+' when --increment was
        # given; otherwise False.
        mod_version = opts.version or ('+' if opts.increment else False)

        update_name(m.mt_file,
                    fail_on_missing=True,
                    force=opts.force,
                    mod_version=mod_version)

    if opts.coverage:
        update_coverage(m)
    if opts.touch:
        touch_metadata(m)
    if opts.semantic:
        to_semantic_version(m)
示例#4
0
def change_cmd(m):
    """Set the value of the first term matching ``m.term`` in ``m.section`` and save.

    When ``m.args.echo`` is set, the document is printed as CSV before it is
    written.
    """
    package = m.doc

    target = package.find_first(m.term, section=m.section)
    target.value = m.value

    if m.args.echo:
        print(package.as_csv())

    write_doc(package)
示例#5
0
def add_cmd(m):
    """Add a new term ``m.term`` with value ``m.value`` to section ``m.section`` and save.

    Every item of ``m.args.arg`` must have the form ``key=value``; the pairs
    are passed to ``new_term`` as keyword arguments. An item without ``=``
    raises ``ValueError``.
    """
    package = m.doc

    props = {}
    for item in m.args.arg:
        # Split on the first '=' only, so values may themselves contain '='.
        key, val = item.split('=', 1)
        props[key] = val

    package[m.section].new_term(m.term, m.value, **props)

    if m.args.echo:
        print(package.as_csv())

    write_doc(package)
示例#6
0
def rewrite_schema(r, df, doc=None):
    """Rebuild the schema of resource ``r`` from dataframe ``df`` and write the document.

    When ``doc`` is not supplied, the document is obtained from
    ``open_source_package()``. The document is written to its own ``ref``.
    """

    from metapack.cli.core import write_doc

    package = open_source_package() if doc is None else doc

    rebuild_schema(package, r, df)
    write_doc(package, package.ref)
示例#7
0
def update_descriptions(m):
    """Copy each resource's ``description`` into its 'Description' property and save.

    References are only inspected and reported on; they are not modified by
    the code visible here.
    """
    package = m.doc

    for resource in package.resources():
        resource['Description'] = resource.description

        print(resource.name, id(resource))
        print("Updated '{}' to '{}'".format(resource.name, resource.description))

    for reference in package.references():
        reference.find_first('Description')  # Is this necessary?

        print(reference.name, id(reference), reference.description)

    write_doc(package)
示例#8
0
def run_url_add(args):
    """Add resource entries for the data files found at ``args.url`` and save the document.

    The URL is parsed and listed; every listed entry is handed to
    ``add_single_resource``. Entries for which that helper returns a falsy
    value are collected and reported as failures at the end. Handling of
    entries that share a name or reference is presumably done inside the
    helper (it receives the ``seen_names`` set) -- confirm there.
    """

    m = MetapackCliMemo(args, downloader)

    update_name(m.mt_file, fail_on_missing=False, report_unchanged=False)

    doc = m.mt_file if isinstance(m.mt_file, MetapackDoc) else MetapackDoc(m.mt_file)

    if 'Resources' not in doc:
        doc.new_section('Resources')

    # Merge the required property headers into the section arguments.
    # dict.fromkeys() removes duplicates while keeping first-seen order, so
    # the header columns come out in the same order on every run; a set
    # would reorder them arbitrarily under string-hash randomization.
    required_args = ['Name', 'StartLine', 'HeaderLines', 'Encoding']
    doc['Resources'].args = [
        e for e in dict.fromkeys(doc['Resources'].args + required_args) if e
    ]

    seen_names = set()

    u = parse_app_url(args.url)

    # The web and file URLs don't list the same: file URLs list their
    # entries directly, other protocols need a second level of listing.
    if u.proto == 'file':
        entries = u.list()
    else:
        entries = [ssu for su in u.list() for ssu in su.list()]

    errors = []

    for e in entries:
        if not add_single_resource(
                doc, e, cache=m.cache, seen_names=seen_names):
            errors.append(e)

    if errors:
        prt()
        warn("Found, but failed to add these urls:")
        for e in errors:
            print('    ', e)

    write_doc(doc)
示例#9
0
def run_url_scrape(args):
    """Scrape a web page for links and add them to the document as terms.

    The page URL itself is recorded as a 'DownloadPage' term. Scraped
    'sources' become 'DataFile' terms in the Resources section (unless
    ``args.no_resources``); scraped 'external_documentation' links become
    terms in the Documentation section, named by ``classify_url`` (unless
    ``args.no_docs``). The document is written unless ``args.dry_run``.
    """
    m = MetapackCliMemo(args, downloader)

    from metapack.util import scrape_urls_from_web_page

    doc = m.doc
    url = m.args.url

    doc['resources'].new_term('DownloadPage', url)

    scraped = scrape_urls_from_web_page(url)

    if scraped.get('error'):
        err(scraped.get('error'))

    def add_link(section_name, term_name, link):
        # Create one term for a scraped link; the term name property is the
        # stem of the link's file-system path.
        parsed = parse_app_url(link['url'])
        return doc[section_name].new_term(term_name,
                                          link['url'],
                                          name=parsed.fspath.stem,
                                          description=link.get('description'))

    new_resources = 0
    new_documentation = 0

    if not args.no_resources:
        for link in scraped['sources'].values():
            term = add_link('Resources', 'DataFile', link)
            new_resources += 1
            if args.verbose:
                prt(term, term.props)

    if not args.no_docs:
        for link in scraped['external_documentation'].values():
            term = add_link('Documentation', classify_url(link['url']), link)
            new_documentation += 1
            if args.verbose:
                prt(term, term.props)

    prt("Added {} resource and {} documentation terms".format(
        new_resources, new_documentation))

    if not args.dry_run:
        write_doc(doc)
示例#10
0
def update_promote(m):
    """Run ``promote_terms`` on the selected sections, report each result and save.

    ``m.args.promote`` selects the sections: an empty value or ``'RR'`` means
    the resources and references sections, ``'*'`` means every section, and
    anything else is taken as a single section name (case-insensitive).
    """
    requested = m.args.promote

    if requested == '*':
        wanted = [name.lower() for name in m.doc.sections.keys()]
    elif not requested or requested == 'RR':
        wanted = ['resources', 'references']
    else:
        wanted = [requested.lower()]

    for section_name, section in m.doc.sections.items():
        if section_name.lower() not in wanted:
            continue

        for arg in promote_terms(section):
            prt("Move {} to header in section {} ".format(
                arg, section_name))

    write_doc(m.doc)
示例#11
0
def delete_cmd(m):
    """Remove every term matching ``m.term`` in ``m.section`` and save the document.

    Warns when nothing matches, and again when matching terms are still
    found after the removal. When ``m.args.echo`` is set, the document is
    printed as CSV before it is written.
    """
    package = m.doc

    matches = package.find(m.term, section=m.section)

    if not matches:
        warn("No terms found for: '{}' in section '{}' ".format(
            m.term, m.section))

    for match in matches:
        package.remove_term(match)

    # Search again to verify that the removal actually took effect.
    leftovers = package.find(m.term, section=m.section)

    if leftovers:
        warn("Delete failed")

    if m.args.echo:
        print(package.as_csv())

    write_doc(package)
示例#12
0
def move_alt_names(m):
    """Replace column names with their 'AltName' values in every schema table and save.

    For each column that has an 'AltName' child: if the column has no
    'Description', its current name (newlines replaced by spaces) becomes the
    description; the name is then set to the alt name's value and the
    'AltName' child is removed. A per-table count is printed.
    """
    package = m.doc

    for table in package['Schema'].find('Root.Table'):
        moved = 0

        for column in table.find('Table.Column'):
            alt = column.get('AltName')

            if not alt:
                continue

            # Keep the old name around as a description before overwriting it.
            if not column.get('Description'):
                column.description = (column.name or '').replace('\n', ' ')

            column.name = alt.value
            column.remove_child(alt)
            moved += 1

        prt("Moved {} names in '{}'".format(moved, table.name))

    write_doc(package)
示例#13
0
def run_init_cmd(m):
    """Set up the GitHub and local repositories and record the git URL in the document.

    If the local repository has no 'origin' remote, one is created from the
    GitHub repository's clone URL and 'master' is pushed with upstream
    tracking. The git URL is then added to the document and the document is
    written.
    """

    from git.exc import GitCommandError

    github = Github(get_token())

    remote_r = get_or_new_github_repo(github, m)
    local_repo = get_or_init_local_repo(m)

    try:
        origin = local_repo.remote('origin')
    except (ValueError, GitCommandError) as exc:
        # No usable 'origin' remote: report why, create it, then push.
        print(exc)
        origin = local_repo.create_remote('origin', remote_r.clone_url)
        local_repo.git.push('--set-upstream', 'origin', 'master')

    add_giturl(m.doc, force=True)
    write_doc(m.doc)

    prt(f'Initialized local and remote {origin.refs.master} at {remote_r.clone_url}'
        )
示例#14
0
def convert_metatab_notebook(m):
    """Store the result of ``get_readme(m)`` in the document's 'Root.Readme' term.

    The term is fetched from, or created in, the 'Documentation' section of
    ``m.doc``. The document is not written here.

    A csv <-> ipynb conversion path used to follow an unconditional
    ``return`` at this point. It could never execute, and it operated on
    placeholder ``None`` values, so it has been removed.
    """
    m.doc['Documentation'].get_or_new_term('Root.Readme').value = get_readme(m)
示例#15
0
def clean_properties(m):
    """Call ``clean_unused_schema_terms()`` on the document and save it."""
    package = m.doc
    package.clean_unused_schema_terms()
    write_doc(package)
示例#16
0
def new_cmd(args):
    """Create a new Metapack package as a directory, a single CSV file or a Jupyter notebook.

    Root terms (Origin, Dataset, Space, Time, Grain, Variant, Version) are
    filled from the command-line options, falling back to values from an
    optional defaults file. The defaults file is looked up in this order:

    1. ``args.config``
    2. the ``METAPACK_DEFAULTS`` or ``METAPACK_CONFIG`` environment variable
    3. ``~/.metapack-defaults.csv``

    Output form:

    * ``args.jupyter``: a notebook named after the unversioned package name,
      based on the downloaded template notebook.
    * ``args.csv``: a single ``<nonver_name>.csv`` file.
    * otherwise: a package directory with a metadata file, a ``pylib``
      module, any files added by ``add_missing_files`` and a ``README.md``.

    :param args: Parsed command-line arguments. Attributes read here: config,
        jupyter, template, origin, dataset, space, time, grain, variant,
        revision, title, example, csv.
    """
    from metapack import MetapackDoc
    from metapack.util import make_metatab_file, datetime_now, ensure_dir
    from metapack.cli.core import write_doc, prt, err
    from os.path import exists, join, expanduser
    from metatab import DEFAULT_METATAB_FILE
    from os import getenv

    # Either environment variable may name the defaults file. Previously one
    # variable was tested and the other one read, so setting only one of them
    # had no useful effect.
    env_config = getenv("METAPACK_DEFAULTS") or getenv("METAPACK_CONFIG")

    if args.config:
        config_file = args.config
    elif env_config:
        config_file = env_config
    else:
        config_file = expanduser('~/.metapack-defaults.csv')

    if exists(config_file):
        prt(f"Using defaults file '{config_file}'")
        config = MetapackDoc(config_file)
    else:
        config = MetapackDoc()

    if args.jupyter:
        import tempfile

        # Download before creating the temporary file, so that a failed
        # request does not leave an empty temp file behind.
        r = requests.get(TEMPLATE_NOTEBOOK, allow_redirects=True)
        r.raise_for_status()

        with tempfile.NamedTemporaryFile(suffix='.ipynb', delete=False) as fp:
            fp.write(r.content)
            nb_path = Path(fp.name)

        doc = MetapackDoc(str(nb_path))

    else:

        doc = make_metatab_file(args.template)

    doc['Root']['Created'] = datetime_now()

    origin = args.origin or config.get_value('Root.Origin')

    if not origin:
        err("Must specify a value for origin, either on command line or in defaults file"
            )

    # `et` is defined outside this function; it stands in for a missing term
    # so the assignments below do not fail (presumably a throwaway term
    # object -- confirm at its definition).
    (doc['Root'].find_first('Root.Origin') or et).value = origin
    (doc['Root'].find_first('Root.Dataset') or et).value = args.dataset
    (doc['Root'].find_first('Root.Space')
     or et).value = args.space or config.get_value('Root.Space')
    (doc['Root'].find_first('Root.Time')
     or et).value = args.time or config.get_value('Root.Time')
    (doc['Root'].find_first('Root.Grain')
     or et).value = args.grain or config.get_value('Root.Grain')
    (doc['Root'].find_first('Root.Variant')
     or et).value = args.variant or config.get_value('Root.Variant')

    v = doc['Root'].get_or_new_term('Root.Version')
    v.get_or_new_child(
        'Version.Major'
    ).value = args.revision or config.get_value('Root.Version')
    v.get_or_new_child('Version.Minor').value = 1
    v.get_or_new_child('Version.Patch').value = 1

    # Copy contacts in
    if 'Contacts' in config:
        for c in config['Contacts']:
            doc['Contacts'].add_term(c)

    if args.title:
        doc['Root'].find_first('Root.Title').value = args.title.strip()

    # Taken before the identifier and name are updated below; used as the
    # name of the output directory or notebook.
    nv_name = doc.as_version(None)

    if args.example:
        doc['Resources'].new_term(
            'Root.Datafile',
            'http://public.source.civicknowledge.com/example.com/sources/random-names.csv',
            name='random_names')

        doc['Documentation'].new_term('Root.Homepage',
                                      'http://metatab.org',
                                      title='Metatab Home Page')

    doc.ensure_identifier()
    doc.update_name(create_term=True)

    if args.jupyter:

        # Imported only on this path, because metapack_jupyter may not be installed.
        from metapack_jupyter.convert import write_metatab_notebook
        from metapack_jupyter.core import edit_notebook, set_cell_source, get_cell_source

        new_nb_path = Path(f'{nv_name}.ipynb')

        doc['Resources'].new_term(
            'Root.Datafile',
            './' + str(new_nb_path) + "#df",
            name='local_dataframe',
            description='Example of using a local Dataframe')

        if new_nb_path.exists():
            # Report the destination notebook, not the temporary download.
            err(f"File {new_nb_path} already exists")

        copyfile(nb_path, new_nb_path)

        write_metatab_notebook(doc, new_nb_path)

        with edit_notebook(new_nb_path) as nb:
            init = get_cell_source(nb, 'init')
            init += f"\nthis_package_name = '{str(new_nb_path.name)}'"
            set_cell_source(nb, 'init', init)

        nb_path.unlink()
    else:

        doc['Documentation'].new_term('Root.Documentation',
                                      'file:README.md',
                                      title='README')

        if exists(nv_name):
            err(f"Directory {nv_name} already exists")

        if args.csv:
            fn = doc.nonver_name + '.csv'
            write_doc(doc, fn)
            prt(f"Writing to {fn}")

        else:
            ensure_dir(nv_name)

            # Seed the package's pylib module with a copy of the module-level
            # `pylib` source file.
            pylib_dir = join(nv_name, 'pylib')
            ensure_dir(pylib_dir)
            with open(join(pylib_dir, '__init__.py'),
                      'w') as f_out, open(pylib.__file__) as f_in:
                f_out.write(f_in.read())

            if args.example:
                doc['Resources'].new_term('Root.Datafile',
                                          'python:pylib#row_generator',
                                          name='row_generator')

            prt(f"Writing to '{nv_name}'")

            write_doc(doc, join(nv_name, DEFAULT_METATAB_FILE))

            add_missing_files(nv_name)

            if args.title:
                readme = '# {}\n'.format(args.title)
            else:
                readme = '# {}\n'.format(doc.get_value('Root.Name'))

            with open(join(nv_name, 'README.md'), 'w') as f:
                f.write(readme)
示例#17
0
def update_schema_props(m):
    """Run ``update_schema_properties`` on the document and save it.

    ``m.args.force`` is passed through as the ``force`` argument.
    """
    package = m.doc
    update_schema_properties(package, force=m.args.force)
    write_doc(package)