Пример #1
0
def fetch(resource_id, **kwargs):
    """Download a filestore resource to disk and print the resulting path.

    Args:
        resource_id: Id of the resource, handed to `CKAN.fetch_resource`.
        **kwargs: Command options. `quiet` and `destination` are required;
            `name_from_id` and `chunksize_bytes` are optional. Entries whose
            key appears in `api.CKAN_KEYS` are passed to the `CKAN`
            constructor.

    Exits via `sys.exit` with an error message when the fetch raises
    `api.NotAuthorized`.
    """
    verbose = not kwargs['quiet']
    destination = kwargs['destination']
    name_from_id = kwargs.get('name_from_id')
    chunksize = kwargs.get('chunksize_bytes')
    ckan_options = dict(
        (key, value) for key, value in kwargs.items()
        if key in api.CKAN_KEYS)
    ckan = CKAN(**ckan_options)

    try:
        response = ckan.fetch_resource(resource_id)
    except api.NotAuthorized as err:
        sys.exit('ERROR: %s\n' % str(err))

    # The final path is derived from the requested destination plus the
    # response headers and resource id.
    filepath = tup.make_filepath(
        destination,
        headers=response.headers,
        name_from_id=name_from_id,
        resource_id=resource_id)
    tio.write(filepath, response.iter_content, chunksize=chunksize)

    # Record the response encoding (when there is one) in the file's
    # extended attributes.
    attributes = xattr(filepath)
    encoding = response.encoding

    if encoding:
        if verbose:
            print('saving encoding %s to extended attributes' % encoding)

        attributes['com.ckanny.encoding'] = encoding

    print(filepath)
Пример #2
0
def migrate(resource_id, **kwargs):
    """Copy a filestore resource from one ckan instance to another.

    The resource is fetched from `src_remote`, written to a temporary file
    and uploaded to `dest_remote` under the same resource id. Once created,
    the temporary file is always removed, whether or not the upload worked.

    Args:
        resource_id: Id of the resource to copy.
        **kwargs: Command options. `src_remote`, `dest_remote`, `quiet` and
            `chunksize_bytes` are required. Entries whose key appears in
            `api.CKAN_KEYS` (other than `remote`) are passed to both `CKAN`
            constructors.

    Exits via `sys.exit` with an error message when the two remotes are
    equal, when fetching the resource or creating the temporary file fails,
    or when the upload returns a falsy result.
    """
    src_remote, dest_remote = kwargs['src_remote'], kwargs['dest_remote']

    if src_remote == dest_remote:
        msg = (
            'ERROR: `dest-remote` of %s is the same as `src-remote` of %s.\n'
            'The dest and src remotes must be different.\n' % (
                dest_remote, src_remote))

        sys.exit(msg)

    verbose = not kwargs['quiet']
    chunksize = kwargs['chunksize_bytes']

    # `remote` is passed explicitly below; dropping any generic one avoids a
    # duplicate-keyword TypeError should 'remote' be listed in CKAN_KEYS.
    ckan_kwargs = {
        k: v for k, v in kwargs.items()
        if k in api.CKAN_KEYS and k != 'remote'}
    src_ckan = CKAN(remote=src_remote, **ckan_kwargs)
    dest_ckan = CKAN(remote=dest_remote, **ckan_kwargs)

    # Stays None until a temporary file really exists, so the cleanup in
    # `finally` never touches an unbound name when the fetch fails.
    filepath = None

    try:
        r = src_ckan.fetch_resource(resource_id)
        tmp = NamedTemporaryFile(delete=False)
        filepath = tmp.name
        # Only the path is needed; close the handle so it is not leaked.
        tmp.close()
    except Exception as err:
        # Presumably also covers api.NotAuthorized, which was previously
        # handled by an identical, separate clause.
        sys.exit('ERROR: %s\n' % str(err))
    else:
        tio.write(filepath, r.raw.read(), chunksize=chunksize)
        resource = dest_ckan.update_filestore(resource_id, filepath=filepath)

        if resource and verbose:
            print('Success! Resource %s updated.' % resource_id)
        elif not resource:
            sys.exit('Error uploading file!')
    finally:
        if filepath is not None:
            if verbose:
                print('Removing tempfile...')

            unlink(filepath)
Пример #3
0
def customize(org_id, **kwargs):
    """Introspect an organization and build its control sheet values.

    Looks up the organization's 3w, topline and geojson resources (unless
    their ids are given), reads the field names of the 3w and geojson
    resources, and assembles the values named by `control_sheet_keys`.

    Args:
        org_id: Id of the organization. When no `geojson` id is given, the
            second dash-separated part of this id is used as the resource
            name to search for, so it must contain a dash in that case.
        **kwargs: Command options. `quiet` and `remote` are required.
            Optional entries read here: `image_sq`, `image_rect`,
            `sanitize`, `3w`, `geojson`, `topline`, `where`, `datatype_1`,
            `datatype_2`, `color`, `viz_type` and `viz_title`. Entries whose
            key appears in `api.CKAN_KEYS` are passed to the `CKAN`
            constructor, and all options are forwarded to `find_field`.

    Returns:
        list: The assembled values, ordered by `control_sheet_keys`.

    Exits with status 1 when no 3w resource id is given or found.
    """
    verbose = not kwargs['quiet']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    sanitize = kwargs.get('sanitize')
    three_dub_id = kwargs.get('3w')
    geojson_id = kwargs.get('geojson')
    topline_id = kwargs.get('topline')

    def image_url(image):
        """Expand a bare Google Docs file id into a download url."""
        # A missing image (None) is passed through rather than raising a
        # TypeError on the substring test.
        if image is None or 'http' in image:
            return image

        return 'https://docs.google.com/uc?id=%s&export=download' % (image,)

    ckan = CKAN(**ckan_kwargs)
    organization = ckan.organization_show(id=org_id, include_datasets=True)
    org_packages = organization['packages']
    extras = {e['key']: e['value'] for e in organization['extras']}

    if three_dub_id:
        three_dub_set_id = ckan.get_package_id(three_dub_id)
    else:
        ids = ckan.find_ids(org_packages, pnamed='3w', ptagged='3w')
        three_dub_set_id = ids['pname']
        three_dub_id = ids['rid']

    if not three_dub_id:
        # Same exit status as before, but say why instead of exiting silently.
        sys.stderr.write('ERROR: no 3w resource found for %s\n' % org_id)
        sys.exit(1)

    if not topline_id:
        topline_id = ckan.find_ids(org_packages, pnamed='topline')['rid']

    if geojson_id:
        geojson_set_id = ckan.get_package_id(geojson_id)
    else:
        # The 'hdx' organization is only needed for this fallback lookup, so
        # it is fetched here rather than unconditionally.
        hdx = ckan.organization_show(id='hdx', include_datasets=True)
        country = org_id.split('-')[1]
        hkwargs = {'pnamed': 'json-repository', 'rnamed': country}
        ids = ckan.find_ids(hdx['packages'], **hkwargs)
        geojson_set_id = ids['pname']
        geojson_id = ids['rid']

    viz_url = '%s/dataset/%s' % (kwargs['remote'], three_dub_set_id)
    three_dub_r = ckan.fetch_resource(three_dub_id)

    # The first line of the 3w resource holds the comma separated field
    # names. `next()` works on both Python 2 and 3, unlike `.next()`.
    header = next(three_dub_r.iter_lines())

    # Presumably a requests response, whose lines arrive as bytes on Python 3
    # and cannot be split on a str. Never true on Python 2 (bytes is str).
    if isinstance(header, bytes) and not isinstance(header, str):
        header = header.decode(three_dub_r.encoding or 'utf-8')

    _fields = header.split(',')
    three_dub_fields = tup.underscorify(_fields) if sanitize else _fields

    if geojson_id:
        geojson_r = ckan.fetch_resource(geojson_id)
        properties = geojson_r.json()['features'][0]['properties']
        # A real list, so it is not left as a dict view on Python 3.
        geojson_fields = list(properties.keys())
    else:
        geojson_fields = []

    if verbose:
        print('3w fields:')
        pprint(three_dub_fields)
        print('geojson fields:')
        pprint(geojson_fields)

    def_where = tup.find(three_dub_fields, geojson_fields) or ''
    who_column = find_field(three_dub_fields, 'who', **kwargs)
    what_column = find_field(three_dub_fields, 'what', **kwargs)
    where_column = find_field(three_dub_fields, 'where', def_where, **kwargs)

    where_column_2 = find_field(geojson_fields, 'where', def_where, **kwargs)
    name_column = kwargs.get('where') or def_where

    data = {
        'name': org_id,
        'resource_id_1': three_dub_id,
        'resource_id_2': geojson_id,
        'topline_resource': topline_id,
        'datatype_1': kwargs.get('datatype_1') or 'datastore',
        'datatype_2': kwargs.get('datatype_2') or 'filestore',
        'org_url': extras['org_url'],
        'description': organization['description'],
        'title': organization['title'],
        'image_sq': image_url(kwargs.get('image_sq')),
        'image_rect': image_url(kwargs.get('image_rect')),
        'highlight_color': kwargs.get('color'),
        'dataset_id_1': three_dub_set_id,
        'dataset_id_2': geojson_set_id,
        'who_column': deref_field(three_dub_fields, who_column),
        'what_column': deref_field(three_dub_fields, what_column),
        'where_column': deref_field(three_dub_fields, where_column),
        'where_column_2': deref_field(geojson_fields, where_column_2),
        'map_district_name_column': deref_field(geojson_fields, name_column),
        'viz_data_link_url': viz_url,
        'visualization_select': kwargs.get('viz_type', '3W-dashboard'),
        'viz_title': kwargs.get('viz_title', "Who's doing what and where?"),
        'colors': [
            '#c6d5ed', '#95b5df', '#659ad2', '#026bb5',
            '#659ad2', '#213b68', '#101d4e', '#000035'],
        'use_org_color': True,
        'modified_at': int(time()),
    }

    control_sheet_data = [data[k] for k in control_sheet_keys]

    if verbose:
        print('\nCustom pages control sheet data:')
        print(control_sheet_data)

    return control_sheet_data
Пример #4
0
def update(resource_id, force=None, **kwargs):
    """Update a datastore table based on the current filestore resource.

    The resource is downloaded into a temporary file and hashed. The
    datastore is updated only when that hash differs from the stored one (or
    no hash is stored), or when `force` is truthy. If looking up the stored
    hash raises `api.NotFound`, the missing hash table package and/or
    resource is created first.

    Args:
        resource_id: Id of the resource to update.
        force: When truthy, update even if the content hash is unchanged.
        **kwargs: Command options. `quiet` and `chunk_bytes` are optional.
            `hash_table` (and `hash_group`, when the package is missing) are
            needed only when the hash table has to be created. Entries whose
            key appears in `api.CKAN_KEYS` are passed to the `CKAN`
            constructor; all options plus `encoding` and `content_type` are
            forwarded to `CKAN.update_datastore`.

    Exits via `sys.exit`: with an error message when the resource cannot be
    fetched, when no organization matches `hash_group`, or when the
    datastore update reports failure; with status 0 when nothing changed and
    `force` is falsy.
    """
    verbose = not kwargs.get('quiet')
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))

    f = SpooledTemporaryFile(suffix='.xlsx', mode='r+b')

    # Everything that uses the temporary file lives in this `try` so the
    # file is closed on every path, including the `sys.exit` calls.
    try:
        write_kwargs = {
            'length': r.headers.get('content-length'),
            'chunksize': chunk_bytes
        }

        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # The error payload names what is missing: the hash table
            # package, its resource, or something else.
            item = err.args[0]['item']

            if item == 'package':
                orgs = ckan.organization_list(permission='admin_group')
                # `next()` with a default works on Python 2 and 3 and avoids
                # a bare StopIteration when no organization matches.
                owner_org = next(
                    (o['id'] for o in orgs
                     if o['display_name'] == kwargs['hash_group']), None)

                if owner_org is None:
                    sys.exit(
                        'ERROR: no organization named %s found.\n'
                        % kwargs.get('hash_group'))

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            if item in {'package', 'resource'}:
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)
        # With no stored hash the resource always counts as changed.
        changed = not old_hash or new_hash != old_hash

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)
    finally:
        f.close()
Пример #5
0
def update(resource_id, force=None, **kwargs):
    """Update a datastore table based on the current filestore resource.

    The resource is downloaded into a temporary file and hashed. The
    datastore is updated only when that hash differs from the stored one (or
    no hash is stored), or when `force` is truthy. If looking up the stored
    hash raises `api.NotFound`, the missing hash table package and/or
    resource is created first.

    Args:
        resource_id: Id of the resource to update.
        force: When truthy, update even if the content hash is unchanged.
        **kwargs: Command options. `quiet` and `chunk_bytes` are optional.
            `hash_table` (and `hash_group`, when the package is missing) are
            needed only when the hash table has to be created. Entries whose
            key appears in `api.CKAN_KEYS` are passed to the `CKAN`
            constructor; all options plus `encoding` and `content_type` are
            forwarded to `CKAN.update_datastore`.

    Exits via `sys.exit`: with an error message when the resource cannot be
    fetched, when no organization matches `hash_group`, or when the
    datastore update reports failure; with status 0 when nothing changed and
    `force` is falsy.
    """
    verbose = not kwargs.get('quiet')
    chunk_bytes = kwargs.get('chunk_bytes', api.CHUNKSIZE_BYTES)
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    hash_kwargs = {'chunksize': chunk_bytes, 'verbose': verbose}
    ckan = CKAN(**ckan_kwargs)

    try:
        r = ckan.fetch_resource(resource_id)
    except (api.NotFound, api.NotAuthorized) as err:
        sys.exit('ERROR: %s\n' % str(err))

    f = SpooledTemporaryFile(suffix='.xlsx', mode='r+b')

    # Everything that uses the temporary file lives in this `try` so the
    # file is closed on every path, including the `sys.exit` calls.
    try:
        write_kwargs = {
            'length': r.headers.get('content-length'),
            'chunksize': chunk_bytes
        }

        tio.write(f, r.iter_content, **write_kwargs)

        try:
            old_hash = ckan.get_hash(resource_id)
        except api.NotFound as err:
            # The error payload names what is missing: the hash table
            # package, its resource, or something else.
            item = err.args[0]['item']

            if item == 'package':
                orgs = ckan.organization_list(permission='admin_group')
                # `next()` with a default works on Python 2 and 3 and avoids
                # a bare StopIteration when no organization matches.
                owner_org = next(
                    (o['id'] for o in orgs
                     if o['display_name'] == kwargs['hash_group']), None)

                if owner_org is None:
                    sys.exit(
                        'ERROR: no organization named %s found.\n'
                        % kwargs.get('hash_group'))

                package_kwargs = {
                    'name': kwargs['hash_table'],
                    'owner_org': owner_org,
                    'package_creator': 'Hash Table',
                    'dataset_source': 'Multiple sources',
                    'notes': 'Datastore resource hash table'
                }

                ckan.hash_table_pack = ckan.package_create(**package_kwargs)

            if item in {'package', 'resource'}:
                fileobj = StringIO('datastore_id,hash\n')
                create_kwargs = {'fileobj': fileobj, 'name': api.DEF_HASH_RES}
                table = kwargs['hash_table']
                resource = ckan.create_resource(table, **create_kwargs)
                ckan.hash_table_id = resource['id']

            ckan.create_hash_table(verbose)
            old_hash = ckan.get_hash(resource_id)

        new_hash = tio.hash_file(f, **hash_kwargs)
        # With no stored hash the resource always counts as changed.
        changed = not old_hash or new_hash != old_hash

        if verbose:
            print(get_message(changed, force))

        if not (changed or force):
            sys.exit(0)

        kwargs['encoding'] = r.encoding
        kwargs['content_type'] = r.headers['content-type']
        updated = ckan.update_datastore(resource_id, f, **kwargs)

        if updated and verbose:
            print('Success! Resource %s updated.' % resource_id)

        if updated and changed:
            ckan.update_hash_table(resource_id, new_hash, verbose)
        elif not updated:
            sys.exit('ERROR: resource %s not updated.' % resource_id)
    finally:
        f.close()
Пример #6
0
def customize(org_id, **kwargs):
    """Introspect an organization and build its control sheet values.

    Looks up the organization's 3w, topline and geojson resources (unless
    their ids are given), reads the field names of the 3w and geojson
    resources, and assembles the values named by `control_sheet_keys`.

    Args:
        org_id: Id of the organization. When no `geojson` id is given, the
            second dash-separated part of this id is used as the resource
            name to search for, so it must contain a dash in that case.
        **kwargs: Command options. `quiet` and `remote` are required.
            Optional entries read here: `image_sq`, `image_rect`,
            `sanitize`, `3w`, `geojson`, `topline`, `where`, `datatype_1`,
            `datatype_2`, `color`, `viz_type` and `viz_title`. Entries whose
            key appears in `api.CKAN_KEYS` are passed to the `CKAN`
            constructor, and all options are forwarded to `find_field`.

    Returns:
        list: The assembled values, ordered by `control_sheet_keys`.

    Exits with status 1 when no 3w resource id is given or found.
    """
    verbose = not kwargs['quiet']
    ckan_kwargs = {k: v for k, v in kwargs.items() if k in api.CKAN_KEYS}
    sanitize = kwargs.get('sanitize')
    three_dub_id = kwargs.get('3w')
    geojson_id = kwargs.get('geojson')
    topline_id = kwargs.get('topline')

    def image_url(image):
        """Expand a bare Google Docs file id into a download url."""
        # A missing image (None) is passed through rather than raising a
        # TypeError on the substring test.
        if image is None or 'http' in image:
            return image

        return 'https://docs.google.com/uc?id=%s&export=download' % (image,)

    ckan = CKAN(**ckan_kwargs)
    organization = ckan.organization_show(id=org_id, include_datasets=True)
    org_packages = organization['packages']
    extras = {e['key']: e['value'] for e in organization['extras']}

    if three_dub_id:
        three_dub_set_id = ckan.get_package_id(three_dub_id)
    else:
        ids = ckan.find_ids(org_packages, pnamed='3w', ptagged='3w')
        three_dub_set_id = ids['pname']
        three_dub_id = ids['rid']

    if not three_dub_id:
        # Same exit status as before, but say why instead of exiting silently.
        sys.stderr.write('ERROR: no 3w resource found for %s\n' % org_id)
        sys.exit(1)

    if not topline_id:
        topline_id = ckan.find_ids(org_packages, pnamed='topline')['rid']

    if geojson_id:
        geojson_set_id = ckan.get_package_id(geojson_id)
    else:
        # The 'hdx' organization is only needed for this fallback lookup, so
        # it is fetched here rather than unconditionally.
        hdx = ckan.organization_show(id='hdx', include_datasets=True)
        country = org_id.split('-')[1]
        hkwargs = {'pnamed': 'json-repository', 'rnamed': country}
        ids = ckan.find_ids(hdx['packages'], **hkwargs)
        geojson_set_id = ids['pname']
        geojson_id = ids['rid']

    viz_url = '%s/dataset/%s' % (kwargs['remote'], three_dub_set_id)
    three_dub_r = ckan.fetch_resource(three_dub_id)

    # The first line of the 3w resource holds the comma separated field
    # names. `next()` works on both Python 2 and 3, unlike `.next()`.
    header = next(three_dub_r.iter_lines())

    # Presumably a requests response, whose lines arrive as bytes on Python 3
    # and cannot be split on a str. Never true on Python 2 (bytes is str).
    if isinstance(header, bytes) and not isinstance(header, str):
        header = header.decode(three_dub_r.encoding or 'utf-8')

    _fields = header.split(',')
    three_dub_fields = tup.underscorify(_fields) if sanitize else _fields

    if geojson_id:
        geojson_r = ckan.fetch_resource(geojson_id)
        properties = geojson_r.json()['features'][0]['properties']
        # A real list, so it is not left as a dict view on Python 3.
        geojson_fields = list(properties.keys())
    else:
        geojson_fields = []

    if verbose:
        print('3w fields:')
        pprint(three_dub_fields)
        print('geojson fields:')
        pprint(geojson_fields)

    def_where = tup.find(three_dub_fields, geojson_fields) or ''
    who_column = find_field(three_dub_fields, 'who', **kwargs)
    what_column = find_field(three_dub_fields, 'what', **kwargs)
    where_column = find_field(three_dub_fields, 'where', def_where, **kwargs)

    where_column_2 = find_field(geojson_fields, 'where', def_where, **kwargs)
    name_column = kwargs.get('where') or def_where

    data = {
        'name': org_id,
        'resource_id_1': three_dub_id,
        'resource_id_2': geojson_id,
        'topline_resource': topline_id,
        'datatype_1': kwargs.get('datatype_1') or 'datastore',
        'datatype_2': kwargs.get('datatype_2') or 'filestore',
        'org_url': extras['org_url'],
        'description': organization['description'],
        'title': organization['title'],
        'image_sq': image_url(kwargs.get('image_sq')),
        'image_rect': image_url(kwargs.get('image_rect')),
        'highlight_color': kwargs.get('color'),
        'dataset_id_1': three_dub_set_id,
        'dataset_id_2': geojson_set_id,
        'who_column': deref_field(three_dub_fields, who_column),
        'what_column': deref_field(three_dub_fields, what_column),
        'where_column': deref_field(three_dub_fields, where_column),
        'where_column_2': deref_field(geojson_fields, where_column_2),
        'map_district_name_column': deref_field(geojson_fields, name_column),
        'viz_data_link_url': viz_url,
        'visualization_select': kwargs.get('viz_type', '3W-dashboard'),
        'viz_title': kwargs.get('viz_title', "Who's doing what and where?"),
        'colors': [
            '#c6d5ed', '#95b5df', '#659ad2', '#026bb5',
            '#659ad2', '#213b68', '#101d4e', '#000035'],
        'use_org_color': True,
        'modified_at': int(time()),
    }

    control_sheet_data = [data[k] for k in control_sheet_keys]

    if verbose:
        print('\nCustom pages control sheet data:')
        print(control_sheet_data)

    return control_sheet_data