Пример #1
0
def main(base_url, api_key):
    """Synchronise the RDF links stored in CKAN with csv2rdf's exposed list.

    Downloads the list of resource IDs from csv2rdf, calls ``fetch`` for
    each of them, and then walks every dataset returned by ``package_list``
    to strip ``rdf_mapping``/``rdf_data`` from resources whose ID is not in
    that list.

    :param base_url: passed unchanged to every ``post_to_ckan_api`` call
        (presumably the CKAN site URL -- confirm against that helper).
    :param api_key: passed unchanged as ``api_key`` to every CKAN call.
    :raises AssertionError: if the csv2rdf request is not ``ok``, or if a
        CKAN call made here does not report ``success`` as ``True``.
    """
    # Fetch the list of resource IDs from csv2rdf.
    logger.info("Getting the list of resource IDs")
    r = requests.get('http://csv2rdf.aksw.org/get_exposed_rdf_list')
    assert r.ok, r
    logger.info("Parsing the list of resource IDs")
    resource_ids = json.loads(r.content)

    # Add/update the RDF links in the CKAN database.
    for resource_id in resource_ids:
        fetch(resource_id, base_url, api_key)

    # The removal phase tests every CKAN resource against the csv2rdf list,
    # so build a set once instead of scanning the list for each resource.
    exposed_ids = set(resource_ids)

    # Remove RDF links from the CKAN database, for any resources no longer
    # in the list of resource IDs from csv2rdf.
    logger.info("Getting package_list")
    response = post_to_ckan_api.post_to_ckan_api(base_url,
                                                 'package_list',
                                                 api_key=api_key)
    assert response['success'] is True, response
    dataset_names = response['result']
    for dataset_name in dataset_names:
        logger.debug('package_show: {0}'.format(dataset_name))
        response = post_to_ckan_api.post_to_ckan_api(base_url,
                                                     'package_show',
                                                     data={'id': dataset_name},
                                                     api_key=api_key)
        assert response['success'] is True, response
        dataset = response['result']
        for resource in dataset['resources']:
            if resource['id'] in exposed_ids:
                logger.debug("Resource already updated: {0}".format(
                    display_name(resource)))
                continue

            if 'rdf_mapping' not in resource and 'rdf_data' not in resource:
                logger.debug("Resource already has no rdf_data or "
                             "rdf_mapping: {0}".format(
                                 display_name(resource)))
                continue

            # Only one of the two keys may be present, so use pop() with a
            # default: a plain ``del`` of the missing one raises KeyError.
            resource.pop('rdf_mapping', None)
            resource.pop('rdf_data', None)
            logger.info("Removing RDF links from resource: {0}".format(
                display_name(resource)))
            response = post_to_ckan_api.post_to_ckan_api(
                base_url,
                'resource_update',
                data=resource,
                api_key=api_key)
            assert response['success'] is True, response
            updated_resource = response['result']
            assert 'rdf_mapping' not in updated_resource
            assert 'rdf_data' not in updated_resource
Пример #2
0
def fetch(resource_id, base_url, api_key):
    """Make sure one CKAN resource carries its csv2rdf RDF links.

    Looks the resource up with ``resource_show``, sets its ``rdf_mapping``
    and ``rdf_data`` fields to the URLs derived from ``resource_id`` when
    they differ, and sends the changed resource back with
    ``resource_update``. A CKAN response whose ``success`` is ``False``, or
    an updated resource that does not carry both expected URLs, is logged
    as a warning and ends the call early.

    :param resource_id: ID used for the ``resource_show`` lookup and
        substituted into both generated URLs.
    :param base_url: passed unchanged to ``post_to_ckan_api``.
    :param api_key: passed unchanged as ``api_key`` to ``post_to_ckan_api``.
    :returns: ``None`` on every path.
    """
    # Get the resource dict from CKAN.
    logger.debug("resource_show: {0}".format(resource_id))
    data_dict = {'id': resource_id}
    response = post_to_ckan_api.post_to_ckan_api(base_url,
                                                 'resource_show',
                                                 data=data_dict,
                                                 api_key=api_key)
    if response['success'] is False:
        logger.warning(
            "failed to get response for resource {0}".format(resource_id))
        return

    resource = response['result']

    # Generate the rdf_mapping and rdf_data URLs and store them on the
    # resource dict if it does not already hold exactly these values.
    update = False
    rdf_mapping = 'http://wiki.publicdata.eu/wiki/Csv2rdf:{0}'.format(
        resource_id)
    if resource.get('rdf_mapping') != rdf_mapping:
        resource['rdf_mapping'] = rdf_mapping
        update = True
    # NOTE(review): "tranformation" is part of the URL as written here;
    # presumably it matches the remote file name, so it is left untouched.
    rdf_data = ('http://csv2rdf.aksw.org/sparqlified/{0}'
                '_default-tranformation-configuration.rdf'.format(resource_id))
    if resource.get('rdf_data') != rdf_data:
        resource['rdf_data'] = rdf_data
        update = True

    if not update:
        logger.debug("RDF links already present in resource: {0}".format(
            display_name(resource)))
        return

    # Update the resource.
    logger.info("Adding RDF links to resource: {0}".format(
        display_name(resource)))
    response = post_to_ckan_api.post_to_ckan_api(base_url,
                                                 'resource_update',
                                                 data=resource,
                                                 api_key=api_key)
    if response['success'] is False:
        logger.warning("failed to update resource {0}".format(resource_id))
        return

    # Verify that CKAN stored both links, not just rdf_mapping.
    updated_resource = response['result']
    if (updated_resource.get('rdf_mapping') != rdf_mapping
            or updated_resource.get('rdf_data') != rdf_data):
        logger.warning("failed to update resource {0}".format(resource_id))
        return
Пример #3
0
def fetch(resource_id, base_url, api_key):
    """Add or refresh the csv2rdf RDF links on a single CKAN resource.

    The resource is read with ``resource_show``; its ``rdf_mapping`` and
    ``rdf_data`` fields are compared with the URLs built from
    ``resource_id`` and, if either differs, the resource is written back
    with ``resource_update``. When CKAN answers with ``success`` set to
    ``False``, or the written resource lacks either expected URL, a warning
    is logged and the function returns.

    :param resource_id: ID of the resource; also interpolated into the two
        generated URLs.
    :param base_url: forwarded to ``post_to_ckan_api``.
    :param api_key: forwarded to ``post_to_ckan_api`` as ``api_key``.
    :returns: ``None`` on every path.
    """
    # Get the resource dict from CKAN.
    logger.debug("resource_show: {0}".format(resource_id))
    response = post_to_ckan_api.post_to_ckan_api(
        base_url, 'resource_show', data={'id': resource_id},
        api_key=api_key)
    if response['success'] is False:
        logger.warning("failed to get response for resource {0}".format(
            resource_id))
        return

    resource = response['result']

    # The two links this resource is expected to carry.
    # NOTE(review): "tranformation" is spelled this way in the URL itself;
    # presumably it matches the remote file name, so it is kept verbatim.
    rdf_mapping = 'http://wiki.publicdata.eu/wiki/Csv2rdf:{0}'.format(
        resource_id)
    rdf_data = ('http://csv2rdf.aksw.org/sparqlified/{0}'
                '_default-tranformation-configuration.rdf'.format(resource_id))

    # Overwrite whichever link is missing or different, remembering whether
    # anything changed so an unnecessary resource_update can be skipped.
    update = False
    if resource.get('rdf_mapping') != rdf_mapping:
        resource['rdf_mapping'] = rdf_mapping
        update = True
    if resource.get('rdf_data') != rdf_data:
        resource['rdf_data'] = rdf_data
        update = True

    if not update:
        logger.debug("RDF links already present in resource: {0}".format(
            display_name(resource)))
        return

    # Update the resource.
    logger.info("Adding RDF links to resource: {0}".format(
        display_name(resource)))
    response = post_to_ckan_api.post_to_ckan_api(
        base_url, 'resource_update', data=resource, api_key=api_key)
    if response['success'] is False:
        logger.warning("failed to update resource {0}".format(
            resource_id))
        return

    # Both links must have been stored; checking only rdf_mapping would let
    # a dropped rdf_data go unnoticed.
    updated_resource = response['result']
    mapping_stored = updated_resource.get('rdf_mapping') == rdf_mapping
    data_stored = updated_resource.get('rdf_data') == rdf_data
    if not (mapping_stored and data_stored):
        logger.warning("failed to update resource {0}".format(
            resource_id))
        return
Пример #4
0
def main(base_url, api_key):
    """Bring CKAN's RDF links in line with the resources csv2rdf exposes.

    Phase one requests the list of exposed resource IDs from csv2rdf and
    calls ``fetch`` on each. Phase two lists every dataset via
    ``package_list``/``package_show`` and removes ``rdf_mapping`` and
    ``rdf_data`` from any resource whose ID is not in the csv2rdf list.

    :param base_url: forwarded to every ``post_to_ckan_api`` call
        (presumably the CKAN site URL -- confirm against that helper).
    :param api_key: forwarded as ``api_key`` to every CKAN call.
    :raises AssertionError: if the csv2rdf request is not ``ok``, or if a
        CKAN call made here does not report ``success`` as ``True``.
    """
    # Fetch the list of resource IDs from csv2rdf.
    logger.info("Getting the list of resource IDs")
    r = requests.get('http://csv2rdf.aksw.org/get_exposed_rdf_list')
    assert r.ok, r
    logger.info("Parsing the list of resource IDs")
    resource_ids = json.loads(r.content)

    # Add/update the RDF links in the CKAN database.
    for resource_id in resource_ids:
        fetch(resource_id, base_url, api_key)

    # Set lookup keeps the per-resource membership test below cheap.
    exposed_ids = set(resource_ids)

    # Remove RDF links from the CKAN database, for any resources no longer
    # in the list of resource IDs from csv2rdf.
    logger.info("Getting package_list")
    response = post_to_ckan_api.post_to_ckan_api(
        base_url, 'package_list', api_key=api_key)
    assert response['success'] is True, response
    dataset_names = response['result']
    for dataset_name in dataset_names:
        logger.debug('package_show: {0}'.format(dataset_name))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'package_show', data={'id': dataset_name},
            api_key=api_key)
        assert response['success'] is True, response
        dataset = response['result']
        for resource in dataset['resources']:
            if resource['id'] in exposed_ids:
                logger.debug("Resource already updated: {0}".format(
                    display_name(resource)))
                continue

            has_links = 'rdf_mapping' in resource or 'rdf_data' in resource
            if not has_links:
                logger.debug("Resource already has no rdf_data or "
                             "rdf_mapping: {0}".format(display_name(resource)))
                continue

            # Either key may be absent on its own; pop() with a default
            # avoids the KeyError an unconditional ``del`` would raise.
            resource.pop('rdf_mapping', None)
            resource.pop('rdf_data', None)
            logger.info("Removing RDF links from resource: {0}".format(
                display_name(resource)))
            response = post_to_ckan_api.post_to_ckan_api(
                base_url, 'resource_update', data=resource, api_key=api_key)
            assert response['success'] is True, response
            updated_resource = response['result']
            assert 'rdf_mapping' not in updated_resource
            assert 'rdf_data' not in updated_resource
Пример #5
0
def main(base_url, api_key):
    """Synchronise the RDF links stored in CKAN with csv2rdf's exposed list.

    For every resource ID returned by csv2rdf, the resource is read from
    CKAN and its ``rdf_mapping``/``rdf_data`` fields are set to the URLs
    derived from the ID (a ``resource_update`` is sent only when something
    changed). Afterwards every dataset from ``package_list`` is inspected
    and both fields are removed from resources whose ID is not in the
    csv2rdf list.

    :param base_url: passed unchanged to every ``post_to_ckan_api`` call
        (presumably the CKAN site URL -- confirm against that helper).
    :param api_key: passed unchanged as ``api_key`` to every CKAN call.
    :raises AssertionError: if the csv2rdf request is not ``ok``, if any
        CKAN call does not report ``success`` as ``True``, or if an updated
        resource does not reflect the intended change.
    """
    # Fetch the list of resource IDs from csv2rdf.
    logger.info("Getting the list of resource IDs")
    r = requests.get('http://csv2rdf.aksw.org/get_exposed_rdf_list')
    assert r.ok, r
    logger.info("Parsing the list of resource IDs")
    resource_ids = json.loads(r.content)

    # Add/update the RDF links in the CKAN database.
    for resource_id in resource_ids:

        # Get the resource dict from CKAN.
        logger.debug("resource_show: {0}".format(resource_id))
        data_dict = {'id': resource_id}
        response = post_to_ckan_api.post_to_ckan_api(base_url,
                                                     'resource_show',
                                                     data=data_dict,
                                                     api_key=api_key)
        assert response['success'] is True, response
        resource = response['result']

        # Generate the rdf_mapping and rdf_data URLs and store them on the
        # resource dict if it does not already hold exactly these values.
        update = False
        rdf_mapping = 'http://wiki.publicdata.eu/wiki/Csv2rdf:{0}'.format(
            resource_id)
        if resource.get('rdf_mapping') != rdf_mapping:
            resource['rdf_mapping'] = rdf_mapping
            update = True
        # NOTE(review): "tranformation" is part of the URL as written here;
        # presumably it matches the remote file name, so it is left as is.
        rdf_data = (
            'http://csv2rdf.aksw.org/sparqlified/{0}'
            '_default-tranformation-configuration.rdf'.format(resource_id))
        if resource.get('rdf_data') != rdf_data:
            resource['rdf_data'] = rdf_data
            update = True

        if not update:
            logger.debug("RDF links already present in resource: {0}".format(
                display_name(resource)))
            continue

        # Update the resource.
        logger.info("Adding RDF links to resource: {0}".format(
            display_name(resource)))
        response = post_to_ckan_api.post_to_ckan_api(base_url,
                                                     'resource_update',
                                                     data=resource,
                                                     api_key=api_key)
        assert response['success'] is True, response
        updated_resource = response['result']
        assert updated_resource.get('rdf_mapping') == rdf_mapping
        assert updated_resource.get('rdf_data') == rdf_data

    # The removal phase tests every CKAN resource against the csv2rdf list,
    # so build a set once instead of scanning the list for each resource.
    exposed_ids = set(resource_ids)

    # Remove RDF links from the CKAN database, for any resources no longer
    # in the list of resource IDs from csv2rdf.
    logger.info("Getting package_list")
    response = post_to_ckan_api.post_to_ckan_api(base_url,
                                                 'package_list',
                                                 api_key=api_key)
    assert response['success'] is True, response
    dataset_names = response['result']
    for dataset_name in dataset_names:
        logger.debug('package_show: {0}'.format(dataset_name))
        response = post_to_ckan_api.post_to_ckan_api(base_url,
                                                     'package_show',
                                                     data={'id': dataset_name},
                                                     api_key=api_key)
        assert response['success'] is True, response
        dataset = response['result']
        for resource in dataset['resources']:
            if resource['id'] in exposed_ids:
                logger.debug("Resource already updated: {0}".format(
                    display_name(resource)))
                continue

            if 'rdf_mapping' not in resource and 'rdf_data' not in resource:
                logger.debug("Resource already has no rdf_data or "
                             "rdf_mapping: {0}".format(
                                 display_name(resource)))
                continue

            # Only one of the two keys may be present, so use pop() with a
            # default: a plain ``del`` of the missing one raises KeyError.
            resource.pop('rdf_mapping', None)
            resource.pop('rdf_data', None)
            logger.info("Removing RDF links from resource: {0}".format(
                display_name(resource)))
            response = post_to_ckan_api.post_to_ckan_api(
                base_url,
                'resource_update',
                data=resource,
                api_key=api_key)
            assert response['success'] is True, response
            updated_resource = response['result']
            assert 'rdf_mapping' not in updated_resource
            assert 'rdf_data' not in updated_resource
def main(base_url, api_key):
    """Bring CKAN's RDF links in line with the resources csv2rdf exposes.

    Phase one reads each resource named in csv2rdf's exposed list from CKAN
    and sets its ``rdf_mapping``/``rdf_data`` fields to the URLs built from
    the resource ID, sending a ``resource_update`` only when a field
    changed. Phase two lists every dataset via
    ``package_list``/``package_show`` and removes both fields from any
    resource whose ID is not in the csv2rdf list.

    :param base_url: forwarded to every ``post_to_ckan_api`` call
        (presumably the CKAN site URL -- confirm against that helper).
    :param api_key: forwarded as ``api_key`` to every CKAN call.
    :raises AssertionError: if the csv2rdf request is not ``ok``, if any
        CKAN call does not report ``success`` as ``True``, or if an updated
        resource does not reflect the intended change.
    """
    # Fetch the list of resource IDs from csv2rdf.
    logger.info("Getting the list of resource IDs")
    r = requests.get('http://csv2rdf.aksw.org/get_exposed_rdf_list')
    assert r.ok, r
    logger.info("Parsing the list of resource IDs")
    resource_ids = json.loads(r.content)

    # Add/update the RDF links in the CKAN database.
    for resource_id in resource_ids:

        # Get the resource dict from CKAN.
        logger.debug("resource_show: {0}".format(resource_id))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'resource_show', data={'id': resource_id},
            api_key=api_key)
        assert response['success'] is True, response
        resource = response['result']

        # The two links this resource is expected to carry.
        # NOTE(review): "tranformation" is spelled this way in the URL
        # itself; presumably it matches the remote file name, so it is kept.
        rdf_mapping = 'http://wiki.publicdata.eu/wiki/Csv2rdf:{0}'.format(
            resource_id)
        rdf_data = (
            'http://csv2rdf.aksw.org/sparqlified/{0}'
            '_default-tranformation-configuration.rdf'.format(resource_id))

        # Overwrite whichever link is missing or different, remembering
        # whether anything changed so a needless update can be skipped.
        update = False
        if resource.get('rdf_mapping') != rdf_mapping:
            resource['rdf_mapping'] = rdf_mapping
            update = True
        if resource.get('rdf_data') != rdf_data:
            resource['rdf_data'] = rdf_data
            update = True

        if not update:
            logger.debug("RDF links already present in resource: {0}".format(
                display_name(resource)))
            continue

        # Update the resource.
        logger.info("Adding RDF links to resource: {0}".format(
            display_name(resource)))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'resource_update', data=resource, api_key=api_key)
        assert response['success'] is True, response
        updated_resource = response['result']
        assert updated_resource.get('rdf_mapping') == rdf_mapping
        assert updated_resource.get('rdf_data') == rdf_data

    # Set lookup keeps the per-resource membership test below cheap.
    exposed_ids = set(resource_ids)

    # Remove RDF links from the CKAN database, for any resources no longer
    # in the list of resource IDs from csv2rdf.
    logger.info("Getting package_list")
    response = post_to_ckan_api.post_to_ckan_api(
        base_url, 'package_list', api_key=api_key)
    assert response['success'] is True, response
    dataset_names = response['result']
    for dataset_name in dataset_names:
        logger.debug('package_show: {0}'.format(dataset_name))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'package_show', data={'id': dataset_name},
            api_key=api_key)
        assert response['success'] is True, response
        dataset = response['result']
        for resource in dataset['resources']:
            if resource['id'] in exposed_ids:
                logger.debug("Resource already updated: {0}".format(
                    display_name(resource)))
                continue

            has_links = 'rdf_mapping' in resource or 'rdf_data' in resource
            if not has_links:
                logger.debug("Resource already has no rdf_data or "
                             "rdf_mapping: {0}".format(display_name(resource)))
                continue

            # Either key may be absent on its own; pop() with a default
            # avoids the KeyError an unconditional ``del`` would raise.
            resource.pop('rdf_mapping', None)
            resource.pop('rdf_data', None)
            logger.info("Removing RDF links from resource: {0}".format(
                display_name(resource)))
            response = post_to_ckan_api.post_to_ckan_api(
                base_url, 'resource_update', data=resource, api_key=api_key)
            assert response['success'] is True, response
            updated_resource = response['result']
            assert 'rdf_mapping' not in updated_resource
            assert 'rdf_data' not in updated_resource