def main(base_url, api_key):
    """Synchronise the csv2rdf RDF links with the resources stored in CKAN.

    The list of resource IDs is downloaded from csv2rdf and ``fetch`` is
    called for each ID to add or refresh its RDF links. Afterwards every
    dataset named by ``package_list`` is inspected and the ``rdf_mapping``
    and ``rdf_data`` fields are removed from any resource whose ID is not in
    the downloaded list.

    :param base_url: forwarded unchanged to every ``post_to_ckan_api`` call.
    :param api_key: forwarded as ``api_key`` to every ``post_to_ckan_api``
        call.
    :raises AssertionError: if the csv2rdf request is not ``ok``, if a CKAN
        call made here does not report ``success``, or if an RDF field is
        still present on a resource after it was updated.
    """
    # Fetch the list of resource IDs from csv2rdf.
    logger.info("Getting the list of resource IDs")
    r = requests.get('http://csv2rdf.aksw.org/get_exposed_rdf_list')
    assert r.ok, r
    logger.info("Parsing the list of resource IDs")
    resource_ids = json.loads(r.content)

    # Add/update the RDF links in the CKAN database.
    for resource_id in resource_ids:
        fetch(resource_id, base_url, api_key)

    # Remove RDF links from the CKAN database, for any resources no longer
    # in the list of resource IDs from csv2rdf.
    logger.info("Getting package_list")
    response = post_to_ckan_api.post_to_ckan_api(base_url, 'package_list',
                                                 api_key=api_key)
    assert response['success'] is True, response
    dataset_names = response['result']
    for dataset_name in dataset_names:
        logger.debug('package_show: {0}'.format(dataset_name))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'package_show', data={'id': dataset_name},
            api_key=api_key)
        assert response['success'] is True
        dataset = response['result']
        for resource in dataset['resources']:
            if resource['id'] not in resource_ids:
                if 'rdf_mapping' in resource or 'rdf_data' in resource:
                    # Only one of the two keys may be present, so use pop()
                    # with a default: a plain ``del`` of the missing key
                    # would raise KeyError.
                    resource.pop('rdf_mapping', None)
                    resource.pop('rdf_data', None)
                    logger.info(
                        "Removing RDF links from resource: {0}".format(
                            display_name(resource)))
                    response = post_to_ckan_api.post_to_ckan_api(
                        base_url, 'resource_update', data=resource,
                        api_key=api_key)
                    assert response['success'] is True, response
                    updated_resource = response['result']
                    assert 'rdf_mapping' not in updated_resource
                    assert 'rdf_data' not in updated_resource
                else:
                    logger.debug("Resource already has no rdf_data or "
                                 "rdf_mapping: {0}".format(
                                     display_name(resource)))
            else:
                logger.debug("Resource already updated: {0}".format(
                    display_name(resource)))
def fetch(resource_id, base_url, api_key):
    """Make sure a CKAN resource carries its csv2rdf RDF links.

    The resource is read with ``resource_show``. Its ``rdf_mapping`` and
    ``rdf_data`` fields are compared with the URLs derived from
    ``resource_id`` and, if either differs, the resource is written back
    with ``resource_update``. Failures are logged as warnings and the
    function returns early instead of raising.

    :param resource_id: ID of the resource; also interpolated into both
        generated URLs.
    :param base_url: forwarded unchanged to ``post_to_ckan_api``.
    :param api_key: forwarded as ``api_key`` to ``post_to_ckan_api``.
    :returns: ``None`` in every case.
    """
    # Get the resource dict from CKAN.
    logger.debug("resource_show: {0}".format(resource_id))
    data_dict = {'id': resource_id}
    response = post_to_ckan_api.post_to_ckan_api(base_url, 'resource_show',
                                                 data=data_dict,
                                                 api_key=api_key)
    if response['success'] is False:
        # warning() is used instead of the deprecated warn() alias
        # (assuming ``logger`` is a standard logging.Logger).
        logger.warning(
            "failed to get response for resource {0}".format(resource_id))
        return
    resource = response['result']

    # Generate the rdf_mapping and rdf_data URLs and store them on the
    # resource dict if they are not already there.
    update = False
    rdf_mapping = 'http://wiki.publicdata.eu/wiki/Csv2rdf:{0}'.format(
        resource_id)
    if resource.get('rdf_mapping') != rdf_mapping:
        resource['rdf_mapping'] = rdf_mapping
        update = True
    rdf_data = ('http://csv2rdf.aksw.org/sparqlified/{0}'
                '_default-tranformation-configuration.rdf'.format(
                    resource_id))
    if resource.get('rdf_data') != rdf_data:
        resource['rdf_data'] = rdf_data
        update = True

    if not update:
        logger.debug("RDF links already present in resource: {0}".format(
            display_name(resource)))
        return

    # Update the resource.
    logger.info("Adding RDF links to resource: {0}".format(
        display_name(resource)))
    response = post_to_ckan_api.post_to_ckan_api(base_url, 'resource_update',
                                                 data=resource,
                                                 api_key=api_key)
    if response['success'] is False:
        logger.warning("failed to update resource {0}".format(resource_id))
        return

    # Verify that BOTH links were stored; checking only rdf_mapping would
    # let a dropped rdf_data go unnoticed.
    updated_resource = response['result']
    if (updated_resource.get('rdf_mapping') != rdf_mapping or
            updated_resource.get('rdf_data') != rdf_data):
        logger.warning("failed to update resource {0}".format(resource_id))
def fetch(resource_id, base_url, api_key):
    """Add or refresh the csv2rdf RDF links of one CKAN resource.

    Reads the resource via ``resource_show``, sets ``rdf_mapping`` and
    ``rdf_data`` to the URLs built from ``resource_id`` when they differ,
    and pushes the change with ``resource_update``. Every failure path logs
    a warning and returns; nothing is raised by the checks in this function.

    :param resource_id: resource ID, used for the lookup and inside both
        generated URLs.
    :param base_url: passed straight through to ``post_to_ckan_api``.
    :param api_key: passed to ``post_to_ckan_api`` as ``api_key``.
    :returns: ``None``.
    """
    # Get the resource dict from CKAN.
    logger.debug("resource_show: {0}".format(resource_id))
    response = post_to_ckan_api.post_to_ckan_api(
        base_url, 'resource_show', data={'id': resource_id},
        api_key=api_key)
    if response['success'] is False:
        # Logger.warn is a deprecated alias of Logger.warning (assuming a
        # standard logging.Logger), so call warning() directly.
        logger.warning("failed to get response for resource {0}".format(
            resource_id))
        return
    resource = response['result']

    # The links this resource is expected to carry, in the order in which
    # they are written to the resource dict.
    expected_links = (
        ('rdf_mapping',
         'http://wiki.publicdata.eu/wiki/Csv2rdf:{0}'.format(resource_id)),
        ('rdf_data',
         ('http://csv2rdf.aksw.org/sparqlified/{0}'
          '_default-tranformation-configuration.rdf'.format(resource_id))),
    )

    update = False
    for field, url in expected_links:
        if resource.get(field) != url:
            resource[field] = url
            update = True

    # Update the resource, if necessary.
    if update:
        logger.info("Adding RDF links to resource: {0}".format(
            display_name(resource)))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'resource_update', data=resource, api_key=api_key)
        if response['success'] is False:
            logger.warning("failed to update resource {0}".format(
                resource_id))
            return
        updated_resource = response['result']
        # Check every expected link, not just rdf_mapping, so that a
        # missing rdf_data is reported as a failed update too.
        for field, url in expected_links:
            if updated_resource.get(field) != url:
                logger.warning("failed to update resource {0}".format(
                    resource_id))
                return
    else:
        logger.debug("RDF links already present in resource: {0}".format(
            display_name(resource)))
def main(base_url, api_key):
    """Bring CKAN's RDF links in line with the list published by csv2rdf.

    Step one downloads the resource IDs exposed by csv2rdf and hands each
    one to ``fetch``. Step two walks every dataset returned by
    ``package_list`` and strips ``rdf_mapping`` / ``rdf_data`` from each
    resource whose ID is absent from that list.

    :param base_url: passed straight through to ``post_to_ckan_api``.
    :param api_key: passed to ``post_to_ckan_api`` as ``api_key``.
    :raises AssertionError: when the csv2rdf response is not ``ok``, when a
        CKAN call issued here does not report ``success``, or when an
        updated resource still contains one of the RDF fields.
    """
    # Fetch the list of resource IDs from csv2rdf.
    logger.info("Getting the list of resource IDs")
    r = requests.get('http://csv2rdf.aksw.org/get_exposed_rdf_list')
    assert r.ok, r
    logger.info("Parsing the list of resource IDs")
    resource_ids = json.loads(r.content)

    # Add/update the RDF links in the CKAN database.
    for resource_id in resource_ids:
        fetch(resource_id, base_url, api_key)

    # Remove RDF links from the CKAN database, for any resources no longer
    # in the list of resource IDs from csv2rdf.
    logger.info("Getting package_list")
    response = post_to_ckan_api.post_to_ckan_api(base_url, 'package_list',
                                                 api_key=api_key)
    assert response['success'] is True, response
    for dataset_name in response['result']:
        logger.debug('package_show: {0}'.format(dataset_name))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'package_show', data={'id': dataset_name},
            api_key=api_key)
        assert response['success'] is True
        dataset = response['result']
        for resource in dataset['resources']:
            if resource['id'] in resource_ids:
                # Still listed by csv2rdf: nothing to remove here.
                logger.debug("Resource already updated: {0}".format(
                    display_name(resource)))
                continue

            if 'rdf_mapping' not in resource and 'rdf_data' not in resource:
                logger.debug("Resource already has no rdf_data or "
                             "rdf_mapping: {0}".format(
                                 display_name(resource)))
                continue

            # At least one RDF field is present, but not necessarily both:
            # pop() with a default tolerates the absent one, whereas ``del``
            # would raise KeyError.
            resource.pop('rdf_mapping', None)
            resource.pop('rdf_data', None)
            logger.info("Removing RDF links from resource: {0}".format(
                display_name(resource)))
            response = post_to_ckan_api.post_to_ckan_api(
                base_url, 'resource_update', data=resource,
                api_key=api_key)
            assert response['success'] is True, response
            updated_resource = response['result']
            assert 'rdf_mapping' not in updated_resource
            assert 'rdf_data' not in updated_resource
def _add_rdf_links(resource_id, base_url, api_key):
    """Set the csv2rdf ``rdf_mapping``/``rdf_data`` links on one resource."""
    # Get the resource dict from CKAN.
    logger.debug("resource_show: {0}".format(resource_id))
    data_dict = {'id': resource_id}
    response = post_to_ckan_api.post_to_ckan_api(base_url, 'resource_show',
                                                 data=data_dict,
                                                 api_key=api_key)
    assert response['success'] is True, response
    resource = response['result']

    # Generate the rdf_mapping and rdf_data URLs and store them on the
    # resource dict if they are not already there.
    update = False
    rdf_mapping = 'http://wiki.publicdata.eu/wiki/Csv2rdf:{0}'.format(
        resource_id)
    if resource.get('rdf_mapping') != rdf_mapping:
        resource['rdf_mapping'] = rdf_mapping
        update = True
    rdf_data = ('http://csv2rdf.aksw.org/sparqlified/{0}'
                '_default-tranformation-configuration.rdf'.format(
                    resource_id))
    if resource.get('rdf_data') != rdf_data:
        resource['rdf_data'] = rdf_data
        update = True

    # Update the resource, if necessary.
    if update:
        logger.info("Adding RDF links to resource: {0}".format(
            display_name(resource)))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'resource_update', data=resource, api_key=api_key)
        assert response['success'] is True, response
        updated_resource = response['result']
        assert updated_resource.get('rdf_mapping') == rdf_mapping
        assert updated_resource.get('rdf_data') == rdf_data
    else:
        logger.debug("RDF links already present in resource: {0}".format(
            display_name(resource)))


def _remove_stale_rdf_links(resource_ids, base_url, api_key):
    """Strip the RDF links from every resource not in ``resource_ids``."""
    logger.info("Getting package_list")
    response = post_to_ckan_api.post_to_ckan_api(base_url, 'package_list',
                                                 api_key=api_key)
    assert response['success'] is True, response
    dataset_names = response['result']
    for dataset_name in dataset_names:
        logger.debug('package_show: {0}'.format(dataset_name))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'package_show', data={'id': dataset_name},
            api_key=api_key)
        assert response['success'] is True
        dataset = response['result']
        for resource in dataset['resources']:
            if resource['id'] not in resource_ids:
                if 'rdf_mapping' in resource or 'rdf_data' in resource:
                    # Only one of the two keys may exist; pop() with a
                    # default avoids the KeyError a bare ``del`` raises.
                    resource.pop('rdf_mapping', None)
                    resource.pop('rdf_data', None)
                    logger.info(
                        "Removing RDF links from resource: {0}".format(
                            display_name(resource)))
                    response = post_to_ckan_api.post_to_ckan_api(
                        base_url, 'resource_update', data=resource,
                        api_key=api_key)
                    assert response['success'] is True, response
                    updated_resource = response['result']
                    assert 'rdf_mapping' not in updated_resource
                    assert 'rdf_data' not in updated_resource
                else:
                    logger.debug("Resource already has no rdf_data or "
                                 "rdf_mapping: {0}".format(
                                     display_name(resource)))
            else:
                logger.debug("Resource already updated: {0}".format(
                    display_name(resource)))


def main(base_url, api_key):
    """Synchronise the csv2rdf RDF links with the resources stored in CKAN.

    Downloads the list of resource IDs from csv2rdf, adds or refreshes the
    ``rdf_mapping`` and ``rdf_data`` links on each listed resource, then
    removes those fields from every resource of every dataset whose ID is
    not in the list.

    :param base_url: forwarded unchanged to every ``post_to_ckan_api`` call.
    :param api_key: forwarded as ``api_key`` to every ``post_to_ckan_api``
        call.
    :raises AssertionError: if the csv2rdf request is not ``ok``, if any
        CKAN call does not report ``success``, or if an updated resource
        does not carry the expected RDF fields afterwards.
    """
    # Fetch the list of resource IDs from csv2rdf.
    logger.info("Getting the list of resource IDs")
    r = requests.get('http://csv2rdf.aksw.org/get_exposed_rdf_list')
    assert r.ok, r
    logger.info("Parsing the list of resource IDs")
    resource_ids = json.loads(r.content)

    # Add/update the RDF links in the CKAN database.
    for resource_id in resource_ids:
        _add_rdf_links(resource_id, base_url, api_key)

    # Remove RDF links from the CKAN database, for any resources no longer
    # in the list of resource IDs from csv2rdf.
    _remove_stale_rdf_links(resource_ids, base_url, api_key)
def main(base_url, api_key):
    """Bring CKAN's RDF links in line with the list published by csv2rdf.

    For each resource ID exposed by csv2rdf the resource is read from CKAN
    and its ``rdf_mapping`` / ``rdf_data`` fields are set to the URLs built
    from the ID (writing back only when something changed). Afterwards all
    datasets from ``package_list`` are scanned and both fields are removed
    from resources whose ID is not in the csv2rdf list.

    :param base_url: passed straight through to ``post_to_ckan_api``.
    :param api_key: passed to ``post_to_ckan_api`` as ``api_key``.
    :raises AssertionError: when the csv2rdf response is not ``ok``, when a
        CKAN call does not report ``success``, or when an updated resource
        does not end up in the expected state.
    """
    # Fetch the list of resource IDs from csv2rdf.
    logger.info("Getting the list of resource IDs")
    r = requests.get('http://csv2rdf.aksw.org/get_exposed_rdf_list')
    assert r.ok, r
    logger.info("Parsing the list of resource IDs")
    resource_ids = json.loads(r.content)

    # Add/update the RDF links in the CKAN database.
    for resource_id in resource_ids:
        # Get the resource dict from CKAN.
        logger.debug("resource_show: {0}".format(resource_id))
        data_dict = {'id': resource_id}
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'resource_show', data=data_dict, api_key=api_key)
        assert response['success'] is True, response
        resource = response['result']

        # Generate the rdf_mapping and rdf_data URLs and store them on the
        # resource dict if they are not already there.
        update = False
        rdf_mapping = 'http://wiki.publicdata.eu/wiki/Csv2rdf:{0}'.format(
            resource_id)
        if resource.get('rdf_mapping') != rdf_mapping:
            resource['rdf_mapping'] = rdf_mapping
            update = True
        rdf_data = ('http://csv2rdf.aksw.org/sparqlified/{0}'
                    '_default-tranformation-configuration.rdf'.format(
                        resource_id))
        if resource.get('rdf_data') != rdf_data:
            resource['rdf_data'] = rdf_data
            update = True

        # Update the resource, if necessary.
        if update:
            logger.info("Adding RDF links to resource: {0}".format(
                display_name(resource)))
            response = post_to_ckan_api.post_to_ckan_api(
                base_url, 'resource_update', data=resource,
                api_key=api_key)
            assert response['success'] is True, response
            updated_resource = response['result']
            assert updated_resource.get('rdf_mapping') == rdf_mapping
            assert updated_resource.get('rdf_data') == rdf_data
        else:
            logger.debug(
                "RDF links already present in resource: {0}".format(
                    display_name(resource)))

    # Remove RDF links from the CKAN database, for any resources no longer
    # in the list of resource IDs from csv2rdf.
    logger.info("Getting package_list")
    response = post_to_ckan_api.post_to_ckan_api(base_url, 'package_list',
                                                 api_key=api_key)
    assert response['success'] is True, response
    dataset_names = response['result']
    for dataset_name in dataset_names:
        logger.debug('package_show: {0}'.format(dataset_name))
        response = post_to_ckan_api.post_to_ckan_api(
            base_url, 'package_show', data={'id': dataset_name},
            api_key=api_key)
        assert response['success'] is True
        dataset = response['result']
        for resource in dataset['resources']:
            if resource['id'] not in resource_ids:
                if 'rdf_mapping' in resource or 'rdf_data' in resource:
                    # The condition is an ``or``, so one key can be absent;
                    # pop() with a default removes whichever exists without
                    # the KeyError that ``del`` raises for the missing one.
                    resource.pop('rdf_mapping', None)
                    resource.pop('rdf_data', None)
                    logger.info(
                        "Removing RDF links from resource: {0}".format(
                            display_name(resource)))
                    response = post_to_ckan_api.post_to_ckan_api(
                        base_url, 'resource_update', data=resource,
                        api_key=api_key)
                    assert response['success'] is True, response
                    updated_resource = response['result']
                    assert 'rdf_mapping' not in updated_resource
                    assert 'rdf_data' not in updated_resource
                else:
                    logger.debug("Resource already has no rdf_data or "
                                 "rdf_mapping: {0}".format(
                                     display_name(resource)))
            else:
                logger.debug("Resource already updated: {0}".format(
                    display_name(resource)))