def _create_datastorer_task(self, resource):
    """Queue a ``datastorer.upload`` celery job for *resource*.

    The generated celery task id is recorded against the resource via
    the ``task_status_update`` action so it can be looked up later.
    """
    site_user = get_action('get_site_user')(
        {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})
    task_id = make_uuid()
    # Serialised context handed to the celery worker.
    context = json.dumps({
        'site_url': h.url_for_static('/', qualified=True),
        'apikey': site_user.get('apikey'),
        'site_user_apikey': site_user.get('apikey'),
        'username': site_user.get('name'),
    })
    data = json.dumps(resource_dictize(resource, {'model': model}))
    status = {
        'entity_id': resource.id,
        'entity_type': u'resource',
        'task_type': u'datastorer',
        'key': u'celery_task_id',
        'value': task_id,
        'last_updated': datetime.now().isoformat()
    }
    get_action('task_status_update')(
        {'model': model, 'user': site_user.get('name')}, status)
    celery.send_task("datastorer.upload", args=[context, data],
                     task_id=task_id)
def __init__(self, url=u'', format=u'', description=u'', hash=u'',
             extras=None, package_id=None, **kwargs):
    """Create a resource row.

    Remaining core columns and declared extra columns may be supplied
    through ``kwargs``; any unrecognised keyword raises ``TypeError``.
    """
    self.id = _types.make_uuid()
    self.url = url
    self.format = format
    self.description = description
    self.hash = hash
    self.package_id = package_id
    # The base columns historically defaulted to empty strings rather
    # than None (NULL), which is why they are assigned explicitly above
    # instead of through the generic loop below.
    remaining = set(CORE_RESOURCE_COLUMNS) - set(
        ['url', 'format', 'description', 'hash'])
    for column in remaining:
        setattr(self, column, kwargs.pop(column, None))
    self.extras = extras or {}
    # Extra columns are only set when a non-None value was provided.
    for column in self.get_extra_columns():
        value = kwargs.pop(column, None)
        if value is not None:
            setattr(self, column, value)
    if kwargs:
        raise TypeError('unexpected keywords %s' % kwargs)
def _create_task(self, resource):
    """Queue a ``qa.update`` celery job for *resource* and record the
    generated task id through the ``task_status_update`` action."""
    site_user = get_action('get_site_user')({
        'model': model,
        'ignore_auth': True,
        'defer_commit': True
    }, {})
    task_id = make_uuid()
    context = json.dumps({
        'site_url': self.site_url,
        'apikey': site_user.get('apikey')
    })
    data = json.dumps(resource_dictize(resource, {'model': model}))
    status = {
        'entity_id': resource.id,
        'entity_type': u'resource',
        'task_type': u'qa',
        'key': u'celery_task_id',
        'value': task_id,
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    get_action('task_status_update')(
        {'model': model, 'user': site_user.get('name')}, status)
    celery.send_task("qa.update", args=[context, data], task_id=task_id)
def _create_task(self, resource):
    """Dispatch a ``qa.update`` celery task for *resource*, storing the
    celery task id via ``task_status_update`` first."""
    user = get_action('get_site_user')(
        {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})
    # Worker context: where to reach the site and how to authenticate.
    worker_context = json.dumps({
        'site_url': self.site_url,
        'apikey': user.get('apikey')
    })
    payload = json.dumps(resource_dictize(resource, {'model': model}))
    new_task_id = make_uuid()
    get_action('task_status_update')(
        {'model': model, 'user': user.get('name')},
        {
            'entity_id': resource.id,
            'entity_type': u'resource',
            'task_type': u'qa',
            'key': u'celery_task_id',
            'value': new_task_id,
            'error': u'',
            'last_updated': datetime.now().isoformat()
        })
    celery.send_task("qa.update", args=[worker_context, payload],
                     task_id=new_task_id)
def _create_datastorer_task(self, resource):
    """Queue a ``datastorer.upload`` celery job for *resource*,
    lazily resolving ``site_url`` from config when it is unset."""
    site_user = get_action('get_site_user')(
        {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})
    if not hasattr(self, 'site_url'):
        # Prefer the internal address when one is configured.
        from pylons import config
        self.site_url = (config.get('ckan.site_url_internally')
                         or config.get('ckan.site_url'))
    context = json.dumps({
        'site_url': self.site_url,
        'apikey': site_user.get('apikey'),
        'site_user_apikey': site_user.get('apikey'),
        'username': site_user.get('name'),
    })
    data = json.dumps(resource_dictize(resource, {'model': model}))
    task_id = make_uuid()
    status = {
        'entity_id': resource.id,
        'entity_type': u'resource',
        'task_type': u'datastorer',
        'key': u'celery_task_id',
        'value': task_id,
        'last_updated': datetime.now().isoformat()
    }
    get_action('task_status_update')(
        {'model': model, 'user': site_user.get('name')}, status)
    celery.send_task("datastorer.upload", args=[context, data],
                     task_id=task_id)
def identify_resource(resource):
    """Queue a delayed ``vectorstorer.identify`` celery task for
    *resource* and track it through a ``ResourceIngest`` row."""
    # With resource_dictize we get the correct resource url
    # even if dataset is in draft state
    task_id = make_uuid()
    resource_dict = resource_dictize(resource, {'model': model})
    context = _make_default_context()
    # countdown delays the worker by 15s -- presumably so the resource
    # row is committed before the worker reads it; TODO confirm.
    celery.send_task('vectorstorer.identify',
                     args=[resource_dict, context],
                     countdown=15, task_id=task_id)
    res_identify = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    if res_identify:
        # This is when a user had previously rejected the ingestion workflow,
        # but now wants to re-identify the resource
        model.Session.delete(res_identify)
        new_res_identify = ResourceIngest(task_id, resource.id,
                                          ResourceStorerType.VECTOR)
        model.Session.add(new_res_identify)
        model.Session.commit()
    else:
        # A newly created/updated resource needs to be identified
        # NOTE(review): no commit in this branch, unlike the one above --
        # presumably the surrounding transaction commits later; confirm.
        new_res_identify = ResourceIngest(task_id, resource.id,
                                          ResourceStorerType.VECTOR)
        model.Session.add(new_res_identify)
def create_identify_resource_task(resource):
    """
    Creates the celery task to identify the resource

    :param resource: the resource to be identified
    """
    task_id = make_uuid()
    # We are using resource_dictize() just to force CKAN to provide an absolute url
    # Note Maybe a more clean way to achive this would be to call something like
    # url_for(controller='package', action='resource_download', id=package_id, resource_id=resource_id)
    package_id = resource.as_dict()['package_id']
    resource_dict = resource_dictize(resource, {'model': model})
    resource_dict['package_id'] = package_id
    context = _make_default_context()
    context['resource_dict'] = resource_dict
    celery.send_task('rasterstorer.identify', args=[context], task_id=task_id)
    res_identify = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    if res_identify:
        # This is when a user had previously rejected the ingestion workflow,
        # but now wants to re-identify the resource
        model.Session.delete(res_identify)
        new_res_identify = ResourceIngest(task_id, resource.id,
                                          ResourceStorerType.RASTER)
        model.Session.add(new_res_identify)
        model.Session.commit()
    else:
        # A newly created/updated resource needs to be identified
        # NOTE(review): this branch does not commit, unlike the one above
        # -- presumably the enclosing transaction commits later; confirm.
        new_res_identify = ResourceIngest(task_id, resource.id,
                                          ResourceStorerType.RASTER)
        model.Session.add(new_res_identify)
def __init__(self, org_id, key, label, default_value, revision_id, state,
             validator, custom, presettable, readonly, field_type,
             sort_order, min_value=0.0, max_value=0.0):
    """Initialise a field-definition row, stamping a fresh uuid and the
    current modification time."""
    self.id = make_uuid()
    self.modified_date = datetime.datetime.now()
    # Identity / ownership.
    self.org_id = org_id
    self.key = key
    self.label = label
    self.revision_id = revision_id
    self.state = state
    # Behaviour flags and validation.
    self.default_value = default_value
    self.validator = validator
    self.custom = custom
    self.presettable = presettable
    self.readonly = readonly
    # Presentation and range.
    self.field_type = field_type
    self.sort_order = sort_order
    self.min_value = min_value
    self.max_value = max_value
def _create_archiver_task(self, resource):
    """Queue an ``archiver.update`` celery job for *resource*.

    The worker context carries the credentials of the currently
    logged-in user when one is available.  When this code runs outside
    a web request (e.g. from the command line) the ``c`` stacked object
    proxy has no object registered and raises ``TypeError`` on access;
    in that case we fall back to the site user, matching the behaviour
    of the other archiver plugins in this codebase.
    """
    from ckan.lib.base import c
    site_user = get_action('get_site_user')(
        {'model': model, 'ignore_auth': True, 'defer_commit': True}, {}
    )
    # FIX: previously c.user was read unconditionally, which crashed
    # with TypeError when no request context was registered.
    try:
        c.user
    except TypeError:
        username = site_user['name']
        userapikey = site_user['apikey']
    else:
        user = model.User.by_name(c.user)
        username = user.name
        userapikey = user.apikey
    context = json.dumps({
        'site_url': self.site_url,
        'apikey': userapikey,
        'username': username,
        'cache_url_root': self.cache_url_root,
        'site_user_apikey': site_user['apikey']
    })
    data = json.dumps(resource_dictize(resource, {'model': model}))
    task_id = make_uuid()
    archiver_task_status = {
        'entity_id': resource.id,
        'entity_type': u'resource',
        'task_type': u'archiver',
        'key': u'celery_task_id',
        'value': task_id,
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    archiver_task_context = {
        'model': model,
        'user': site_user['name'],
        'ignore_auth': True
    }
    get_action('task_status_update')(archiver_task_context,
                                     archiver_task_status)
    celery.send_task("archiver.update", args=[context, data],
                     task_id=task_id)
def create_qa_update_package_task(package, queue):
    """Queue a ``qa.update_package`` celery job for *package* on *queue*."""
    from pylons import config
    ini_path = os.path.abspath(config.__file__)
    # Human-readable task id: package name plus a short uuid suffix.
    task_id = '%s-%s' % (package.name, make_uuid()[:4])
    celery.send_task('qa.update_package', args=[ini_path, package.id],
                     task_id=task_id, queue=queue)
    log.debug('QA of package put into celery queue %s: %s',
              queue, package.name)
def enqueue_document(user, filename, publisher):
    """
    Uses the provided data to send a job to the priority celery queue so that
    the spreadsheet is processed. We should create a job_started message
    header_row to ensure that the user sees immediately that something is
    happening.
    """
    from pylons import config
    from ckan import model
    from ckan.model.types import make_uuid
    from ckan.lib.celery_app import celery
    site_user = get_action('get_site_user')({
        'model': model,
        'ignore_auth': True,
        'defer_commit': True
    }, {})
    task_id = make_uuid()
    # Create the task for the queue
    context = json.dumps({
        'username': user.name,
        # Prefer the internal site url when one is configured.
        'site_url': config.get('ckan.site_url_internally') or config.get('ckan.site_url'),
        'apikey': user.apikey,
        'site_user_apikey': site_user['apikey']
    })
    data = json.dumps({
        'file': filename,
        'publisher': publisher.name,
        'publisher_id': publisher.id,
        'jobid': task_id
    })
    celery.send_task("inventory.process", args=[context, data],
                     task_id=task_id, queue='priority')
    # Create a task status.... and update it so that the user knows it has been started.
    inventory_task_status = {
        'entity_id': task_id,
        'entity_type': u'inventory',
        'task_type': u'inventory',
        'key': u'celery_task_id',
        'value': task_id,
        'error': u'',
        'state': 'Started',
        'last_updated': datetime.datetime.now().isoformat()
    }
    inventory_task_context = {
        'model': model,
        'user': user.name,
        'ignore_auth': True
    }
    res = get_action('task_status_update')(inventory_task_context,
                                           inventory_task_status)
    # Return the status row id and timestamp so the caller can poll.
    return res['id'], inventory_task_status['last_updated']
def command(self): """ Parse command line arguments and call appropriate method. """ if not self.args or self.args[0] in ["--help", "-h", "help"]: print Webstorer.__doc__ return cmd = self.args[0] self._load_config() # import after load config so CKAN_CONFIG evironment variable can be set from ckan.lib.celery_app import celery import tasks user = get_action("get_site_user")({"model": model, "ignore_auth": True}, {}) context = json.dumps( { "site_url": config["ckan.site_url"], "apikey": user.get("apikey"), "username": user.get("name"), "webstore_url": config.get("ckan.webstore_url"), } ) api_url = urlparse.urljoin(config["ckan.site_url"], "api/action") if cmd == "update": response = requests.post(api_url + "/current_package_list_with_resources", "{}") packages = json.loads(response.content).get("result") for package in packages: for resource in package.get("resources", []): data = json.dumps(resource, {"model": model}) if resource["webstore_url"]: continue mimetype = resource["mimetype"] if mimetype and ( mimetype not in tasks.DATA_FORMATS or resource["format"] not in tasks.DATA_FORMATS ): continue logger.info( "Webstoring resource from resource %s from package %s" % (resource["url"], package["name"]) ) task_id = make_uuid() datastorer_task_status = { "entity_id": resource["id"], "entity_type": u"resource", "task_type": u"datastorer", "key": u"celery_task_id", "value": task_id, "last_updated": datetime.now().isoformat(), } datastorer_task_context = {"model": model, "user": user.get("name")} get_action("task_status_update")(datastorer_task_context, datastorer_task_status) celery.send_task("datastorer.upload", args=[context, data], task_id=task_id) else: logger.error("Command %s not recognized" % (cmd,))
def _create_task(self, dataset):
    """Queue an ``os.spatial_ingest`` celery job for *dataset* on the
    priority queue."""
    import ckan.lib.celery_app as celery_app
    site_user = t.get_action('get_site_user')({
        'model': model,
        'ignore_auth': True,
        'defer_commit': True
    }, {})
    context = json.dumps({
        'site_url': self.site_url,
        'site_user_apikey': site_user['apikey'],
        'spatial_datastore_jdbc_url': self.spatial_datastore_jdbc_url,
        'spatial_ingester_filepath': self.spatial_ingester_filepath,
    })
    data = json.dumps(package_dictize(dataset, {'model': model}))
    task_id = make_uuid()
    queue = 'priority'
    celery_app.celery.send_task("os.spatial_ingest", args=[context, data],
                                task_id=task_id, queue=queue)
    log.debug(
        'Spatial Ingest put into celery queue %s: %s site_user=%s site_url=%s',
        queue, dataset.name, site_user['name'], self.site_url)
def _create_archiver_task(self, resource):
    """Queue an ``archiver.update`` celery job for *resource*, using the
    logged-in user's credentials when available and falling back to the
    site user for command-line runs."""
    from ckan.lib.base import c
    site_user = get_action('get_site_user')({
        'model': model,
        'ignore_auth': True,
        'defer_commit': True
    }, {})
    # If the code that triggers this is run from the command line, the c
    # stacked object proxy variable will not have been set up by the paste
    # registry so will give an error saying no object has been registered
    # for this thread. The easiest thing to do is to catch this, but it
    # would be nice to have a config option so that the behaviour can be
    # specified.
    try:
        c.user
    except TypeError:
        # This is no different from running the archiver from the command line:
        # See https://github.com/okfn/ckanext-archiver/blob/master/ckanext/archiver/commands.py
        username = site_user['name']
        userapikey = site_user['apikey']
    else:
        user = model.User.by_name(c.user)
        username = user.name
        userapikey = user.apikey
    context = json.dumps({
        'site_url': self.site_url,
        'apikey': userapikey,
        'username': username,
        'cache_url_root': self.cache_url_root,
        'site_user_apikey': site_user['apikey']
    })
    res_dict = resource_dictize(resource, {'model': model})
    data = json.dumps(res_dict)
    task_id = make_uuid()
    archiver_task_status = {
        'entity_id': resource.id,
        'entity_type': u'resource',
        'task_type': u'archiver',
        'key': u'celery_task_id',
        'value': task_id,
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    # The status row itself is written as the site user with auth skipped.
    archiver_task_context = {
        'model': model,
        'user': site_user['name'],
        'ignore_auth': True
    }
    get_action('task_status_update')(archiver_task_context,
                                     archiver_task_status)
    celery.send_task("archiver.update", args=[context, data],
                     task_id=task_id)
    log.debug(
        'Archival of resource put into celery queue: %s url=%r user=%s site_user=%s site_url=%s',
        resource.id, res_dict.get('url'), username, site_user['name'],
        self.site_url)
def after_update(self, context, data):
    """On dataset update, queue a ``certificate.new`` celery task when
    certificate generation is configured."""
    if not self._has_certs_config():
        return
    log.debug("Scheduling new certificate task for existing '%s' dataset",
              data['name'])
    task_args = [self._get_task_context(context),
                 self._get_package_data(data),
                 True]
    celery.send_task('certificate.new', args=task_args, task_id=make_uuid())
def create_archiver_task(resource, queue):
    """Queue an ``archiver.update`` celery job for *resource* on *queue*."""
    from pylons import config
    pkg = resource.resource_group.package
    ini_path = os.path.abspath(config.__file__)
    # Readable task id: package / short resource id / short uuid.
    task_id = '%s/%s/%s' % (pkg.name, resource.id[:4], make_uuid()[:4])
    celery.send_task('archiver.update', args=[ini_path, resource.id, queue],
                     task_id=task_id, queue=queue)
    log.debug('Archival of resource put into celery queue %s: %s/%s url=%r',
              queue, pkg.name, resource.id, resource.url)
def create_delete_resource_task(resource):
    """
    Creates the celery task for raster resource deletion

    :param resource: the resource to be deleted
    """
    ctx = _make_default_context()
    ctx['resource_dict'] = resource
    celery.send_task('rasterstorer.delete', args=[ctx], task_id=make_uuid())
def create_archiver_package_task(package, queue):
    """Queue an ``archiver.update_package`` celery job for *package*."""
    from pylons import config
    ini_path = os.path.abspath(config['__file__'])
    task_id = '%s/%s' % (package.name, make_uuid()[:4])
    celery.send_task('archiver.update_package',
                     args=[ini_path, package.id, queue],
                     task_id=task_id, queue=queue)
    log.debug('Archival of package put into celery queue %s: %s',
              queue, package.name)
def __init__(self, user_id, object_id, activity_type, data=None):
    """Create an activity record; ``data`` defaults to an empty dict."""
    self.id = _types.make_uuid()
    self.timestamp = datetime.datetime.utcnow()
    self.user_id = user_id
    self.object_id = object_id
    self.activity_type = activity_type
    # Normalise None here rather than using a shared mutable default.
    self.data = {} if data is None else data
def create_ingest_resource_task(resource):
    """
    Creates the celery task for raster resource ingestion

    :param resource: the resource to be ingested
    """
    ctx = _make_default_context()
    ctx['resource_dict'] = resource.as_dict()
    celery.send_task('rasterstorer.import', args=[ctx], task_id=make_uuid())
def create_qa_update_task(resource, queue):
    """Run a QA update for *resource* synchronously.

    NOTE(review): despite the name and the ``queue`` parameter, this
    variant calls ``tasks.update`` directly instead of dispatching a
    celery job; ``queue`` is unused but kept for interface
    compatibility with the async variants.

    FIX: removed dead code -- ``package`` and ``task_id`` were computed
    (including a CKAN version check) but never used.
    """
    from pylons import config
    import tasks
    ckan_ini_filepath = os.path.abspath(config.__file__)
    tasks.update(ckan_ini_filepath, resource.id)
def create_qa_update_task(resource, queue):
    """Queue a ``qa.update`` celery job for *resource* on *queue*."""
    from pylons import config
    pkg = resource.resource_group.package
    ini_path = os.path.abspath(config.__file__)
    task_id = '%s/%s/%s' % (pkg.name, resource.id[:4], make_uuid()[:4])
    celery.send_task('qa.update', args=[ini_path, resource.id],
                     task_id=task_id, queue=queue)
    log.debug('QA of resource put into celery queue %s: %s/%s url=%r',
              queue, pkg.name, resource.id, resource.url)
def create_package_task(package, queue):
    """Queue a ``gemini_postprocess.process_package`` celery job for
    *package* on *queue*."""
    from pylons import config
    from ckan.model.types import make_uuid
    import logging
    log = logging.getLogger(__name__)
    task_id = '%s/%s' % (package.name, make_uuid()[:4])
    ini_path = os.path.abspath(config['__file__'])
    celery.send_task('gemini_postprocess.process_package',
                     args=[ini_path, package.id, queue],
                     task_id=task_id, queue=queue)
    log.debug('Gemini PostProcess of package put into celery queue %s: %s',
              queue, package.name)
def create_qa_update_task(resource, queue):
    """Queue a ``qa.update`` celery job for *resource*, handling the
    pre-2.3 ResourceGroup model layout."""
    from pylons import config
    if p.toolkit.check_ckan_version(max_version='2.2.99'):
        # Earlier CKANs reached the package through ResourceGroup.
        pkg = resource.resource_group.package
    else:
        pkg = resource.package
    ini_path = os.path.abspath(config.__file__)
    task_id = '%s/%s/%s' % (pkg.name, resource.id[:4], make_uuid()[:4])
    celery.send_task('qa.update', args=[ini_path, resource.id],
                     task_id=task_id, queue=queue)
    log.debug('QA of resource put into celery queue %s: %s/%s url=%r',
              queue, pkg.name, resource.id, resource.url)
def create_delete_resource_task(resource):
    """
    Creates the celery task for raster resource deletion

    :param resource: the resource to be deleted
    """
    # The resource dict travels inside the worker context.
    deletion_context = _make_default_context()
    deletion_context['resource_dict'] = resource
    task_id = make_uuid()
    celery.send_task('rasterstorer.delete',
                     args=[deletion_context],
                     task_id=task_id)
def _create_archiver_task(self, resource):
    """Queue an ``archiver.update`` celery job for *resource*, using the
    logged-in user's credentials when available and the site user
    otherwise."""
    from ckan.lib.base import c
    site_user = get_action('get_site_user')(
        {'model': model, 'ignore_auth': True, 'defer_commit': True}, {}
    )
    # If the code that triggers this is run from the command line, the c
    # stacked object proxy variable will not have been set up by the paste
    # registry so will give an error saying no object has been registered
    # for this thread. The easiest thing to do is to catch this, but it
    # would be nice to have a config option so that the behaviour can be
    # specified.
    try:
        c.user
    except TypeError:
        # This is no different from running the archiver from the command line:
        # See https://github.com/okfn/ckanext-archiver/blob/master/ckanext/archiver/commands.py
        username = site_user['name']
        userapikey = site_user['apikey']
    else:
        user = model.User.by_name(c.user)
        username = user.name
        userapikey = user.apikey
    context = json.dumps({
        'site_url': self.site_url,
        'apikey': userapikey,
        'username': username,
        'cache_url_root': self.cache_url_root,
        'site_user_apikey': site_user['apikey']
    })
    res_dict = resource_dictize(resource, {'model': model})
    data = json.dumps(res_dict)
    task_id = make_uuid()
    archiver_task_status = {
        'entity_id': resource.id,
        'entity_type': u'resource',
        'task_type': u'archiver',
        'key': u'celery_task_id',
        'value': task_id,
        'error': u'',
        'last_updated': datetime.now().isoformat()
    }
    # Status row is recorded as the site user with auth checks skipped.
    archiver_task_context = {
        'model': model,
        'user': site_user['name'],
        'ignore_auth': True
    }
    get_action('task_status_update')(archiver_task_context,
                                     archiver_task_status)
    celery.send_task("archiver.update", args=[context, data],
                     task_id=task_id)
    log.debug('Archival of resource put into celery queue: %s url=%r user=%s site_user=%s site_url=%s',
              resource.id, res_dict.get('url'), username, site_user['name'],
              self.site_url)
def create_ingest_resource_task(resource):
    """
    Creates the celery task for raster resource ingestion

    :param resource: the resource to be ingested
    """
    ingest_context = _make_default_context()
    ingest_context['resource_dict'] = resource.as_dict()
    task_id = make_uuid()
    celery.send_task('rasterstorer.import',
                     args=[ingest_context],
                     task_id=task_id)
def __init__(self, user_id: str, object_id: str, activity_type: str,
             data: Optional[dict[str, Any]] = None) -> None:
    """Create an activity record; ``data`` defaults to an empty dict."""
    self.id = _types.make_uuid()
    self.timestamp = datetime.datetime.utcnow()
    self.user_id = user_id
    self.object_id = object_id
    self.activity_type = activity_type
    # Normalise None here rather than sharing a mutable default.
    self.data = {} if data is None else data
def create_archiver_resource_task(resource, queue):
    """Queue an ``archiver.update_resource`` celery job for *resource*,
    handling the pre-2.3 ResourceGroup model layout."""
    from pylons import config
    if p.toolkit.check_ckan_version(max_version='2.2.99'):
        # earlier CKANs had ResourceGroup
        pkg = resource.resource_group.package
    else:
        pkg = resource.package
    task_id = '%s/%s/%s' % (pkg.name, resource.id[:4], make_uuid()[:4])
    ini_path = os.path.abspath(config['__file__'])
    celery.send_task('archiver.update_resource',
                     args=[ini_path, resource.id, queue],
                     task_id=task_id, queue=queue)
    log.debug('Archival of resource put into celery queue %s: %s/%s url=%r',
              queue, pkg.name, resource.id, resource.url)
def enqueue_document(user, filename, publisher):
    """
    Uses the provided data to send a job to the priority celery queue so that
    the spreadsheet is processed. We should create a job_started message
    header_row to ensure that the user sees immediately that something is
    happening.
    """
    from pylons import config
    from ckan import model
    from ckan.model.types import make_uuid
    from ckan.lib.celery_app import celery
    site_user = get_action('get_site_user')(
        {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})
    task_id = make_uuid()
    # Create the task for the queue
    context = json.dumps({
        'username': user.name,
        # The internal site url takes precedence when configured.
        'site_url': config.get('ckan.site_url_internally') or config.get('ckan.site_url'),
        'apikey': user.apikey,
        'site_user_apikey': site_user['apikey']
    })
    data = json.dumps({
        'file': filename,
        'publisher': publisher.name,
        'publisher_id': publisher.id,
        'jobid': task_id
    })
    celery.send_task("inventory.process", args=[context, data],
                     task_id=task_id, queue='priority')
    # Create a task status.... and update it so that the user knows it has been started.
    inventory_task_status = {
        'entity_id': task_id,
        'entity_type': u'inventory',
        'task_type': u'inventory',
        'key': u'celery_task_id',
        'value': task_id,
        'error': u'',
        'state': 'Started',
        'last_updated': datetime.datetime.now().isoformat()
    }
    inventory_task_context = {
        'model': model,
        'user': user.name,
        'ignore_auth': True
    }
    res = get_action('task_status_update')(inventory_task_context,
                                           inventory_task_status)
    # Caller gets the status row id plus timestamp for polling.
    return res['id'], inventory_task_status['last_updated']
def update_vector_storer_task(resource):
    """Queue a ``vectorstorer.update`` celery job for *resource*,
    listing its child resources so the worker can delete them."""
    site_user = _get_site_user()
    res_dict = resource.as_dict()
    context = json.dumps({
        'resource_list_to_delete': _get_child_resources(res_dict),
        'package_id': res_dict['package_id'],
        'site_url': _get_site_url(),
        'apikey': site_user.get('apikey'),
        'site_user_apikey': site_user.get('apikey'),
        'user': site_user.get('name'),
        'db_params': config['ckan.datastore.write_url']})
    geoserver_context = _get_geoserver_context()
    data = json.dumps(resource_dictize(resource, {'model': model}))
    celery.send_task('vectorstorer.update',
                     args=[geoserver_context, context, data],
                     task_id=make_uuid())
def migrate_v2():
    """Migrate the application_tag / idea_tag tables to schema v2 by
    copying rows through temp tables and regenerating primary keys."""
    log.debug('Migrating apps tables to v2. This may take a while...')
    # Snapshot the existing rows before dropping/recreating the tables.
    statements='''
    CREATE TABLE application_tag_temp AS SELECT * FROM application_tag;
    CREATE TABLE idea_tag_temp AS SELECT * FROM idea_tag;
    '''
    Session.execute(statements)
    Session.commit()
    application_tag_table.drop()
    idea_tag_table.drop()
    application_tag_table.create()
    idea_tag_table.create()
    Session.commit()
    apps_tags = Session.execute('SELECT application_id,tag_id from application_tag_temp')
    ideas_tags = Session.execute('SELECT idea_id,tag_id from idea_tag_temp')
    # Re-insert each row with a freshly generated uuid primary key.
    # NOTE(review): values are interpolated with %, not bound parameters
    # -- they come from the tables just copied, not user input, but
    # consider parameterising anyway.
    for app_tag in apps_tags:
        Session.execute('''INSERT INTO application_tag (id,application_id,tag_id) VALUES ('%s','%s','%s')''' % (make_uuid(), app_tag[0],app_tag[1]))
    for idea_tag in ideas_tags:
        Session.execute('''INSERT INTO idea_tag (id,idea_id,tag_id) VALUES ('%s','%s','%s')''' % (make_uuid(), idea_tag[0],idea_tag[1]))
    # Drop the snapshots now the data is back in place.
    statements='''
    DROP TABLE application_tag_temp;
    DROP TABLE idea_tag_temp;
    '''
    Session.execute(statements)
    Session.commit()
    log.info('Apps tables migrated to v2')
def identify_resource(resource_obj):
    """Queue a ``vectorstorer.identify_resource`` celery task for the
    given resource and record a ResourceIdentify row for it."""
    user_api_key = _get_site_user()['apikey']
    res_dict = resource_dictize(resource_obj, {'model': model})
    resource=resource_obj.as_dict()
    '''With resource_dictize we get the correct resource url
    even if dataset is in draft state
    '''
    resource['url']=res_dict['url']
    task_id = make_uuid()
    data = json.dumps(resource)
    celery.send_task('vectorstorer.identify_resource',
                     args=[data,user_api_key],
                     task_id=task_id)
    res_identify = ResourceIdentify(task_id,resource['id'])
    # NOTE(review): row is added but never committed here -- presumably
    # the surrounding request transaction commits; confirm.  Also note
    # the use of ``ckan.model.Session`` where sibling functions use
    # ``model.Session``.
    ckan.model.Session.add(res_identify)
def update_ingest_resource(resource):
    """Queue a ``vectorstorer.update`` celery job for *resource*,
    passing its child resources so the worker can replace them."""
    res_dict = resource.as_dict()
    context = _make_default_context()
    context.update({
        'resource_list_to_delete': _get_child_resources(res_dict),
        'package_id': res_dict['package_id'],
        'db_params': config['ckan.datastore.write_url'],
    })
    celery.send_task('vectorstorer.update',
                     args=[resource_dictize(resource, {'model': model}),
                           context,
                           _make_geoserver_context()],
                     task_id=make_uuid())
def update(self, user, context):
    """Queue a spatial-ingest celery task for every dataset returned by
    ``self._package_list()``."""
    from ckan.model.types import make_uuid
    from ckan.logic import get_action
    from ckan import model
    # import tasks after load config so CKAN_CONFIG evironment variable
    # can be set
    import tasks
    for package_dict in self._package_list():
        self.log.info(
            'Spatial Ingest for dataset being added to Celery queue "%s": %s (%d resources)'
            % (self.options.queue, package_dict.get('name'),
               len(package_dict.get('resources', []))))
        tasks.spatial_ingest.apply_async(
            args=[context, json.dumps(package_dict)],
            task_id=make_uuid(),
            queue=self.options.queue)
def create_vector_storer_task(resource, extra_params = None):
    """Queue a ``vectorstorer.upload`` celery job for *resource*;
    entries of ``extra_params`` are merged into the worker context."""
    site_user = _get_site_user()
    payload = {
        'package_id': resource.as_dict()['package_id'],
        'site_url': _get_site_url(),
        'apikey': site_user.get('apikey'),
        'site_user_apikey': site_user.get('apikey'),
        'user': site_user.get('name'),
        'db_params': config['ckan.datastore.write_url'],
    }
    if extra_params:
        payload.update(extra_params)
    context = json.dumps(payload)
    geoserver_context = _get_geoserver_context()
    data = json.dumps(resource_dictize(resource, {'model': model}))
    celery.send_task('vectorstorer.upload',
                     args=[geoserver_context, context, data],
                     task_id=make_uuid())
def create_identify_resource_task(resource):
    """
    Creates the celery task to identify the resource

    :param resource: the resource to be identified
    """
    task_id = make_uuid()
    # We are using resource_dictize() just to force CKAN to provide an absolute url
    # Note Maybe a more clean way to achive this would be to call something like
    # url_for(controller='package', action='resource_download', id=package_id, resource_id=resource_id)
    package_id = resource.as_dict()['package_id']
    resource_dict = resource_dictize(resource, {'model': model})
    resource_dict['package_id'] = package_id
    context = _make_default_context()
    context['resource_dict'] = resource_dict
    celery.send_task(
        'rasterstorer.identify',
        args=[context],
        task_id=task_id
    )
    res_identify = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    if res_identify:
        # This is when a user had previously rejected the ingestion workflow,
        # but now wants to re-identify the resource
        model.Session.delete(res_identify)
        new_res_identify = ResourceIngest(
            task_id,
            resource.id,
            ResourceStorerType.RASTER
        )
        model.Session.add(new_res_identify)
        model.Session.commit()
    else:
        # A newly created/updated resource needs to be identified
        # NOTE(review): this branch does not commit, unlike the one above
        # -- presumably the enclosing transaction commits later; confirm.
        new_res_identify = ResourceIngest(
            task_id,
            resource.id,
            ResourceStorerType.RASTER
        )
        model.Session.add(new_res_identify)
def create_ingest_resource(resource, layer_params):
    """Queue a ``vectorstorer.upload`` celery job for *resource* and
    mark its ResourceIngest row as published."""
    package_id = resource.as_dict()['package_id']
    context = _make_default_context()
    context.update({
        'package_id': package_id,
        'db_params': config['ckan.datastore.write_url'],
        'layer_params': layer_params
    })
    geoserver_context = _make_geoserver_context()
    resource_dict = resource_dictize(resource, {'model': model})
    task_id = make_uuid()
    celery.send_task('vectorstorer.upload',
                     args=[resource_dict, context, geoserver_context],
                     task_id=task_id)
    res_ingest = model.Session.query(ResourceIngest).filter(
        ResourceIngest.resource_id == resource.id).first()
    # NOTE(review): ``first()`` can return None, which would raise
    # AttributeError below -- presumably an ingest row always exists by
    # the time ingestion is requested; confirm.
    res_ingest.status = IngestStatus.PUBLISHED
    res_ingest.celery_task_id = task_id
    model.Session.commit()
def _create_datastorer_task(self, resource):
    """Queue a delayed ``datastorer.upload`` celery job for *resource*
    and record the task id through ``task_status_update``."""
    site_user = get_action('get_site_user')({
        'model': model,
        'ignore_auth': True,
        'defer_commit': True
    }, {})
    context = {
        'site_url': self._get_site_url(),
        'apikey': site_user.get('apikey'),
        'site_user_apikey': site_user.get('apikey'),
        'username': site_user.get('name'),
    }
    if self.sample_size:
        context['sample_size'] = self.sample_size
    data = resource_dictize(resource, {'model': model})
    task_id = make_uuid()
    status = {
        'entity_id': resource.id,
        'entity_type': u'resource',
        'task_type': u'datastorer',
        'key': u'celery_task_id',
        'value': task_id,
        'last_updated': datetime.now().isoformat()
    }
    get_action('task_status_update')(
        {'model': model, 'user': site_user.get('name')}, status)
    celery.send_task("datastorer.upload",
                     args=[json.dumps(context), json.dumps(data)],
                     countdown=15, task_id=task_id)
    logger.info('Sent task: datastorer.upload id=%s context=%r'
                % (task_id, context))
def delete_vector_storer_task(resource, pkg_delete = False):
    """Queue a ``vectorstorer.delete`` celery job for *resource*.

    ``resource`` is a plain dict here (unlike most siblings, which take
    model objects).  ``pkg_delete`` signals the whole package is being
    deleted.
    """
    user = _get_site_user()
    data = None
    resource_list_to_delete = None
    # Vectorstorer-created WMS / DB-table resources: their children are
    # only collected for the worker when the whole package is going away.
    if (resource['format'] == settings.WMS_FORMAT or resource['format'] == settings.DB_TABLE_FORMAT) and resource.has_key('vectorstorer_resource'):
        data = json.dumps(resource)
        if pkg_delete:
            resource_list_to_delete = _get_child_resources(resource)
    else:
        # Any other resource: always pass the children for deletion.
        data = json.dumps(resource)
        resource_list_to_delete = _get_child_resources(resource)
    context = json.dumps({'resource_list_to_delete': resource_list_to_delete,
                          'site_url': _get_site_url(),
                          'apikey': user.get('apikey'),
                          'site_user_apikey': user.get('apikey'),
                          'user': user.get('name'),
                          'db_params': config['ckan.datastore.write_url']})
    geoserver_context = _get_geoserver_context()
    task_id = make_uuid()
    celery.send_task('vectorstorer.delete',
                     args=[geoserver_context, context, data],
                     task_id=task_id)
    # Local child resources are removed here unless the package-deletion
    # cascade will remove them anyway.
    if resource.has_key('vectorstorer_resource') and not pkg_delete:
        _delete_child_resources(resource)
def delete_ingest_resource(resource, pkg_delete=False):
    """Queue a ``vectorstorer.delete`` celery job for *resource*.

    :param resource: resource model object being deleted
    :param pkg_delete: True when the whole package is being deleted, in
        which case local child resources are left for the package
        cascade instead of being deleted here.
    """
    resource_dict = resource_dictize(resource, {'model': model})
    resource_list_to_delete = None
    # FIX: the original ``if pkg_delete: ... else: ...`` branches were
    # byte-identical; collapsed into a single assignment.
    if ((resource_dict['format'] == WMSResource.FORMAT or
            resource_dict['format'] == DBTableResource.FORMAT) and
            'vectorstorer_resource' in resource_dict):
        resource_list_to_delete = _get_child_resources(resource)
    context = _make_default_context()
    context.update({
        'resource_list_to_delete': resource_list_to_delete,
        'db_params': config['ckan.datastore.write_url']
    })
    geoserver_context = _make_geoserver_context()
    task_id = make_uuid()
    celery.send_task('vectorstorer.delete',
                     args=[resource_dict, context, geoserver_context],
                     task_id=task_id)
    if 'vectorstorer_resource' in resource and not pkg_delete:
        _delete_child_resources(resource)
def _create_helloresource_task(self, resource):
    """Queue a delayed ``helloresource.upload`` celery task for
    *resource* and flash a URL where its result can be polled."""
    site_user = toolkit.get_action('get_site_user')({
        'model': model, 'ignore_auth': True, 'defer_commit': True}, {})
    context = {
        'site_url': self._get_site_url(),
        'apikey': site_user.get('apikey'),
        'site_user_apikey': site_user.get('apikey'),
        'username': site_user.get('name'),
        # Demo knobs understood by the worker.
        'emulate_long_process': 5,  # virtual steps
        'emulate_retried_failure': True,
    }
    task_id = make_uuid()
    celery.send_task(
        "helloresource.upload",
        args=[context, resource_dictize(resource, {'model': model})],
        task_id=task_id,
        countdown=10)
    # The task is queued
    log1.info('Queued a helloresource.upload celery task (%s)' % (task_id))
    # Provide a URL to poll for this task result
    result_url = toolkit.url_for(
        'helloresource-task-result', task_id=task_id, qualified=True)
    h.flash_notice('A task has been created, see:\n%s' % (result_url))
def _create_datastorer_task(self, resource):
    '''Record task status for *resource* and queue a datastorer.upload
    celery task (delayed by 15 seconds).'''
    site_user = get_action('get_site_user')(
        {'model': model, 'ignore_auth': True, 'defer_commit': True}, {})
    context = {
        'site_url': self._get_site_url(),
        'apikey': site_user.get('apikey'),
        'site_user_apikey': site_user.get('apikey'),
        'username': site_user.get('name'),
    }
    if self.sample_size:
        context['sample_size'] = self.sample_size
    data = resource_dictize(resource, {'model': model})
    task_id = make_uuid()
    # Store the celery task id against the resource so progress can be
    # looked up later via the task_status API.
    status = {
        'entity_id': resource.id,
        'entity_type': u'resource',
        'task_type': u'datastorer',
        'key': u'celery_task_id',
        'value': task_id,
        'last_updated': datetime.now().isoformat()
    }
    get_action('task_status_update')(
        {'model': model, 'user': site_user.get('name')}, status)
    celery.send_task("datastorer.upload",
                     args=[json.dumps(context), json.dumps(data)],
                     countdown=15,
                     task_id=task_id)
    logger.info('Sent task: datastorer.upload id=%s context=%r'
                % (task_id, context))
def _create_task(self, dataset):
    '''Queue an os.spatial_ingest celery task for *dataset* on the
    priority queue.'''
    site_user = t.get_action("get_site_user")(
        {"model": model, "ignore_auth": True, "defer_commit": True}, {})
    context = json.dumps({
        "site_url": self.site_url,
        "site_user_apikey": site_user["apikey"],
        "spatial_datastore_jdbc_url": self.spatial_datastore_jdbc_url,
        "spatial_ingester_filepath": self.spatial_ingester_filepath,
    })
    # Serialise the full dataset dict as the task payload.
    data = json.dumps(package_dictize(dataset, {"model": model}))
    task_id = make_uuid()
    queue = "priority"
    send_task("os.spatial_ingest", args=[context, data],
              task_id=task_id, queue=queue)
    log.debug(
        "Spatial Ingest put into celery queue %s: %s site_user=%s site_url=%s",
        queue,
        dataset.name,
        site_user["name"],
        self.site_url,
    )
def command(self):
    """
    Parse command line arguments and call appropriate method.

    Recognised sub-commands: 'update' (queue a QA celery task for every
    resource of every dataset returned by self._package_list()) and
    'clean' (logged as not implemented).
    """
    if not self.args or self.args[0] in ['--help', '-h', 'help']:
        print QACommand.__doc__
        return
    cmd = self.args[0]
    self._load_config()
    # Now we can import ckan and create logger, knowing that loggers
    # won't get disabled
    self.log = logging.getLogger('ckanext.qa')
    from ckan import model
    from ckan.model.types import make_uuid
    # import tasks after load config so CKAN_CONFIG environment variable
    # can be set
    import tasks
    user = p.toolkit.get_action('get_site_user')({
        'model': model, 'ignore_auth': True
    }, {})
    # One shared, JSON-serialised context is reused for every queued task.
    context = json.dumps({
        'site_url': config['ckan.site_url'],
        'apikey': user.get('apikey'),
        'username': user.get('name'),
    })
    if cmd == 'update':
        for package in self._package_list():
            self.log.info(
                "QA on dataset being added to Celery queue: %s (%d resources)"
                % (package.get('name'), len(package.get('resources', []))))
            for resource in package.get('resources', []):
                # Annotate the resource with its parent package before
                # serialising it as the task payload.
                resource['package'] = package['name']
                resource['position'] = 0
                data = json.dumps(resource)
                task_id = make_uuid()
                # Record the celery task id against the resource so its
                # progress can be looked up via the task_status API.
                task_status = {
                    'entity_id': resource['id'],
                    'entity_type': u'resource',
                    'task_type': u'qa',
                    'key': u'celery_task_id',
                    'value': task_id,
                    'error': u'',
                    'last_updated': datetime.datetime.now().isoformat()
                }
                task_context = {'model': model, 'user': user.get('name')}
                p.toolkit.get_action('task_status_update')(task_context,
                                                           task_status)
                tasks.update.apply_async(args=[context, data],
                                         task_id=task_id)
    elif cmd == 'clean':
        self.log.error('Command "%s" not implemented' % (cmd, ))
    else:
        self.log.error('Command "%s" not recognized' % (cmd, ))
def set_api_key():
    """Return a freshly generated API key when default API key creation
    is enabled in the config, otherwise None."""
    if not asbool(config.get('ckan.auth.create_default_api_keys', False)):
        return None
    return _types.make_uuid()
def command(self): """ Parse command line arguments and call appropriate method. """ if not self.args or self.args[0] in ['--help', '-h', 'help']: print Datastorer.__doc__ return cmd = self.args[0] self._load_config() #import after load config so CKAN_CONFIG evironment variable can be set from ckan.lib.celery_app import celery import tasks user = get_action('get_site_user')({'model': model, 'ignore_auth': True}, {}) context = { 'site_url': config['ckan.site_url'], 'apikey': user.get('apikey'), 'site_user_apikey': user.get('apikey'), 'username': user.get('name'), 'webstore_url': config.get('ckan.webstore_url') } if not config['ckan.site_url']: raise Exception('You have to set the "ckan.site_url" property in your ini file.') api_url = urlparse.urljoin(config['ckan.site_url'], 'api/action') if cmd in ('update', 'queue'): headers = { 'content-type:': 'application/json' } if len(self.args) == 2: response = requests.post(api_url + '/package_show', json.dumps({"id": self.args[1]}), headers=headers) if response.status_code == 200: packages = [json.loads(response.content).get('result')] elif response.status_code == 404: logger.error('Dataset %s not found' % self.args[1]) sys.exit(1) else: logger.error('Error getting dataset %s' % self.args[1]) sys.exit(1) else: packages = self._get_all_packages(api_url, headers) for package in packages: for resource in package.get('resources', []): data = json.dumps(resource, {'model': model}) # skip update if the datastore is already active (a table exists) if resource.get('datastore_active'): continue mimetype = resource['mimetype'] if mimetype and not(mimetype in tasks.DATA_FORMATS or resource['format'].lower() in tasks.DATA_FORMATS): logger.info(u'Skipping resource %s from package %s ' u'because MIME type %s and format %s are ' u'unrecognized' % (resource['url'], package['name'], mimetype, resource['format'])) continue logger.info(u'Datastore resource from resource %s from ' u'package %s' % (resource['url'], package['name'])) if cmd == 
"update": logger.setLevel(0) tasks._datastorer_upload(context, resource, logger) elif cmd == "queue": task_id = make_uuid() datastorer_task_status = { 'entity_id': resource['id'], 'entity_type': u'resource', 'task_type': u'datastorer', 'key': u'celery_task_id', 'value': task_id, 'last_updated': datetime.now().isoformat() } datastorer_task_context = { 'model': model, 'user': user.get('name') } get_action('task_status_update')(datastorer_task_context, datastorer_task_status) celery.send_task("datastorer.upload", args=[json.dumps(context), data], task_id=task_id) else: logger.error('Command %s not recognized' % (cmd,))
def create_qa_update_package_task(package, queue):
    '''Run a QA update over the given package.

    :param package: package domain object (uses .name and .id).
    :param queue: queue name; currently unused -- the update runs
        synchronously in this process rather than being dispatched to a
        worker. Kept for interface compatibility.
    '''
    from pylons import config
    import tasks
    # Fixed: the pylons config object is a dict -- the ini path lives
    # under the '__file__' key; attribute access (config.__file__)
    # raised AttributeError.
    ckan_ini_filepath = os.path.abspath(config['__file__'])
    # (Removed a task_id that was computed from package.name + make_uuid()
    # but never used.)
    tasks.update_package(ckan_ini_filepath, package.id)
# NOTE(review): this is the interior of an upload-handling controller
# action; the enclosing method definition is outside this chunk.
# Reject the request unless a real file upload (with a filename) was posted.
if not 'upload' in request.POST or not hasattr(request.POST['upload'], "filename"):
    h.flash_error(
        "No file was selected, please choose a file before uploading",
        allow_html=True)
    return h.redirect_to(
        controller="ckanext.dgu.controllers.inventory:InventoryController",
        action="edit", id=c.group.name)
incoming = request.POST['upload'].filename
# Save under a unique uuid-prefixed name so simultaneous uploads of the
# same filename cannot collide.
file_root = config.get('inventory.temporary.storage', '/tmp')
filename = os.path.join(file_root, make_uuid()) + "-{0}".format(incoming)
# Ensure the directory exists; the uploaded filename may contain a path.
directory = os.path.dirname(filename)
if not os.path.exists(directory):
    os.makedirs(directory)
with inventory_lib.UploadFileHelper(incoming, request.POST['upload'].file) as f:
    # NOTE(review): the handle returned by open() is never explicitly
    # closed here -- relies on refcounting/GC.
    open(filename, 'wb').write(f.read())
# Queue the saved document for processing and remember the job id.
job_id, timestamp = inventory_lib.enqueue_document(
    c.userobj, filename, c.group)
jobdict = json.loads(c.group.extras.get('inventory.jobs', '{}'))
jobdict[job_id] = timestamp
def command(self): """ Parse command line arguments and call appropriate method. """ if not self.args or self.args[0] in ["--help", "-h", "help"]: print self.usage return cmd = self.args[0] self._load_config() # Initialise logger after the config is loaded, so it is not disabled. self.log = logging.getLogger(__name__) # import after load config so CKAN_CONFIG evironment variable can be set import tasks user = get_action("get_site_user")({"model": model, "ignore_auth": True}, {}) context = json.dumps( { "site_url": config["ckan.site_url"], "apikey": user.get("apikey"), "username": user.get("name"), "cache_url_root": config.get("ckan.cache_url_root"), } ) api_url = urlparse.urljoin(config["ckan.site_url"], "api/action") if cmd == "update": if len(self.args) > 1: data = json.dumps({"id": unicode(self.args[1])}) response = requests.post(api_url + "/package_show", data) packages = [json.loads(response.content).get("result")] else: response = requests.post(api_url + "/current_package_list_with_resources", "{}") packages = json.loads(response.content).get("result") self.log.info("Number of datasets to archive: %d" % len(packages)) for package in packages: self.log.info( "Archiving dataset: %s (%d resources)" % (package.get("name"), len(package.get("resources", []))) ) for resource in package.get("resources", []): data = json.dumps(resource, {"model": model}) task_id = make_uuid() archiver_task_status = { "entity_id": resource["id"], "entity_type": u"resource", "task_type": u"archiver", "key": u"celery_task_id", "value": task_id, "error": u"", "last_updated": datetime.now().isoformat(), } archiver_task_context = {"model": model, "user": user.get("name")} get_action("task_status_update")(archiver_task_context, archiver_task_status) tasks.update.apply_async(args=[context, data], task_id=task_id) elif cmd == "clean": tasks.clean.delay() else: self.log.error("Command %s not recognized" % (cmd,))
# NOTE(review): this is the interior of a controller action; the
# enclosing method (and the conditional that guards this abort) is
# outside this chunk.
abort(401, 'Unauthorized to read group %s' % id)
# Require organization_update rights on the group before accepting an
# inventory upload.
try:
    context['group'] = c.group
    check_access('organization_update', context)
except NotAuthorized, e:
    abort(401, 'User %r not authorized to upload inventory' % (c.user))
# Reject the request unless a real file upload (with a filename) was posted.
if not 'upload' in request.POST or not hasattr(request.POST['upload'], "filename"):
    h.flash_error(
        "No file was selected, please choose a file before uploading",
        allow_html=True)
    return h.redirect_to(
        controller="ckanext.dgu.controllers.inventory:InventoryController",
        action="edit", id=c.group.name)
incoming = request.POST['upload'].filename
# Save under a unique uuid-prefixed name so simultaneous uploads of the
# same filename cannot collide.
file_root = config.get('inventory.temporary.storage', '/tmp')
filename = os.path.join(file_root, make_uuid()) + "-{0}".format(incoming)
with inventory_lib.UploadFileHelper(incoming, request.POST['upload'].file) as f:
    # NOTE(review): the handle returned by open() is never explicitly
    # closed here -- relies on refcounting/GC.
    open(filename, 'wb').write(f.read())
# Queue the saved document for processing and remember the job id.
job_id, timestamp = inventory_lib.enqueue_document(c.userobj, filename, c.group)
jobdict = json.loads(c.group.extras.get('inventory.jobs', '{}'))
jobdict[job_id] = timestamp
# Update the jobs list for this group
# inventory.jobs will become a str when dictized, so serialize now.
c.group.extras['inventory.jobs'] = json.dumps(jobdict)
model.repo.new_revision()
model.Session.add(c.group)
model.Session.commit()
def set_api_key() -> Optional[str]:
    """Return a freshly generated API key when default API key creation
    is enabled in the config, otherwise ``None``."""
    if not config.get_value('ckan.auth.create_default_api_keys'):
        return None
    return _types.make_uuid()