def bulk_action(action, context, data_dicts, asynchronous):
    """ Carry out multiple invocations of an action, optionally asynchronously.

    :param action: name of the action function
    :param context: context to be passed to each action call
        (each gets its own copy of the context)
    :param data_dicts: list of dicts to be passed to each action call
        (as the data_dict)
    :param asynchronous: True to make the action calls asynchronously
        (note: the parameter was renamed from ``async``, which is a
        reserved keyword in Python 3.7+)
    :returns: { total_count, error_count }
    :rtype: dict
    """
    error_count = 0
    for data_dict in data_dicts:
        if asynchronous:
            async_context = context.copy()
            # model and session cannot be serialised for the worker
            del async_context['session'], async_context['model']
            tk.enqueue_job(_call_action, [action, async_context, data_dict])
        else:
            if not _call_action(action, context.copy(), data_dict):
                error_count += 1
    return {
        'total_count': len(data_dicts),
        'error_count': error_count,
    }
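# A minimal usage sketch for bulk_action above, assuming it is called from
# code that already has a CKAN context carrying 'model' and 'session'.
# The action name and dataset ids are illustrative only.
result = bulk_action(
    'package_delete',
    {'model': model, 'session': model.Session, 'user': 'sysadmin'},
    [{'id': 'dataset-1'}, {'id': 'dataset-2'}],
    asynchronous=True,  # each call becomes its own background job
)
# In asynchronous mode errors surface in the worker, not here, so:
# result == {'total_count': 2, 'error_count': 0}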
def create_questionnaire_resource(self, context, pkg_dict, survey):
    """ Create a resource for the questionnaire """
    from ckanext.unhcr.jobs import download_kobo_export

    # create an empty resource to be updated later
    f = tempfile.NamedTemporaryFile()
    resource = {
        'package_id': pkg_dict['id'],
        'upload': FlaskFileStorage(filename=f.name, stream=open(f.name, 'rb')),
        'name': 'Questionnaire XLS',
        'description': 'Questionnaire imported from the KoBo survey',
        'format': 'xls',
        'url_type': 'upload',
        'type': 'attachment',
        'visibility': 'public',
        'file_type': 'questionnaire',
        'kobo_type': 'questionnaire',
        'kobo_details': {
            'kobo_asset_id': self.kobo_asset_id,
            'kobo_download_status': 'pending',
            'kobo_download_attempts': 0,
            'kobo_last_updated': datetime.datetime.utcnow().isoformat()
        }
    }

    action = toolkit.get_action("resource_create")
    resource = action(context, resource)

    # Start a job to download the questionnaire
    toolkit.enqueue_job(download_kobo_export, [resource['id']],
                        title='Download KoBoToolbox questionnaire')

    return resource
def enqueue_update_zip(dataset_name, dataset_id, operation):
    # skip task if the dataset is already queued
    queue = DEFAULT_QUEUE_NAME
    jobs = toolkit.get_action('job_list')(
        {'ignore_auth': True}, {'queues': [queue]})
    if jobs:
        for job in jobs:
            if not job['title']:
                continue
            match = re.match(r'DownloadAll \w+ "[^"]*" ([\w-]+)',
                             job[u'title'])
            if match:
                queued_dataset_id = match.groups()[0]
                if dataset_id == queued_dataset_id:
                    log.info('Already queued dataset: {} {}'.format(
                        dataset_name, dataset_id))
                    return

    # add this dataset to the queue
    log.debug(u'Queuing job update_zip: {} {}'.format(
        operation, dataset_name))

    toolkit.enqueue_job(
        update_zip, [dataset_id],
        title=u'DownloadAll {} "{}" {}'.format(
            operation, dataset_name, dataset_id),
        queue=queue)
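# The dedupe above works because enqueue_update_zip always formats job
# titles as: DownloadAll <operation> "<dataset_name>" <dataset_id>.
# A quick worked example of the regex extracting the queued dataset id:
#
#   >>> import re
#   >>> title = u'DownloadAll changed "my-dataset" 1234-abcd'
#   >>> re.match(r'DownloadAll \w+ "[^"]*" ([\w-]+)', title).groups()
#   ('1234-abcd',)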
def after_create(self, context, resource):
    # a.s. send a msg to OCE distribution group
    recipient = {}
    email_context = context['package']
    email_resource = email_context.resources[-1]
    email_dict = {
        "subject": u'AVIN Resource has been added',
        "body": u'New Resource has been created: ' + '\n\n' +
                u'--------------------------------' + '\n' +
                u'Package Name: ' + email_context.name + '\n' +
                u'Package Title: ' + email_context.title + '\n' +
                u'Package Author: ' + email_context.author + '\n' +
                u'Package Maintainer: ' + email_context.maintainer + '\n' +
                u'Package Notes: ' + email_context.notes + '\n' +
                u'--------------------------------' + '\n' +
                u'Resource Name: ' + email_resource.name + '\n' +
                u'Resource URL: ' + email_resource.url + '\n' +
                u'Resource Description: ' + email_resource.description + '\n' +
                u'--------------------------------' + '\n'
    }
    recipient['display_name'] = os.environ['oce_email_distribution_group']
    recipient['email'] = os.environ['oce_email_distribution_group']
    toolkit.enqueue_job(_mail_recipient, [recipient, email_dict])
    log.info('create.py.resource_create: a.s. - email to OCE distribution '
             'group sent')
def issue_pid(self, resource_id, statement, projection, sort, q):
    now = datetime.now(pytz.UTC)
    fields = self._get_resource_fields(resource_id)
    schema = fields.find()

    if q:
        statement = transform_query_to_statement(q, schema)
    else:
        statement = transform_filter_to_statement(statement, schema)

    projection = transform_projection(projection, fields.find())
    sort = transform_sort(sort)

    fields_metadata = [field for field in fields.find()
                       if field[u'id'] in projection.keys()]

    query, meta_data = self.querystore.store_query(
        resource_id,
        {'filter': statement, 'projection': projection, 'sort': sort},
        str(now), None, HASH_ALGORITHM().name, fields_metadata)

    toolkit.enqueue_job(calculate_resultset_hash_job, [query.id],
                        queue=self.queue_name)

    return query.id
def ckan_worker_job_monitor():
    try:
        toolkit.enqueue_job(jobs.ckan_worker_job_monitor,
                            title='CKAN job worker monitor')
        click.secho(u"CKAN job worker monitor added to worker queue",
                    fg=u"green")
    except Exception as e:
        log.error(e)
def command(self):
    self._load_config()
    datasets = self._get_all_datasets()
    for dataset in datasets:
        if self._needs_check(dataset):
            toolkit.enqueue_job(
                check_dataset_resources_job, [dataset],
                title='Resource check for {}'.format(dataset['name']))
    logger.info('Enqueued resource check jobs for {} datasets'.format(
        len(datasets)))
def request_access(package_type, dataset_id):
    message = toolkit.request.form.get('message')
    context = {'model': model, 'user': toolkit.c.user}
    try:
        dataset = toolkit.get_action('package_show')(
            context.copy(), {'id': dataset_id})
    except toolkit.ObjectNotFound:
        return toolkit.abort(404, 'Dataset not found')
    except toolkit.NotAuthorized:
        return toolkit.abort(403, 'Not Authorized')

    if toolkit.h.can_download(dataset):
        toolkit.h.flash_notice(
            u'You already have access to download resources from {}'.format(
                dataset['title']))
        return toolkit.redirect_to('dataset.read', id=dataset['id'])

    try:
        toolkit.get_action('access_request_create')(context.copy(), {
            'object_id': dataset['id'],
            'object_type': 'package',
            'message': message,
            'role': 'member',
        })
    except toolkit.ObjectNotFound as e:
        return toolkit.abort(404, str(e))
    except toolkit.NotAuthorized:
        return toolkit.abort(403, 'Not Authorized')
    except toolkit.ValidationError as e:
        if e.error_dict and 'message' in e.error_dict:
            return toolkit.abort(
                400, e.error_dict['message'][0].replace('package', 'dataset'))
        return toolkit.abort(400, 'Bad Request')

    org_admins = mailer.get_dataset_request_access_email_recipients(dataset)
    for recipient in org_admins:
        subj = mailer.compose_dataset_request_access_email_subj(dataset)
        body = mailer.compose_request_access_email_body(
            'dataset', recipient, dataset, toolkit.c.userobj, message)
        toolkit.enqueue_job(mailer.mail_user_by_id,
                            [recipient['name'], subj, body])

    toolkit.h.flash_success(
        u'Requested access to download resources from {}'.format(
            dataset['title']))
    return toolkit.redirect_to('dataset.read', id=dataset['id'])
def request_access(container_id):
    message = toolkit.request.form.get('message')
    deposit = helpers.get_data_deposit()
    if container_id == deposit['id']:
        return toolkit.abort(403, 'Not Authorized')

    action_context = {'model': model, 'user': toolkit.c.user}
    try:
        container = toolkit.get_action('organization_show')(
            action_context, {'id': container_id})
    except toolkit.ObjectNotFound:
        return toolkit.abort(404, 'Container not found')
    except toolkit.NotAuthorized:
        return toolkit.abort(403, 'Not Authorized')

    if toolkit.c.userobj.id in [u['id'] for u in container['users']]:
        toolkit.h.flash_notice('You are already a member of {}'.format(
            container['display_name']))
        return toolkit.redirect_to('data-container.read', id=container_id)

    try:
        toolkit.get_action('access_request_create')(action_context, {
            'object_id': container['id'],
            'object_type': 'organization',
            'message': message,
            'role': 'member',
        })
    except toolkit.ObjectNotFound as e:
        return toolkit.abort(404, str(e))
    except toolkit.NotAuthorized:
        return toolkit.abort(403, 'Not Authorized')
    except toolkit.ValidationError as e:
        if e.error_dict and 'message' in e.error_dict:
            return toolkit.abort(
                400,
                e.error_dict['message'][0].replace('organization',
                                                   'container'))
        return toolkit.abort(400, 'Bad Request')

    org_admins = mailer.get_container_request_access_email_recipients(
        container)
    for recipient in org_admins:
        subj = mailer.compose_container_request_access_email_subj(container)
        body = mailer.compose_request_access_email_body(
            'container', recipient, container, toolkit.c.userobj, message)
        toolkit.enqueue_job(mailer.mail_user_by_id,
                            [recipient['name'], subj, body])

    toolkit.h.flash_success('Requested access to container {}'.format(
        container['display_name']))
    return toolkit.redirect_to('data-container.read', id=container_id)
def syndicate_dataset(package_id: str, topic: Topic, profile: Profile):
    """Enqueue syndication job.

    If you need realtime syndication, use `syndicate_sync` action.
    """
    import ckanext.syndicate.tasks as tasks

    tk.enqueue_job(
        tasks.sync_package,
        [package_id, topic, profile],
        queue=tk.config.get(CONFIG_QUEUE_NAME, DEFAULT_QUEUE_NAME),
    )
def after_create(self, context, resource):
    """Generate condensed dataset"""
    if resource.get('mimetype') in DC_MIME_TYPES:
        pkg_job_id = f"{resource['package_id']}_{resource['position']}_"
        jid_condense = pkg_job_id + "condense"
        if not Job.exists(jid_condense, connection=ckan_redis_connect()):
            toolkit.enqueue_job(generate_condensed_resource_job,
                                [resource],
                                title="Create condensed dataset",
                                queue="dcor-long",
                                rq_kwargs={"timeout": 3600,
                                           "job_id": jid_condense})
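# A minimal sketch of the job_id dedupe pattern used above, assuming the
# `rq` library underlying CKAN's job system: rq_kwargs is passed through to
# RQ, so a deterministic job_id makes repeat enqueues detectable via
# rq.job.Job.exists(). enqueue_once and redis_conn are illustrative names.
from rq.job import Job

def enqueue_once(job_id, fn, args, redis_conn):
    # skip enqueueing if a job with this id is already known to Redis
    if not Job.exists(job_id, connection=redis_conn):
        toolkit.enqueue_job(fn, args, rq_kwargs={"job_id": job_id})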
def update_zip(dataset_ref, synchronous):
    u'''update-zip <package-name>

    Generates the zip file for a dataset, downloading its resources.
    '''
    if synchronous:
        tasks.update_zip(dataset_ref)
    else:
        toolkit.enqueue_job(
            tasks.update_zip, [dataset_ref],
            title=u'DownloadAll {operation} "{name}" {id}'.format(
                operation='cli-requested', name=dataset_ref, id=dataset_ref),
            queue=DEFAULT_QUEUE_NAME)
    click.secho(u'update-zip: SUCCESS', fg=u'green', bold=True)
def compat_enqueue(name, fn, args=None):
    u'''Enqueue a background job using Celery or RQ.'''
    try:
        # Try to use RQ
        from ckan.plugins.toolkit import enqueue_job
        enqueue_job(fn, args=args)
    except ImportError:
        # Fall back to Celery
        import uuid
        from ckan.lib.celery_app import celery
        celery.send_task(name, args=args, task_id=str(uuid.uuid4()))
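# Hedged usage sketch for the shim above: on CKAN versions where
# toolkit.enqueue_job exists the RQ path runs; otherwise the ImportError
# branch sends a Celery task that must already be registered under `name`.
# The task name and do_work are illustrative assumptions.
def do_work(resource_id):
    pass  # actual background work goes here

compat_enqueue('ckanext.example.do_work', do_work, args=['resource-id'])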
def notify_mailchimp_subscribe_issue(email, error_message):
    recipient = {
        "display_name": os.environ['oce_email_distribution_group'],
        "email": os.environ['oce_email_distribution_group']
    }
    email_dict = {
        "subject": "Mailchimp subscription error",
        "body": u"""Error adding/updating mailchimp subscription for email: %s
%s
""" % (email, error_message)
    }
    toolkit.enqueue_job(_mail_recipient, [recipient, email_dict])
def compat_enqueue(name, fn, queue, args=[], kwargs={}):
    u'''Enqueue a background job using Celery or RQ.'''
    try:
        # Try to use RQ
        from ckan.plugins.toolkit import enqueue_job
        nice_name = name + " " + args[1] if (len(args) >= 2) else name
        enqueue_job(fn, args=args, kwargs=kwargs, queue=queue,
                    title=nice_name)
    except ImportError:
        # Fall back to Celery
        import uuid
        from ckan.lib.celery_app import celery
        celery.send_task(name, args=args + [queue],
                         task_id=six.text_type(uuid.uuid4()))
def compat_enqueue(name, fn, queue, args=None):
    u'''Enqueue a background job using Celery or RQ.'''
    try:
        # Try to use RQ
        from ckan.plugins.toolkit import enqueue_job
        enqueue_job(fn, args=args, queue=queue)
    except ImportError:
        # Fall back to Celery
        import uuid
        from ckan.lib.celery_app import celery
        celery.send_task(name, args=args + [queue],
                         task_id=str(uuid.uuid4()))
def send_notification_emails(users, template, extra_vars):
    """
    Sets the email body and sends an email notification to each user in the
    list provided

    :param users: list of user email addresses to receive the notification
    :param template: string indicating which email template to use
    :param extra_vars: dict
    :return:
    """
    if users:
        subject = render_jinja2(
            'emails/subjects/{0}.txt'.format(template), extra_vars)
        body = render_jinja2(
            'emails/bodies/{0}.txt'.format(template), extra_vars)
        for user in users:
            toolkit.enqueue_job(send_email, [user, subject, body],
                                title=u'Comment Email')
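# Illustrative call of send_notification_emails above; the template name
# 'notification' and the extra_vars keys are assumptions and only need to
# match templates present under emails/subjects/ and emails/bodies/.
send_notification_emails(
    ['user1@example.com', 'user2@example.com'],
    'notification',
    {'dataset_name': 'my-dataset', 'comment': 'Looks good!'},
)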
def _after_create_or_update(self, context, resource):
    url = resource['url']
    print('--Modified resource at {} from package {}'.format(
        url, context['package']))

    # Invoke the generator in a separate background job
    generator = MLDatacardGenerator(resource['package_id'])
    job = tk.enqueue_job(generator.generate, queue=tk._('datacard'))
def notify(self, entity, operation):
    u'''Send a notification on entity modification.

    :param entity: instance of model.Package.
    :param operation: 'new', 'changed' or 'deleted'.
    '''
    if operation == 'deleted':
        return

    print('{} {} {}'.format(operation, type(entity), entity.name))

    # add this dataset to the queue, if not already on there
    toolkit.enqueue_job(
        update_zip, [entity.id],
        title=u'DownloadAll {} "{}"'.format(operation, entity.name))
def enqueue_job(*args, **kwargs):
    """Enqueue an asynchronous job to RQ."""
    try:
        return t.enqueue_job(*args, **kwargs)
    except AttributeError:
        # older CKAN without toolkit.enqueue_job: fall back to ckanext-rq
        from ckanext.rq.jobs import enqueue as enqueue_job_legacy
        return enqueue_job_legacy(*args, **kwargs)
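# With the shim above, callers enqueue identically on old and new CKAN.
# A hedged example; process_resource is an illustrative function name and
# the keyword arguments mirror toolkit.enqueue_job's signature:
def process_resource(resource_id):
    pass  # background work goes here

job = enqueue_job(process_resource, args=['resource-id'])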
def review_datasets(data_dict={}):
    site_user = get_action(u'get_site_user')({u'ignore_auth': True}, {})
    context = {u'user': site_user[u'name']}
    # Action 'get_review_datasets' defaults to using config value
    # 'ckanext.qdes_schema.dataset_review_period' or 12 months if not set
    review_datasets = get_action('get_review_datasets')(context, data_dict)

    contact_points = {}
    for review_dataset in review_datasets:
        contact_point = review_dataset.get('contact_point', None)
        datasets = contact_points.get(contact_point, [])
        title = review_dataset.get('title')
        name = review_dataset.get('name')
        url = toolkit.url_for(
            '{}.read'.format(review_dataset.get('type', None)),
            id=name, _external=True)
        dataset = {'title': title, 'url': url}
        # Only add the dataset if it is not already in the list
        if dataset not in datasets:
            datasets.append(dataset)
        contact_points[contact_point] = datasets

    for contact_point in contact_points:
        datasets = contact_points[contact_point]
        # Only email the contact point if there are datasets
        if len(datasets) > 0:
            contact_point_data = get_action('get_secure_vocabulary_record')(
                context, {
                    'vocabulary_name': 'point-of-contact',
                    'query': contact_point
                })
            if contact_point_data:
                recipient_name = contact_point_data.get('Name', '')
                recipient_email = contact_point_data.get('Email', '')
                subject = render('emails/subject/review_datasets.txt')
                body = render('emails/body/review_datasets.txt',
                              {'datasets': datasets})
                body_html = render('emails/body/review_datasets.html',
                                   {'datasets': datasets})
                # Include the recipient in the job title for worker
                # visibility when troubleshooting via logs
                job_title = f'Review datasets: Sending email to {recipient_name}'
                toolkit.enqueue_job(
                    toolkit.mail_recipient,
                    [recipient_name, recipient_email, subject, body,
                     body_html],
                    title=job_title)
def after_create(self, context, resource):
    # pprint.pprint(context)
    # pprint.pprint(resource)
    os.system("pwd")
    os.system("whoami")

    sql_file_name = resource["name"]
    sql_file_url = resource["url"]
    resource_id = resource["id"]
    package_id = resource["package_id"]

    # pprint.pprint(f"{resource}")
    pprint.pprint(f"{resource_id}")
    pprint.pprint(f"{sql_file_name}")
    pprint.pprint(f"{sql_file_url}")

    toolkit.enqueue_job(
        convert_data, [resource_id, sql_file_name, sql_file_url, package_id])
def notify_renewal_request(user_id, message, recipient_ids):
    """ A user has requested renewal of their account.
    Ask all related users to validate this request """
    user = model.User.get(user_id)
    subj = 'Renewal request for {}'.format(user.name)
    context = {'requesting_user': user, 'message': message, 'h': toolkit.h}
    if recipient_ids == []:
        recipient_ids = [sysadmin['id'] for sysadmin in _get_sysadmins()]

    for recipient_id in recipient_ids:
        recipient = model.User.get(recipient_id)
        context['recipient'] = recipient
        body = render_jinja2(
            'emails/access_requests/access_renewal_request.html', context)
        toolkit.enqueue_job(mail_user_by_id, [recipient_id, subj, body],
                            title="notify_renewal_request")

    return recipient_ids
def queue(function, request):
    '''
    Generic queueing function which ensures our special queue is set up
    first.

    :param function: the function to queue
    :param request: the queue request object
    :return: the queued job
    '''
    ensure_importing_queue_exists()
    return toolkit.enqueue_job(function, args=[request], queue=u'importing',
                               title=str(request))
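# A hedged usage sketch: ImportRequest and import_records are illustrative
# names standing in for the extension's real request object and task
# function. toolkit.enqueue_job returns the underlying RQ job, so the id
# can be logged for tracking.
request = ImportRequest(resource_id='abc-123')
job = queue(import_records, request)
log.info('queued import job %s', job.id)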
def enqueue_survey_update():
    """ Check for updates and, if new submissions were found, start a job
    to update. Return an API dict response """
    user_obj = toolkit.c.userobj
    kobo_asset_id = toolkit.request.form.get('kobo_asset_id')
    force = toolkit.asbool(toolkit.request.form.get('force', False))
    if not kobo_asset_id:
        message = 'Missing KoBoToolbox asset ID.'
        return _make_json_response(status_int=404, error_msg=message)

    kd = KoboDataset(kobo_asset_id)
    try:
        old_submission_count = kd.get_submission_count()
    except KoboMissingAssetIdError:
        message = 'Dataset not found for this KoBoToolbox asset ID.'
        return _make_json_response(status_int=404, error_msg=message)

    # check if an update is "pending" (but not stalled)
    if kd.get_import_status() == 'pending':
        message = 'There is a pending update for this survey.'
        return _make_json_response(status_int=400, error_msg=message)

    if force:
        extra_data = {'forced': True}
        run_job = True
    else:
        # check if there are new submissions
        try:
            kobo_api = kd.get_kobo_api(user_obj)
        except KoBoUserTokenMissingError:
            profile_url = toolkit.url_for('unhcr_kobo.index')
            profile_link = '<a href="{}">user profile page</a>'.format(
                profile_url)
            message = (
                'Missing API token. Please provide a valid KoBo Toolbox '
                'API token on your {}'.format(profile_link))
            return _make_json_response(status_int=403, error_msg=message)

        survey = KoBoSurvey(kobo_asset_id, kobo_api)
        new_submission_count = survey.get_total_submissions()
        new_submissions = new_submission_count - old_submission_count
        extra_data = {'new_submissions': new_submissions, 'forced': False}
        run_job = new_submissions > 0
        if new_submissions == 0:
            message = "There are no new submissions"

    if run_job:
        job = toolkit.enqueue_job(
            update_pkg_kobo_resources, [kobo_asset_id, user_obj.id],
            title='Enqueue survey update')
        message = "Job started {}".format(job.id)

    return _make_json_response(msg=message, extra_data=extra_data)
def notify(self, entity, operation=None):
    context = {'model': model, 'ignore_auth': True, 'defer_commit': True}

    if isinstance(entity, model.Resource):
        if not operation:
            # This happens on IResourceURLChange, but I'm not sure whether
            # to make this into a webhook.
            return
        elif operation == DomainObjectOperation.new:
            topic = 'resource/create'
        elif operation == DomainObjectOperation.changed:
            topic = 'resource/update'
        elif operation == DomainObjectOperation.deleted:
            topic = 'resource/delete'
        else:
            return

    if isinstance(entity, model.Package):
        if operation == DomainObjectOperation.new:
            topic = 'dataset/create'
        elif operation == DomainObjectOperation.changed:
            topic = 'dataset/update'
        elif operation == DomainObjectOperation.deleted:
            topic = 'dataset/delete'
        else:
            return

    webhooks = db.Webhook.find(topic=topic)

    for hook in webhooks:
        resource = table_dictize(entity, context)
        webhook = table_dictize(hook, context)
        toolkit.enqueue_job(
            tasks.notify_hooks,
            [resource, webhook, config.get('ckan.site_url')])
def after_create(self, context, data_dict):
    if not context.get('job'):
        toolkit.enqueue_job(jobs.process_dataset_on_create,
                            [data_dict['id']])
    if data_dict.get('type') == 'deposited-dataset':
        user_id = None
        if context.get('auth_user_obj'):
            user_id = context['auth_user_obj'].id
        elif context.get('user'):
            user = toolkit.get_action('user_show')(
                {'ignore_auth': True}, {'id': context['user']})
            user_id = user['id']
        if user_id:
            helpers.create_curation_activity(
                'dataset_deposited', data_dict['id'], data_dict['name'],
                user_id)
def submit(package_type, dataset_id):
    user_id = getattr(toolkit.c.userobj, 'id', None)

    # Get curation data
    try:
        context, dataset, curation = _get_curation_data(dataset_id, user_id)
    except (toolkit.ObjectNotFound, toolkit.NotAuthorized):
        message = 'Not authorized to read dataset "%s"'
        return toolkit.abort(403, message % dataset_id)

    # Authorize context
    if 'submit' not in curation['actions']:
        message = 'This action is not available for the dataset "%s"'
        toolkit.h.flash_error(message % dataset['title'])
        return toolkit.redirect_to('deposited-dataset.read',
                                   id=dataset['name'])
    context['ignore_auth'] = True

    # Update dataset
    dataset['curation_state'] = 'submitted'
    dataset = toolkit.get_action('package_update')(context, dataset)

    # Update activity stream
    message = toolkit.request.form.get('message')
    create_curation_activity('dataset_submitted', dataset['id'],
                             dataset['name'], user_id, message=message)

    # Send notification emails
    curator = curation['contacts']['curator']
    # We don't bother all curators if someone is already assigned
    users = [curator] if curator else helpers.get_data_curation_users(dataset)
    for user in users:
        subj = mailer.compose_curation_email_subj(dataset)
        body = mailer.compose_curation_email_body(
            dataset, curation, user['display_name'], 'submit',
            message=message)
        toolkit.enqueue_job(mailer.mail_user_by_id, [user['id'], subj, body])

    # Show flash message and redirect
    message = 'Dataset "%s" submitted'
    toolkit.h.flash_success(message % dataset['title'])
    return toolkit.redirect_to('deposited-dataset.read', id=dataset['name'])
def queue_download(email_address, download_id, query_hash, query,
                   query_version, search, resource_ids_and_versions,
                   separate_files, file_format, ignore_empty_fields):
    '''
    Queues a job which, when run, will download the data for the resource.

    :return: the queued job
    '''
    ensure_download_queue_exists()
    request = DownloadRequest(email_address, download_id, query_hash, query,
                              query_version, search,
                              resource_ids_and_versions, separate_files,
                              file_format, ignore_empty_fields)
    return toolkit.enqueue_job(download, args=[request], queue=u'download',
                               title=str(request))
def update_all_zips(synchronous):
    u'''update-all-zips

    Generates zip files for all datasets, either synchronously or via
    background jobs.
    '''
    context = {'model': model, 'session': model.Session}
    datasets = toolkit.get_action('package_list')(context, {})
    for i, dataset_name in enumerate(datasets):
        if synchronous:
            print('Processing dataset {}/{}'.format(i + 1, len(datasets)))
            tasks.update_zip(dataset_name)
        else:
            print('Queuing dataset {}/{}'.format(i + 1, len(datasets)))
            toolkit.enqueue_job(
                tasks.update_zip, [dataset_name],
                title=u'DownloadAll {operation} "{name}" {id}'.format(
                    operation='cli-requested', name=dataset_name,
                    id=dataset_name),
                queue=DEFAULT_QUEUE_NAME)
    click.secho(u'update-all-zips: SUCCESS', fg=u'green', bold=True)
def create(self, entity):
    # a.s. send a msg to OCE distribution group
    recipient = {}
    email_dict = {
        "subject": u'AVIN Dataset has been added',
        "body": u'New Dataset has been created: ' + '\n\n' +
                u'--------------------------------' + '\n' +
                u'Package Name: ' + entity.name + '\n' +
                u'Package Title: ' + entity.title + '\n' +
                u'Package Author: ' + entity.author + '\n' +
                u'Package Maintainer: ' + entity.maintainer + '\n' +
                u'Package Notes: ' + entity.notes + '\n' +
                u'--------------------------------' + '\n'
    }
    recipient['display_name'] = os.environ['oce_email_distribution_group']
    recipient['email'] = os.environ['oce_email_distribution_group']
    toolkit.enqueue_job(_mail_recipient, [recipient, email_dict])
    log.info('create.py.package_create: a.s. - email to OCE distribution '
             'group sent - s.h. moved to plugin')