def before_update(self, context, current_resource, updated_resource):
    """Normalise schema fields and, when async update mode is on, flag
    the resource for validation if its data or format changed.

    Returns the (possibly modified) updated resource dict.
    """
    updated_resource = self._process_schema_fields(updated_resource)

    if get_update_mode_from_config() != u'async':
        return updated_resource

    new_format = updated_resource.get(u'format', u'').lower()
    old_format = current_resource.get(u'format', u'').lower()

    data_changed = (
        # New file uploaded
        bool(updated_resource.get(u'upload'))
        # External URL changed
        or updated_resource.get(u'url') != current_resource.get(u'url')
        # Schema changed
        or updated_resource.get(u'schema') != current_resource.get(u'schema')
        # Format changed
        or new_format != old_format
    )

    # Only queue validation when something changed AND the format is one
    # we can actually validate.
    if data_changed and new_format in settings.SUPPORTED_FORMATS:
        self.resources_to_validate[updated_resource[u'id']] = True

    return updated_resource
def after_update(self, context, data_dict):
    """Kick off async validation after a package or resource update.

    ``data_dict`` may be a dataset dict (has ``resources``) or a single
    resource dict; each case is handled separately.
    """
    if get_update_mode_from_config() != u'async':
        return

    if context.get('_validation_performed'):
        # Ugly, but needed to avoid circular loops caused by the
        # validation job calling resource_patch (which calls
        # package_update)
        del context['_validation_performed']
        return

    resources = data_dict.get(u'resources')
    if resources:
        # This is a dataset
        for res in resources:
            # Resources already queued by before_update belong to a
            # resource_update call and will be handled on the next
            # `after_update` call.
            if res[u'id'] not in self.resources_to_validate:
                # This is an actual package_update call, validate the
                # resources if necessary
                self._handle_validation_for_resource(res)
    else:
        # This is a resource
        res_id = data_dict[u'id']
        if res_id in self.resources_to_validate:
            del self.resources_to_validate[res_id]
            _run_async_validation(res_id)
def after_update(self, context, updated_resource):
    """Run async validation for a resource flagged by ``before_update``."""
    if get_update_mode_from_config() != u'async':
        return

    res_id = updated_resource[u'id']
    if res_id not in self.resources_to_validate:
        return

    # Consume the pending-validation flag before launching the job.
    del self.resources_to_validate[res_id]
    _run_async_validation(res_id)
def get_actions(self):
    """Register validation actions.

    The core ``resource_create`` / ``resource_update`` actions are only
    overridden when the corresponding mode is configured as ``sync``.
    """
    actions = {
        u'resource_validation_run': resource_validation_run,
        u'resource_validation_show': resource_validation_show,
        u'resource_validation_delete': resource_validation_delete,
    }

    if get_create_mode_from_config() == u'sync':
        actions[u'resource_create'] = custom_resource_create

    if get_update_mode_from_config() == u'sync':
        actions[u'resource_update'] = custom_resource_update

    return actions
def after_update(self, context, data_dict):
    """Trigger async validation after a dataset or resource update,
    honouring the per-package skip list and plugin vetoes.
    """
    is_dataset = self._data_dict_is_dataset(data_dict)

    # Need to allow create as well because resource_create calls
    # package_update
    async_update = get_update_mode_from_config() == u'async'
    async_create = get_create_mode_from_config() == u'async'
    if not (async_update or async_create):
        return

    if context.pop('_validation_performed', None):
        # Ugly, but needed to avoid circular loops caused by the
        # validation job calling resource_patch (which calls
        # package_update)
        return

    if is_dataset:
        package_id = data_dict.get('id')
        skip_package = self.packages_to_skip.pop(package_id, None)
        if skip_package or context.get('save', False):
            # Either we're updating an individual resource,
            # or we're updating the package metadata via the web form;
            # in both cases, we don't need to validate every resource.
            return
        for resource in data_dict.get(u'resources', []):
            # Resources queued by before_update are part of a
            # resource_update call and will be handled on the next
            # `after_update` call.
            if resource[u'id'] not in self.resources_to_validate:
                # This is an actual package_update call, validate the
                # resources if necessary
                self._handle_validation_for_resource(context, resource)
        return

    # This is a resource
    resource_id = data_dict[u'id']
    if resource_id not in self.resources_to_validate:
        return
    # Any IDataValidation plugin may veto validation for this resource.
    for plugin in p.PluginImplementations(IDataValidation):
        if not plugin.can_validate(context, data_dict):
            log.debug('Skipping validation for resource %s',
                      data_dict['id'])
            return
    del self.resources_to_validate[resource_id]
    _run_async_validation(resource_id)
def before_update(self, context, current_resource, updated_resource):
    """Normalise schema fields, record the owning package as "skip"
    for the following package_update, and flag the resource for async
    validation if its data or format changed.
    """
    updated_resource = self._process_schema_fields(updated_resource)

    # The call originates from a resource API, so don't validate the
    # entire package. Resolve the owning package id (falling back to a
    # resource_show lookup) and remember it in the skip list.
    package_id = updated_resource.get('package_id')
    if not package_id:
        existing = t.get_action('resource_show')(
            context={'ignore_auth': True},
            data_dict={'id': updated_resource['id']})
        if existing:
            package_id = existing['package_id']
    self.packages_to_skip[package_id] = True

    if get_update_mode_from_config() != u'async':
        return updated_resource

    new_format = updated_resource.get(u'format', u'').lower()
    old_format = current_resource.get(u'format', u'').lower()

    something_changed = (
        # New file uploaded
        bool(updated_resource.get(u'upload'))
        # External URL changed
        or updated_resource.get(u'url') != current_resource.get(u'url')
        # Schema changed
        or updated_resource.get(u'schema') != current_resource.get(u'schema')
        # Format changed
        or new_format != old_format
    )

    # Make sure the format is supported before queueing validation.
    if something_changed and new_format in settings.SUPPORTED_FORMATS:
        self.resources_to_validate[updated_resource[u'id']] = True

    return updated_resource
def after_update(self, context, data_dict):
    """Trigger async validation after a dataset or resource update."""
    is_dataset = self._data_dict_is_dataset(data_dict)

    # Need to allow create as well because resource_create calls
    # package_update
    if (get_update_mode_from_config() != u'async'
            and get_create_mode_from_config() != u'async'):
        return

    if context.get('_validation_performed'):
        # Ugly, but needed to avoid circular loops caused by the
        # validation job calling resource_patch (which calls
        # package_update)
        del context['_validation_performed']
        return

    if is_dataset:
        for resource in data_dict.get(u'resources', []):
            if resource[u'id'] in self.resources_to_validate:
                # This is part of a resource_update call, it will be
                # handled on the next `after_update` call
                continue
            # This is an actual package_update call, validate the
            # resources if necessary
            self._handle_validation_for_resource(context, resource)
        return

    # This is a resource
    resource_id = data_dict[u'id']
    if resource_id not in self.resources_to_validate:
        return
    # Any IDataValidation plugin may veto validation for this resource.
    for plugin in p.PluginImplementations(IDataValidation):
        if not plugin.can_validate(context, data_dict):
            log.debug('Skipping validation for resource {}'.format(
                data_dict['id']))
            return
    del self.resources_to_validate[resource_id]
    _run_async_validation(resource_id)
def after_update(self, context, data_dict): is_dataset = self._data_dict_is_dataset(data_dict) # Need to allow create as well because resource_create calls # package_update if (not get_update_mode_from_config() == u'async' and not get_create_mode_from_config() == u'async'): return if context.get('_validation_performed'): # Ugly, but needed to avoid circular loops caused by the # validation job calling resource_patch (which calls # package_update) del context['_validation_performed'] return if not is_dataset: if context.get('_dont_validate'): # Ugly, but needed to avoid circular loops caused by the # validation job calling resource_patch (which calls # package_update) del context['_dont_validate'] return # This is a resource resource_id = data_dict[u'id'] if resource_id in self.resources_to_validate: for plugin in p.PluginImplementations(IDataValidation): if not plugin.can_validate(context, data_dict): log.debug('Skipping validation for resource {}'.format( data_dict['id'])) return del self.resources_to_validate[resource_id] _run_async_validation(resource_id) if data_dict.get('validate_package'): t.get_action('resource_validation_run_batch')( context, { 'dataset_ids': data_dict.get('package_id') })
def test_config_both_false(self):
    """Both modes resolve to None when disabled in config."""
    # Plain asserts instead of nose's deprecated assert_equals, for
    # consistency with the pytest-style tests in this file.
    assert get_update_mode_from_config() is None
    assert get_create_mode_from_config() is None
def test_config_update_false_async(self):
    """Update mode resolves to None when disabled in config."""
    # Plain assert instead of nose's deprecated assert_equals, for
    # consistency with the pytest-style tests in this file.
    assert get_update_mode_from_config() is None
def test_config_update_true_async(self):
    """Update mode resolves to 'async' when enabled in config."""
    # Plain assert instead of nose's deprecated assert_equals, for
    # consistency with the pytest-style tests in this file.
    assert get_update_mode_from_config() == 'async'
def test_config_defaults(self):
    """Both modes default to 'async'."""
    # Plain asserts instead of nose's deprecated assert_equals, for
    # consistency with the pytest-style tests in this file.
    assert get_update_mode_from_config() == 'async'
    assert get_create_mode_from_config() == 'async'
def resource_update(context, data_dict):
    '''Update a resource.

    This is duplicate of the CKAN core resource_update action, with just
    the addition of a synchronous data validation step.

    This is of course not ideal but it's the only way right now to hook
    reliably into the creation process without overcomplicating things.

    Hopefully future versions of CKAN will incorporate more flexible hook
    points that will allow a better approach.

    :param context: CKAN action context (must contain ``model``)
    :param data_dict: resource dict; ``id`` is required
    :returns: the updated resource dict
    :raises toolkit.ObjectNotFound: if the resource does not exist
    :raises toolkit.ValidationError: on validation failure
    '''
    model = context['model']
    id = t.get_or_bust(data_dict, "id")

    if not data_dict.get('url'):
        data_dict['url'] = ''

    resource = model.Resource.get(id)
    context["resource"] = resource

    if not resource:
        log.debug('Could not find resource %s', id)
        raise t.ObjectNotFound(t._('Resource was not found.'))

    # Bug fix: read the format only AFTER the existence check above.
    # Previously `resource.format` was dereferenced first, so a missing
    # resource raised AttributeError instead of ObjectNotFound.
    old_resource_format = resource.format

    t.check_access('resource_update', context, data_dict)
    del context["resource"]

    package_id = resource.package.id
    pkg_dict = t.get_action('package_show')(dict(context,
                                                 return_type='dict'),
                                            {'id': package_id})

    # Locate this resource's position in the package; `res` instead of
    # `p` to avoid shadowing the module-level plugins alias.
    for n, res in enumerate(pkg_dict['resources']):
        if res['id'] == id:
            break
    else:
        log.error('Could not find resource %s after all', id)
        raise t.ObjectNotFound(t._('Resource was not found.'))

    # Persist the datastore_active extra if already present and not provided
    if ('datastore_active' in resource.extras and
            'datastore_active' not in data_dict):
        data_dict['datastore_active'] = resource.extras['datastore_active']

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.before_update(context, pkg_dict['resources'][n], data_dict)

    upload = uploader.get_resource_uploader(data_dict)

    if 'mimetype' not in data_dict:
        if hasattr(upload, 'mimetype'):
            data_dict['mimetype'] = upload.mimetype

    if 'size' not in data_dict and 'url_type' in data_dict:
        if hasattr(upload, 'filesize'):
            data_dict['size'] = upload.filesize

    pkg_dict['resources'][n] = data_dict

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        updated_pkg_dict = t.get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except t.ValidationError as e:
        # Surface errors for this resource specifically when possible.
        try:
            raise t.ValidationError(e.error_dict['resources'][-1])
        except (KeyError, IndexError):
            raise t.ValidationError(e.error_dict)

    upload.upload(id, uploader.get_max_resource_size())

    # Custom code starts

    if get_update_mode_from_config() == u'sync':

        run_validation = True
        # Any IDataValidation plugin may veto validation for this resource.
        for plugin in plugins.PluginImplementations(IDataValidation):
            if not plugin.can_validate(context, data_dict):
                log.debug('Skipping validation for resource %s', id)
                run_validation = False

        if run_validation:
            is_local_upload = (
                hasattr(upload, 'filename') and
                upload.filename is not None and
                isinstance(upload, uploader.ResourceUpload))
            # NOTE(review): new_resource=True looks odd for an *update*
            # action — confirm against _run_sync_validation's contract.
            _run_sync_validation(
                id, local_upload=is_local_upload, new_resource=True)

    # Custom code ends

    model.repo.commit()

    resource = t.get_action('resource_show')(context, {'id': id})

    if old_resource_format != resource['format']:
        t.get_action('resource_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': updated_pkg_dict, 'resource': resource})

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.after_update(context, resource)

    return resource
def test_config_update_true_async(self):
    """Update mode resolves to 'async' when enabled in config."""
    mode = get_update_mode_from_config()
    assert mode == 'async'
def test_config_both_false(self):
    """Both modes resolve to None when disabled in config."""
    update_mode = get_update_mode_from_config()
    create_mode = get_create_mode_from_config()
    assert update_mode is None
    assert create_mode is None
def test_config_update_false_async(self):
    """Update mode resolves to None when disabled in config."""
    mode = get_update_mode_from_config()
    assert mode is None
try: context['defer_commit'] = True context['use_cache'] = False updated_pkg_dict = t.get_action('package_update')(context, pkg_dict) context.pop('defer_commit') except t.ValidationError, e: try: raise t.ValidationError(e.error_dict['resources'][-1]) except (KeyError, IndexError): raise t.ValidationError(e.error_dict) upload.upload(id, uploader.get_max_resource_size()) # Custom code starts if get_update_mode_from_config() == u'sync': is_upload = (hasattr(upload, 'filename') and upload.filename is not None) _run_sync_validation(id, upload=is_upload) # Custom code ends model.repo.commit() resource = t.get_action('resource_show')(context, {'id': id}) if old_resource_format != resource['format']: t.get_action('resource_create_default_resource_views')( { 'model': context['model'], 'user': context['user'],
def test_config_defaults(self):
    """Both modes default to 'async'."""
    update_mode = get_update_mode_from_config()
    create_mode = get_create_mode_from_config()
    assert update_mode == 'async'
    assert create_mode == 'async'