def render_descriptor(data): """Render data descriptor. The rendering is based on descriptor schema and input context. :param data: data instance :type data: :class:`resolwe.flow.models.Data` or :class:`dict` """ if not data.descriptor_schema: return inputs = copy.deepcopy(data.input) if data.process.input_schema: hydrate_input_references(inputs, data.process.input_schema, hydrate_values=False) template_context = inputs # Set default values for field_schema, field, path in iterate_schema( data.descriptor, data.descriptor_schema.schema, 'descriptor'): if 'default' in field_schema and field_schema['name'] not in field: tmpl = field_schema['default'] if field_schema['type'].startswith('list:'): tmpl = [ render_template(data.process, tmp, template_context) if isinstance(tmp, six.string_types) else tmp for tmp in tmpl ] elif isinstance(tmpl, six.string_types): tmpl = render_template(data.process, tmpl, template_context) dict_dot(data, path, tmpl)
def handle_update_model_fields(
    self, message: Message[Tuple[str, str, int, Dict[str, Any]]], manager: "Processor"
) -> Response[str]:
    """Update the value for the given fields.

    The received message format is
    (app_name, model name, model primary key, mapping of field names to values).

    Field names can be given in dot notation for JSON fields.

    :raises RuntimeError: if user has no permissions to modify the object.
    """
    app_name, model_name, model_pk, mapping = message.message_data
    full_model_name = f"{app_name}.{model_name}"
    # The most common request is for the data object we are processing.
    # Avoid hitting the database in such a case.
    if full_model_name == "flow.Data" and model_pk == manager.data_id:
        model_instance = manager.data
        model = Data
    else:
        model = apps.get_model(app_name, model_name)
        model_instance = model.objects.filter(pk=model_pk).get()

    self._permission_manager.can_update(
        manager.contributor, full_model_name, model_instance, mapping, manager.data
    )

    # Update all fields except m2m.
    update_fields = []
    for field_name, field_value in mapping.items():
        # The output field is a JSONField that must only be updated, while
        # other JSON fields should probably be replaced. Compromise: when the
        # new value is a dict, only the keys in that dict are updated;
        # otherwise the field is replaced.
        if isinstance(model._meta.get_field(field_name), JSONField) and isinstance(
            field_value, dict
        ):
            update_fields.append(field_name)
            current_value = getattr(model_instance, field_name)
            for key, value in field_value.items():
                dict_dot(current_value, key, value)
        elif isinstance(model._meta.get_field(field_name), ManyToManyField):
            assert isinstance(
                field_value, list
            ), "Only lists may be assigned to many-to-many relations"
            field = getattr(model_instance, field_name)
            field_value_set = set(field_value)
            current_objects = set(field.all().values_list("pk", flat=True))
            objects_to_add = field_value_set - current_objects
            objects_to_remove = current_objects - field_value_set
            if objects_to_remove:
                field.remove(*objects_to_remove)
            if objects_to_add:
                field.add(*objects_to_add)
        else:
            update_fields.append(field_name)
            setattr(model_instance, field_name, field_value)
    model_instance.save(update_fields=update_fields)
    return message.respond_ok("OK")
def fill_with_defaults(process_input, input_schema):
    """Fill empty optional fields in input with default values."""
    for field_schema, fields, path in iterate_schema(
        process_input, input_schema, include_groups=True
    ):
        if "group" in field_schema and field_schema["name"] not in fields:
            dict_dot(process_input, path, {})
        if "default" in field_schema and field_schema["name"] not in fields:
            dict_dot(process_input, path, field_schema["default"])
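# Illustration (not part of the library): a minimal sketch of the dotted-path
# "set" semantics that fill_with_defaults relies on. The helper set_dot below
# is a hypothetical, simplified stand-in for dict_dot's set behaviour, not
# Resolwe's actual implementation.
def set_dot(container, path, value):
    """Set ``value`` at the dot-separated ``path``, creating nested dicts."""
    keys = path.split(".")
    for key in keys[:-1]:
        container = container.setdefault(key, {})
    container[keys[-1]] = value


# Example: a default of 0.05 for a field nested in the "advanced" group is
# written to the path "advanced.threshold".
process_input = {}
set_dot(process_input, "advanced.threshold", 0.05)
assert process_input == {"advanced": {"threshold": 0.05}}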
def process_object(self, obj): """Process current object and push it to the ElasticSearch.""" document = self.document_class(meta={'id': self.generate_id(obj)}) # pylint: disable=not-callable for field in document._doc_type.mapping: # pylint: disable=protected-access if field in ['users_with_permissions', 'groups_with_permissions', 'public_permission']: continue # These fields are handled separately try: # use get_X_value function get_value_function = getattr(self, 'get_{}_value'.format(field), None) if get_value_function: setattr(document, field, get_value_function(obj)) # pylint: disable=not-callable continue # use `mapping` dict if field in self.mapping: if callable(self.mapping[field]): setattr(document, field, self.mapping[field](obj)) continue try: object_attr = dict_dot(obj, self.mapping[field]) except (KeyError, AttributeError): object_attr = None if callable(object_attr): # use method on object setattr(document, field, object_attr(obj)) else: # use attribute on object setattr(document, field, object_attr) continue # get value from the object try: object_value = dict_dot(obj, field) setattr(document, field, object_value) continue except KeyError: pass raise AttributeError("Cannot determine mapping for field {}".format(field)) except: # noqa pylint: disable=bare-except logger.exception( "Error occurred while setting value of field '%s' in '%s' Elasticsearch index.", field, self.__class__.__name__, extra={'object_type': self.object_type, 'obj_id': obj.pk} ) permissions = self.get_permissions(obj) document.users_with_permissions = permissions['users'] document.groups_with_permissions = permissions['groups'] document.public_permission = permissions['public'] self.push_queue.append(document)
def process_object(self, obj): """Process current object and push it to the ElasticSearch.""" document = self.document_class(meta={'id': self.generate_id(obj)}) for field in document._doc_type.mapping: # pylint: disable=protected-access if field in ['users_with_permissions', 'groups_with_permissions', 'public_permission']: continue # These fields are handled separately try: # use get_X_value function get_value_function = getattr(self, 'get_{}_value'.format(field), None) if get_value_function: setattr(document, field, get_value_function(obj)) # pylint: disable=not-callable continue # use `mapping` dict if field in self.mapping: if callable(self.mapping[field]): setattr(document, field, self.mapping[field](obj)) continue try: object_attr = dict_dot(obj, self.mapping[field]) except (KeyError, AttributeError): object_attr = None if callable(object_attr): # use method on object setattr(document, field, object_attr(obj)) else: # use attribute on object setattr(document, field, object_attr) continue # get value from the object try: object_value = dict_dot(obj, field) setattr(document, field, object_value) continue except KeyError: pass raise AttributeError("Cannot determine mapping for field {}".format(field)) except Exception: # pylint: disable=broad-except logger.exception( "Error occurred while setting value of field '%s' in '%s' Elasticsearch index.", field, self.__class__.__name__, extra={'object_type': self.object_type, 'obj_id': obj.pk} ) permissions = self.get_permissions(obj) document.users_with_permissions = permissions['users'] document.groups_with_permissions = permissions['groups'] document.public_permission = permissions['public'] self.push_queue.append(document)
def handle_update_output(
    self, message: Message[Dict[str, Any]], manager: "Processor"
) -> Response[str]:
    """Update data output."""
    for key, val in message.message_data.items():
        if key not in manager.storage_fields:
            dict_dot(manager.data.output, key, val)
        else:
            manager.save_storage(key, val)
    with transaction.atomic():
        manager._update_data({"output": manager.data.output})
    return message.respond_ok("OK")
def process_object(self, obj, push=True): """Process current object and push it to the ElasticSearch.""" document = self.document_class(meta={'id': self.generate_id(obj)}) # pylint: disable=not-callable for field in document._doc_type.mapping: # pylint: disable=protected-access if field in ['users_with_permissions', 'groups_with_permissions']: continue # These fields are handled separately # use get_X_value function get_value_function = getattr(self, 'get_{}_value'.format(field), None) if get_value_function: setattr(document, field, get_value_function(obj)) continue # use `mapping` dict if field in self.mapping: if callable(self.mapping[field]): setattr(document, field, self.mapping[field](obj)) continue try: object_attr = dict_dot(obj, self.mapping[field]) except (KeyError, AttributeError): object_attr = None if callable(object_attr): # use method on object setattr(document, field, object_attr(obj)) else: # use attribute on object setattr(document, field, object_attr) continue # get value from the object try: object_value = dict_dot(obj, field) setattr(document, field, object_value) continue except KeyError: pass raise AttributeError('Cannot determine mapping for field {}'.format(field)) permissions = self.get_permissions(obj) document.users_with_permissions = permissions['users'] document.groups_with_permissions = permissions['groups'] document.public_permission = permissions['public'] if push: document.save(refresh=True) else: self.push_queue.append(document)
def handle_annotate(
    self, message: Message[dict], manager: "Processor"
) -> Response[str]:
    """Handle an incoming ``Data`` object annotate request."""
    if manager.data.entity is None:
        raise RuntimeError(
            f"No entity to annotate for process '{manager.data.process.slug}'"
        )
    for key, val in message.message_data.items():
        dict_dot(manager.data.entity.descriptor, key, val)
    manager.data.entity.save()
    return message.respond_ok("OK")
def render_descriptor(data):
    """Render data descriptor.

    The rendering is based on descriptor schema and input context.

    :param data: data instance
    :type data: :class:`resolwe.flow.models.Data` or :class:`dict`

    """
    if not data.descriptor_schema:
        return

    # Set default values
    for field_schema, field, path in iterate_schema(
        data.descriptor, data.descriptor_schema.schema, 'descriptor'
    ):
        if 'default' in field_schema and field_schema['name'] not in field:
            dict_dot(data, path, field_schema['default'])
def migrate_process_schema(self, process, schema, from_state): """Migrate process schema. :param process: Process instance :param schema: Process schema to migrate :param from_state: Database model state :return: True if the process was migrated, False otherwise """ container = dict_dot(schema, ".".join(self.field[:-1]), default=list) # Ignore processes, which already contain the target field with the # target schema. for field in container: if field["name"] == self.field[-1]: if field == self.schema: return False else: raise ValueError( "Failed to migrate schema for process '{process}' as the field '{field}' " "already exists and has an incompatible schema".format( process=process.slug, field=self.field[-1] ) ) # Add field to container. container.append(self.schema) return True
def assertFiles(self, obj, field_path, fn_list, **kwargs): # pylint: disable=invalid-name """Compare a process's output file to the given correct file. :param obj: object which includes the files to compare :type obj: ~resolwe.flow.models.Data :param str field_path: path to :class:`~resolwe.flow.models.Data` object's field with the list of file names :param list fn_list: list of file names (and relative paths) of files to compare against. Paths should be relative to the ``tests/files`` directory of a Django application. :param str compression: if not ``None``, files will be uncompressed with the appropriate compression library before comparison. Currently supported compression formats are *gzip* and *zip*. :param filter: Function for filtering the contents of output files. It is used in :obj:`itertools.filterfalse` function and takes one parameter, a line of the output file. If it returns ``True``, the line is excluded from comparison of the two files. :type filter: ~types.FunctionType """ field = dict_dot(obj.output, field_path) if len(field) != len(fn_list): self.fail(msg="Lengths of list:basic:file field and files list are not equal.") for fn_tested, fn_correct in zip(field, fn_list): self._assert_file(obj, fn_tested['file'], fn_correct, **kwargs)
def migrate_process_schema(self, process, schema, from_state): """Migrate process schema. :param process: Process instance :param schema: Process schema to migrate :param from_state: Database model state :return: True if the process was migrated, False otherwise """ container = dict_dot(schema, '.'.join(self.field[:-1]), default=list) # Ignore processes, which already contain the target field with the # target schema. for field in container: if field['name'] == self.field[-1]: if field == self.schema: return False else: raise ValueError( "Failed to migrate schema for process '{process}' as the field '{field}' " "already exists and has an incompatible schema".format( process=process.slug, field=self.field[-1] ) ) # Add field to container. container.append(self.schema) return True
def migrate_process_schema(self, process, schema, from_state): """Migrate process schema. :param process: Process instance :param schema: Process schema to migrate :param from_state: Database model state :return: True if the process was migrated, False otherwise """ container = dict_dot(schema, '.'.join(self.field[:-1]), default=list) # Ignore processes, which already contain the target field. migrate = False for field in container: if field['name'] == self.field[-1]: field['name'] = self.new_field migrate = True break elif field['name'] == self.new_field: # Already has target field. migrate = False break else: if not self.skip_no_field: raise ValueError( "Unable to rename: there is no field with name '{field}' or '{new_field}'.".format( field=self.field[-1], new_field=self.new_field, ) ) return migrate
def migrate_process_schema(self, process, schema, from_state): """Migrate process schema. :param process: Process instance :param schema: Process schema to migrate :param from_state: Database model state :return: True if the process was migrated, False otherwise """ container = dict_dot(schema, ".".join(self.field[:-1]), default=list) # Ignore processes, which already contain the target field. migrate = False for field in container: if field["name"] == self.field[-1]: field["name"] = self.new_field migrate = True break elif field["name"] == self.new_field: # Already has target field. migrate = False break else: if not self.skip_no_field: raise ValueError( "Unable to rename: there is no field with name '{field}' or '{new_field}'.".format( field=self.field[-1], new_field=self.new_field, ) ) return migrate
def assertFile(self, obj, field_path, fn, **kwargs): """Compare a process's output file to the given correct file. :param obj: object that includes the file to compare :type obj: ~resolwe.flow.models.Data :param str field_path: path to :class:`~resolwe.flow.models.Data` object's field with the file name :param str fn: file name (and relative path) of the correct file to compare against. Path should be relative to the ``tests/files`` directory of a Django application. :param str compression: if not ``None``, files will be uncompressed with the appropriate compression library before comparison. Currently supported compression formats are *gzip* and *zip*. :param filter: function for filtering the contents of output files. It is used in :func:`itertools.filterfalse` function and takes one parameter, a line of the output file. If it returns ``True``, the line is excluded from comparison of the two files. :type filter: ~types.FunctionType :param bool sort: if set to ``True``, basic sort will be performed on file contents before computing hash value. """ field = dict_dot(obj.output, field_path) self._assert_file(obj, field["file"], fn, **kwargs)
def assertFile(self, obj, field_path, fn, **kwargs): # pylint: disable=invalid-name """Compare a process's output file to the given correct file. :param obj: object that includes the file to compare :type obj: ~resolwe.flow.models.Data :param str field_path: path to :class:`~resolwe.flow.models.Data` object's field with the file name :param str fn: file name (and relative path) of the correct file to compare against. Path should be relative to the ``tests/files`` directory of a Django application. :param str compression: if not ``None``, files will be uncompressed with the appropriate compression library before comparison. Currently supported compression formats are *gzip* and *zip*. :param filter: function for filtering the contents of output files. It is used in :func:`itertools.filterfalse` function and takes one parameter, a line of the output file. If it returns ``True``, the line is excluded from comparison of the two files. :type filter: ~types.FunctionType :param bool sort: if set to ``True``, basic sort will be performed on file contents before computing hash value. """ field = dict_dot(obj.output, field_path) self._assert_file(obj, field['file'], fn, **kwargs)
def assertDirStructure(self, obj, field_path, dir_struct, exact=True): """Assert correct tree structure in output field of given object. Only names of directories and files are asserted. Content of files is not compared. :param obj: object that includes the directory to compare :type obj: ~resolwe.flow.models.Data :param str dir_path: path to the directory to compare :param dict dir_struct: correct tree structure of the directory. Dictionary keys are directory and file names with the correct nested structure. Dictionary value associated with each directory is a new dictionary which lists the content of the directory. Dictionary value associated with each file name is ``None`` :param bool exact: if ``True`` tested directory structure must exactly match `dir_struct`. If ``False`` `dir_struct` must be a partial structure of the directory to compare """ self.assertDirExists(obj, field_path) field = dict_dot(obj.output, field_path) dir_path = obj.location.get_path(filename=field["dir"]) self._assert_dir_structure(dir_path, dir_struct, exact)
def input_(data, field_path): """Return a hydrated value of the ``input`` field.""" data_obj = Data.objects.get(id=data["__id"]) inputs = copy.deepcopy(data_obj.input) # XXX: Optimize by hydrating only the required field (major refactoring). hydrate_input_references(inputs, data_obj.process.input_schema) hydrate_input_uploads(inputs, data_obj.process.input_schema) return dict_dot(inputs, field_path)
def input_(data, field_path): """Return a hydrated value of the ``input`` field.""" data_obj = Data.objects.get(id=data['__id']) inputs = copy.deepcopy(data_obj.input) # XXX: Optimize by hydrating only the required field (major refactoring). hydrate_input_references(inputs, data_obj.process.input_schema) hydrate_input_uploads(inputs, data_obj.process.input_schema) return dict_dot(inputs, field_path)
def validate_process_types(queryset=None): """Perform process type validation. :param queryset: Optional process queryset to validate :return: A list of validation error strings """ if not queryset: from .process import Process queryset = Process.objects.all() processes = {} for process in queryset: dict_dot( processes, process.type.replace(":", ".") + "__schema__", process.output_schema, ) errors = [] for path, key, value in iterate_dict( processes, exclude=lambda key, value: key == "__schema__" ): if "__schema__" not in value: continue # Validate with any parent types. for length in range(len(path), 0, -1): parent_type = ".".join(path[:length] + ["__schema__"]) try: parent_schema = dict_dot(processes, parent_type) except KeyError: continue errors += validate_process_subtype( supertype_name=":".join(path[:length]), supertype=parent_schema, subtype_name=":".join(path + [key]), subtype=value["__schema__"], ) return errors
def assertJSON(self, obj, storage, field_path, file_name):
    """Compare JSON in Storage object to the given correct JSON.

    :param obj: object to which the :class:`~resolwe.flow.models.Storage`
        object belongs
    :type obj: ~resolwe.flow.models.Data

    :param storage: object or id which contains JSON to compare
    :type storage: :class:`~resolwe.flow.models.Storage` or :class:`str`

    :param str field_path: path to JSON subset in the
        :class:`~resolwe.flow.models.Storage`'s object to compare against.
        If it is empty, the entire object will be compared.

    :param str file_name: file name (and relative path) of the file with
        the correct JSON to compare against. Path should be relative to
        the ``tests/files`` directory of a Django application.

        .. note::

            The given JSON file should be compressed with *gzip* and have
            the ``.gz`` extension.

    """
    self.assertEqual(
        os.path.splitext(file_name)[1], ".gz", msg="File extension must be .gz"
    )

    if not isinstance(storage, Storage):
        storage = Storage.objects.get(pk=storage)

    storage_obj = dict_dot(storage.json, field_path)

    file_path = os.path.join(self.files_path, file_name)
    if not os.path.isfile(file_path):
        with gzip.open(file_path, mode="wt") as f:
            json.dump(storage_obj, f)

        self.fail(msg="Output file {} missing so it was created.".format(file_name))

    with gzip.open(file_path, mode="rt") as f:
        file_obj = json.load(f)

    self.assertAlmostEqualGeneric(
        storage_obj,
        file_obj,
        msg="Storage {} field '{}' does not match file {}".format(
            storage.id, field_path, file_name
        )
        + self._debug_info(obj),
    )
def migrate_data(self, data, from_state): """Migrate data objects. :param data: Queryset containing all data objects that need to be migrated :param from_state: Database model state """ if not self.default: return self.default.prepare(data, from_state) for instance in data: value = self.default.get_default_for(instance, from_state) if not value and not self.schema.get('required', True): continue # Set default value. container = getattr(instance, self.schema_type, {}) dict_dot(container, '.'.join(self.field), value) setattr(instance, self.schema_type, container) instance.save()
def migrate_data(self, data, from_state): """Migrate data objects. :param data: Queryset containing all data objects that need to be migrated :param from_state: Database model state """ if not self.default: return self.default.prepare(data, from_state) for instance in data: value = self.default.get_default_for(instance, from_state) if not value and not self.schema.get("required", True): continue # Set default value. container = getattr(instance, self.schema_type, {}) dict_dot(container, ".".join(self.field), value) setattr(instance, self.schema_type, container) instance.save()
def validate_process_types(queryset=None): """Perform process type validation. :param queryset: Optional process queryset to validate :return: A list of validation error strings """ if not queryset: from .process import Process queryset = Process.objects.all() processes = {} for process in queryset: dict_dot( processes, process.type.replace(':', '.') + '__schema__', process.output_schema ) errors = [] for path, key, value in iterate_dict(processes, exclude=lambda key, value: key == '__schema__'): if '__schema__' not in value: continue # Validate with any parent types. for length in range(len(path), 0, -1): parent_type = '.'.join(path[:length] + ['__schema__']) try: parent_schema = dict_dot(processes, parent_type) except KeyError: continue errors += validate_process_subtype( supertype_name=':'.join(path[:length]), supertype=parent_schema, subtype_name=':'.join(path + [key]), subtype=value['__schema__'] ) return errors
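# Illustration (not part of the library): how colon-separated process types map
# to nested dictionary paths in the validation walk above. set_dot repeats the
# hypothetical helper from the earlier sketch; it is not the library's dict_dot.
def set_dot(container, path, value):
    """Set ``value`` at the dot-separated ``path``, creating nested dicts."""
    keys = path.split(".")
    for key in keys[:-1]:
        container = container.setdefault(key, {})
    container[keys[-1]] = value


# A type such as "data:alignment:bam:" becomes the path
# "data.alignment.bam.__schema__", so every prefix of the type is a parent node
# whose "__schema__" entry (if present) the subtype is validated against.
processes = {}
set_dot(processes, "data:alignment:".replace(":", ".") + "__schema__", [{"name": "bam"}])
set_dot(processes, "data:alignment:bam:".replace(":", ".") + "__schema__", [{"name": "bam"}, {"name": "bai"}])
assert "__schema__" in processes["data"]["alignment"]
assert "__schema__" in processes["data"]["alignment"]["bam"]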
def assertFileExists(self, obj, field_path): """Ensure a file in the given object's field exists. :param obj: object that includes the file for which to check if it exists :type obj: ~resolwe.flow.models.Data :param str field_path: path to :class:`~resolwe.flow.models.Data` object's field with the file name/path """ field = dict_dot(obj.output, field_path) output = obj.location.get_path(filename=field["file"]) if not os.path.isfile(output): self.fail(msg="File {} does not exist.".format(field_path))
def assertFileExists(self, obj, field_path): # pylint: disable=invalid-name """Ensure a file in the given object's field exists. :param obj: object that includes the file for which to check if it exists :type obj: ~resolwe.flow.models.Data :param str field_path: path to :class:`~resolwe.flow.models.Data` object's field with the file name/path """ field = dict_dot(obj.output, field_path) output = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(obj.pk), field['file']) if not os.path.isfile(output): self.fail(msg="File {} does not exist.".format(field_path))
def assertFileExists(self, obj, field_path): # pylint: disable=invalid-name """Ensure a file in the given object's field exists. :param obj: object that includes the file for which to check if it exists :type obj: ~resolwe.flow.models.Data :param str field_path: path to :class:`~resolwe.flow.models.Data` object's field with the file name/path """ field = dict_dot(obj.output, field_path) output = obj.location.get_path(filename=field['file']) if not os.path.isfile(output): self.fail(msg="File {} does not exist.".format(field_path))
def save_storage(data): """Parse output field and create Storage objects if needed.""" for field_schema, fields, path in iterate_fields( data.output, data.process.output_schema, "" ): name = field_schema["name"] value = fields[name] if field_schema.get("type", "").startswith("basic:json:"): if value and not data.pk: raise ValidationError( "Data object must be `created` before creating `basic:json:` fields" ) if isinstance(value, int): # already in Storage continue if isinstance(value, str): file_path = data.location.get_path(filename=value) if os.path.isfile(file_path): try: with open(file_path) as file_handler: value = json.load(file_handler) except json.JSONDecodeError: with open(file_path) as file_handler: content = file_handler.read() content = content.rstrip() raise ValidationError( "Value of '{}' must be a valid JSON, current: {}".format( name, content ) ) existing_storage_pk = None with suppress(KeyError): existing_storage_pk = dict_dot(data._original_output, path) if isinstance(existing_storage_pk, int): data.storages.filter(pk=existing_storage_pk).update(json=value) fields[name] = existing_storage_pk else: storage = data.storages.create( name="Storage for data id {}".format(data.pk), contributor=data.contributor, json=value, ) fields[name] = storage.pk
def assertJSON(self, obj, storage, field_path, file_name):  # pylint: disable=invalid-name
    """Compare JSON in Storage object to the given correct JSON.

    :param obj: object to which the :class:`~resolwe.flow.models.Storage`
        object belongs
    :type obj: ~resolwe.flow.models.Data

    :param storage: object or id which contains JSON to compare
    :type storage: :class:`~resolwe.flow.models.Storage` or :class:`str`

    :param str field_path: path to JSON subset in the
        :class:`~resolwe.flow.models.Storage`'s object to compare against.
        If it is empty, the entire object will be compared.

    :param str file_name: file name (and relative path) of the file with
        the correct JSON to compare against. Path should be relative to
        the ``tests/files`` directory of a Django application.

        .. note::

            The given JSON file should be compressed with *gzip* and have
            the ``.gz`` extension.

    """
    self.assertEqual(os.path.splitext(file_name)[1], '.gz', msg='File extension must be .gz')

    if not isinstance(storage, Storage):
        storage = Storage.objects.get(pk=storage)

    storage_obj = dict_dot(storage.json, field_path)

    file_path = os.path.join(self.files_path, file_name)
    if not os.path.isfile(file_path):
        with gzip.open(file_path, mode='wt') as f:
            json.dump(storage_obj, f)

        self.fail(msg="Output file {} missing so it was created.".format(file_name))

    with gzip.open(file_path, mode='rt') as f:
        file_obj = json.load(f)

    self.assertAlmostEqualGeneric(
        storage_obj, file_obj,
        msg="Storage {} field '{}' does not match file {}".format(
            storage.id, field_path, file_name) + self._debug_info(obj))
def assertFilesExist(self, obj, field_path):  # pylint: disable=invalid-name
    """Ensure files in the given object's field exist.

    :param obj: object that includes the list of files whose existence is
        checked
    :type obj: ~resolwe.flow.models.Data

    :param str field_path: path to
        :class:`~resolwe.flow.models.Data` object's field with the file
        name/path

    """
    field = dict_dot(obj.output, field_path)

    for item in field:
        output_file = obj.location.get_path(filename=item['file'])
        if not os.path.isfile(output_file):
            self.fail(
                msg="File {} in output field {} does not exist.".format(
                    item['file'], field_path
                )
            )
def descriptor(obj, path=''):
    """Return descriptor of given object.

    If ``path`` is specified, only the content on that path is
    returned.
    """
    if isinstance(obj, dict):
        # Current object is hydrated, so we need to get descriptor from
        # dict representation.
        desc = obj['__descriptor']
    else:
        desc = obj.descriptor

    resp = dict_dot(desc, path)

    if isinstance(resp, (list, dict)):
        return json.dumps(resp)

    return resp
def descriptor(obj, path=""): """Return descriptor of given object. If ``path`` is specified, only the content on that path is returned. """ if isinstance(obj, dict): # Current object is hydrated, so we need to get descriptor from # dict representation. desc = obj["__descriptor"] else: desc = obj.descriptor resp = dict_dot(desc, path) if isinstance(resp, list) or isinstance(resp, dict): return json.dumps(resp) return resp
def assertDir(self, obj, field_path, fn): """Compare process output directory to correct compressed directory. :param obj: object that includes the directory to compare :type obj: ~resolwe.flow.models.Data :param str field_path: path to :class:`~resolwe.flow.models.Data` object's field with the file name :param str fn: file name (and relative path) of the correct compressed directory to compare against. Path should be relative to the ``tests/files`` directory of a Django application. Compressed directory needs to be in ``tar.gz`` format. """ self.assertDirExists(obj, field_path) field = dict_dot(obj.output, field_path) dir_path = obj.location.get_path(filename=field["dir"]) self._assert_dir(dir_path, fn)
def handle_update(self, obj, internal_call=False): """Handle an incoming ``Data`` object update request. :param obj: The Channels message object. Command object format: .. code-block:: none { 'command': 'update', 'data_id': [id of the :class:`~resolwe.flow.models.Data` object this command changes], 'changeset': { [keys to be changed] } } :param internal_call: If ``True``, this is an internal delegate call, so a reply to the executor won't be sent. """ data_id = obj[ExecutorProtocol.DATA_ID] changeset = obj[ExecutorProtocol.UPDATE_CHANGESET] if not internal_call: logger.debug(__( "Handling update for Data with id {} (handle_update).", data_id), extra={ 'data_id': data_id, 'packet': obj }) try: d = Data.objects.get(pk=data_id) except Data.DoesNotExist: logger.warning("Data object does not exist (handle_update).", extra={ 'data_id': data_id, }) if not internal_call: async_to_sync(self._send_reply)( obj, { ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR }) async_to_sync(consumer.send_event)({ WorkerProtocol.COMMAND: WorkerProtocol.ABORT, WorkerProtocol.DATA_ID: obj[ExecutorProtocol.DATA_ID], WorkerProtocol.FINISH_COMMUNICATE_EXTRA: { 'executor': getattr(settings, 'FLOW_EXECUTOR', {}).get('NAME', 'resolwe.flow.executors.local'), }, }) return if changeset.get('status', None) == Data.STATUS_ERROR: logger.error(__( "Error occured while running process '{}' (handle_update).", d.process.slug), extra={ 'data_id': data_id, 'api_url': '{}{}'.format( getattr(settings, 'RESOLWE_HOST_URL', ''), reverse('resolwe-api:data-detail', kwargs={'pk': data_id})), }) if d.status == Data.STATUS_ERROR: changeset['status'] = Data.STATUS_ERROR if not d.started: changeset['started'] = now() changeset['modified'] = now() for key, val in changeset.items(): if key in ['process_error', 'process_warning', 'process_info']: # Trim process_* fields to not exceed max length of the database field. for i, entry in enumerate(val): max_length = Data._meta.get_field( key).base_field.max_length # pylint: disable=protected-access if len(entry) > max_length: val[i] = entry[:max_length - 3] + '...' getattr(d, key).extend(val) elif key != 'output': setattr(d, key, val) if 'output' in changeset: if not isinstance(d.output, dict): d.output = {} for key, val in changeset['output'].items(): dict_dot(d.output, key, val) try: d.save(update_fields=list(changeset.keys())) except ValidationError as exc: logger.error(__( "Validation error when saving Data object of process '{}' (handle_update):\n\n{}", d.process.slug, traceback.format_exc()), extra={'data_id': data_id}) d.refresh_from_db() d.process_error.append(exc.message) d.status = Data.STATUS_ERROR try: d.save(update_fields=['process_error', 'status']) except Exception: # pylint: disable=broad-except pass except Exception: # pylint: disable=broad-except logger.error(__( "Error when saving Data object of process '{}' (handle_update):\n\n{}", d.process.slug, traceback.format_exc()), extra={'data_id': data_id}) if not internal_call: async_to_sync(self._send_reply)( obj, { ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK })
def create_entity(self):
    """Create entity if `flow_collection` is defined in process.

    The following rules apply when adding a `Data` object to an `Entity`:

    * Only add a `Data` object to an `Entity` if the process has the
      `flow_collection` field defined
    * Add the object to an existing `Entity` if all of its parents that
      belong to an `Entity` (not necessarily all parents) belong to the
      same `Entity`
    * If the parents belong to different `Entities` or do not belong to
      any `Entity`, create a new `Entity`

    """
    entity_type = self.process.entity_type  # pylint: disable=no-member
    entity_descriptor_schema = self.process.entity_descriptor_schema  # pylint: disable=no-member
    entity_input = self.process.entity_input  # pylint: disable=no-member

    if entity_type:
        data_filter = {}
        if entity_input:
            input_id = dict_dot(self.input, entity_input, default=lambda: None)
            if input_id is None:
                logger.warning("Skipping creation of entity due to missing input.")
                return
            if isinstance(input_id, int):
                data_filter['data__pk'] = input_id
            elif isinstance(input_id, list):
                data_filter['data__pk__in'] = input_id
            else:
                raise ValueError(
                    "Cannot create entity due to invalid value of field {}.".format(entity_input)
                )
        else:
            data_filter['data__in'] = self.parents.all()  # pylint: disable=no-member

        entity_query = Entity.objects.filter(type=entity_type, **data_filter).distinct()
        entity_count = entity_query.count()

        if entity_count == 0:
            descriptor_schema = DescriptorSchema.objects.filter(
                slug=entity_descriptor_schema
            ).latest()
            entity = Entity.objects.create(
                contributor=self.contributor,
                descriptor_schema=descriptor_schema,
                type=entity_type,
                name=self.name,
                tags=self.tags,
            )
            assign_contributor_permissions(entity)

        elif entity_count == 1:
            entity = entity_query.first()
            copy_permissions(entity, self)

        else:
            logger.info("Skipping creation of entity due to multiple entities found.")
            entity = None

        if entity:
            entity.data.add(self)

            # Inherit collections from entity.
            for collection in entity.collections.all():
                collection.data.add(self)
def create_entity(self):
    """Create entity if `flow_collection` is defined in process.

    The following rules apply when adding a `Data` object to an `Entity`:

    * Only add a `Data` object to an `Entity` if the process has the
      `flow_collection` field defined
    * Add the object to an existing `Entity` if all of its parents that
      belong to an `Entity` (not necessarily all parents) belong to the
      same `Entity`
    * If the parents belong to different `Entities` or do not belong to
      any `Entity`, create a new `Entity`

    """
    entity_type = self.process.entity_type  # pylint: disable=no-member
    entity_descriptor_schema = self.process.entity_descriptor_schema  # pylint: disable=no-member
    entity_input = self.process.entity_input  # pylint: disable=no-member

    if entity_type:
        data_filter = {}
        if entity_input:
            input_id = dict_dot(self.input, entity_input, default=lambda: None)
            if input_id is None:
                logger.warning(
                    "Skipping creation of entity due to missing input.")
                return
            if isinstance(input_id, int):
                data_filter['data__pk'] = input_id
            elif isinstance(input_id, list):
                data_filter['data__pk__in'] = input_id
            else:
                raise ValueError(
                    "Cannot create entity due to invalid value of field {}."
                    .format(entity_input))
        else:
            data_filter['data__in'] = self.parents.all()  # pylint: disable=no-member

        entity_query = Entity.objects.filter(type=entity_type, **data_filter).distinct()
        entity_count = entity_query.count()

        if entity_count == 0:
            descriptor_schema = DescriptorSchema.objects.filter(
                slug=entity_descriptor_schema).latest()
            entity = Entity.objects.create(
                contributor=self.contributor,
                descriptor_schema=descriptor_schema,
                type=entity_type,
                name=self.name,
                tags=self.tags,
            )
            assign_contributor_permissions(entity)

        elif entity_count == 1:
            entity = entity_query.first()
            copy_permissions(entity, self)

        else:
            logger.info(
                "Skipping creation of entity due to multiple entities found."
            )
            entity = None

        if entity:
            entity.data.add(self)

            # Inherit collections from entity.
            for collection in entity.collections.all():
                collection.data.add(self)
def _handle_entity(obj):
    """Create entity if `entity.type` is defined in process.

    The following rules apply when adding a `Data` object to an `Entity`:

    * Only add a `Data` object to an `Entity` if the process has the
      `entity.type` field defined
    * Create a new entity if the parents do not belong to any `Entity`
    * Add the object to an existing `Entity` if all of its parents that
      belong to an `Entity` (not necessarily all parents) belong to the
      same `Entity`
    * If the parents belong to different `Entities`, don't do anything

    """
    entity_type = obj.process.entity_type
    entity_descriptor_schema = obj.process.entity_descriptor_schema
    entity_input = obj.process.entity_input
    entity_always_create = obj.process.entity_always_create
    operation = HandleEntityOperation.PASS

    if entity_type:
        data_filter = {}
        if entity_input:
            input_id = dict_dot(obj.input, entity_input, default=lambda: None)
            if input_id is None:
                logger.warning(
                    "Skipping creation of entity due to missing input.")
                return
            if isinstance(input_id, int):
                data_filter["data__pk"] = input_id
            elif isinstance(input_id, list):
                data_filter["data__pk__in"] = input_id
            else:
                raise ValueError(
                    "Cannot create entity due to invalid value of field {}."
                    .format(entity_input))
        else:
            data_filter["data__in"] = obj.parents.all()

        entity_query = Entity.objects.filter(type=entity_type, **data_filter).distinct()
        entity_count = entity_query.count()

        if entity_count == 0 or entity_always_create:
            descriptor_schema = DescriptorSchema.objects.filter(
                slug=entity_descriptor_schema).latest()
            entity = Entity.objects.create(
                contributor=obj.contributor,
                descriptor_schema=descriptor_schema,
                type=entity_type,
                name=obj.name,
                tags=obj.tags,
            )
            assign_contributor_permissions(entity)
            operation = HandleEntityOperation.CREATE

        elif entity_count == 1:
            entity = entity_query.first()
            obj.tags = entity.tags
            copy_permissions(entity, obj)
            operation = HandleEntityOperation.ADD

        else:
            logger.info(
                "Skipping creation of entity due to multiple entities found."
            )
            entity = None

        if entity:
            obj.entity = entity
            obj.save()

    return operation
def handle_update(self, obj, internal_call=False): """Handle an incoming ``Data`` object update request. :param obj: The Channels message object. Command object format: .. code-block:: none { 'command': 'update', 'data_id': [id of the :class:`~resolwe.flow.models.Data` object this command changes], 'changeset': { [keys to be changed] } } :param internal_call: If ``True``, this is an internal delegate call, so a reply to the executor won't be sent. """ data_id = obj[ExecutorProtocol.DATA_ID] changeset = obj[ExecutorProtocol.UPDATE_CHANGESET] if not internal_call: logger.debug( __("Handling update for Data with id {} (handle_update).", data_id), extra={ 'data_id': data_id, 'packet': obj } ) try: d = Data.objects.get(pk=data_id) except Data.DoesNotExist: logger.warning( "Data object does not exist (handle_update).", extra={ 'data_id': data_id, } ) if not internal_call: async_to_sync(self._send_reply)(obj, {ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR}) async_to_sync(consumer.send_event)({ WorkerProtocol.COMMAND: WorkerProtocol.ABORT, WorkerProtocol.DATA_ID: obj[ExecutorProtocol.DATA_ID], WorkerProtocol.FINISH_COMMUNICATE_EXTRA: { 'executor': getattr(settings, 'FLOW_EXECUTOR', {}).get('NAME', 'resolwe.flow.executors.local'), }, }) return if changeset.get('status', None) == Data.STATUS_ERROR: logger.error( __("Error occured while running process '{}' (handle_update).", d.process.slug), extra={ 'data_id': data_id, 'api_url': '{}{}'.format( getattr(settings, 'RESOLWE_HOST_URL', ''), reverse('resolwe-api:data-detail', kwargs={'pk': data_id}) ), } ) if d.status == Data.STATUS_ERROR: changeset['status'] = Data.STATUS_ERROR if not d.started: changeset['started'] = now() changeset['modified'] = now() for key, val in changeset.items(): if key in ['process_error', 'process_warning', 'process_info']: # Trim process_* fields to not exceed max length of the database field. for i, entry in enumerate(val): max_length = Data._meta.get_field(key).base_field.max_length # pylint: disable=protected-access if len(entry) > max_length: val[i] = entry[:max_length - 3] + '...' getattr(d, key).extend(val) elif key != 'output': setattr(d, key, val) if 'output' in changeset: if not isinstance(d.output, dict): d.output = {} for key, val in changeset['output'].items(): dict_dot(d.output, key, val) try: d.save(update_fields=list(changeset.keys())) except ValidationError as exc: logger.error( __( "Validation error when saving Data object of process '{}' (handle_update):\n\n{}", d.process.slug, traceback.format_exc() ), extra={ 'data_id': data_id } ) d.refresh_from_db() d.process_error.append(exc.message) d.status = Data.STATUS_ERROR try: d.save(update_fields=['process_error', 'status']) except Exception: # pylint: disable=broad-except pass except Exception: # pylint: disable=broad-except logger.error( __( "Error when saving Data object of process '{}' (handle_update):\n\n{}", d.process.slug, traceback.format_exc() ), extra={ 'data_id': data_id } ) if not internal_call: async_to_sync(self._send_reply)(obj, {ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK})
def fill_with_defaults(process_input, input_schema):
    """Fill empty optional fields in input with default values."""
    for field_schema, fields, path in iterate_schema(process_input, input_schema):
        if 'default' in field_schema and field_schema['name'] not in fields:
            dict_dot(process_input, path, field_schema['default'])
def handle_annotate(self, obj): """Handle an incoming ``Data`` object annotate request. :param obj: The Channels message object. Command object format: .. code-block:: none { 'command': 'annotate', 'data_id': [id of the :class:`~resolwe.flow.models.Data` object this command annotates], 'annotations': { [annotations to be added/updated] } } """ def report_failure(): self.unlock_all_inputs(obj[ExecutorProtocol.DATA_ID]) async_to_sync(self._send_reply)( obj, { ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR }) async_to_sync(consumer.send_event)({ WorkerProtocol.COMMAND: WorkerProtocol.ABORT, WorkerProtocol.DATA_ID: obj[ExecutorProtocol.DATA_ID], WorkerProtocol.FINISH_COMMUNICATE_EXTRA: { "executor": getattr(settings, "FLOW_EXECUTOR", {}).get("NAME", "resolwe.flow.executors.local"), }, }) data_id = obj[ExecutorProtocol.DATA_ID] annotations = obj[ExecutorProtocol.ANNOTATIONS] logger.debug( __("Handling annotate for Data with id {} (handle_annotate).", data_id), extra={ "data_id": data_id, "packet": obj }, ) try: d = Data.objects.get(pk=data_id) except Data.DoesNotExist: logger.warning( "Data object does not exist (handle_annotate).", extra={"data_id": data_id}, ) report_failure() return if d.entity is None: logger.error( __( "No entity to annotate for process '{}' (handle_annotate):\n\n{}", d.process.slug, traceback.format_exc(), ), extra={"data_id": data_id}, ) d.process_error.append( "No entity to annotate for process '{}' (handle_annotate)". format(d.process.slug)) d.status = Data.STATUS_ERROR with suppress(Exception): d.save(update_fields=["process_error", "status"]) report_failure() return for key, val in annotations.items(): logger.debug( __("Annotating entity {}: {} -> {}", d.entity, key, val)) dict_dot(d.entity.descriptor, key, val) try: d.entity.save() async_to_sync(self._send_reply)( obj, { ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK }) except ValidationError as exc: logger.error( __( "Validation error when saving Data object of process '{}' (handle_annotate):\n\n{}", d.process.slug, traceback.format_exc(), ), extra={"data_id": data_id}, ) d.refresh_from_db() d.process_error.append(exc.message) d.status = Data.STATUS_ERROR with suppress(Exception): d.save(update_fields=["process_error", "status"]) report_failure()
def handle_update(self, obj, internal_call=False): """Handle an incoming ``Data`` object update request. :param obj: The Channels message object. Command object format: .. code-block:: none { 'command': 'update', 'data_id': [id of the :class:`~resolwe.flow.models.Data` object this command changes], 'changeset': { [keys to be changed] } } :param internal_call: If ``True``, this is an internal delegate call, so a reply to the executor won't be sent. """ data_id = obj[ExecutorProtocol.DATA_ID] changeset = obj[ExecutorProtocol.UPDATE_CHANGESET] if not internal_call: logger.debug( __("Handling update for Data with id {} (handle_update).", data_id), extra={ "data_id": data_id, "packet": obj }, ) try: d = Data.objects.get(pk=data_id) except Data.DoesNotExist: logger.warning( "Data object does not exist (handle_update).", extra={"data_id": data_id}, ) if not internal_call: async_to_sync(self._send_reply)( obj, { ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR }) async_to_sync(consumer.send_event)({ WorkerProtocol.COMMAND: WorkerProtocol.ABORT, WorkerProtocol.DATA_ID: obj[ExecutorProtocol.DATA_ID], WorkerProtocol.FINISH_COMMUNICATE_EXTRA: { "executor": getattr(settings, "FLOW_EXECUTOR", {}).get("NAME", "resolwe.flow.executors.local"), }, }) return if changeset.get("status", None) == Data.STATUS_ERROR: logger.error( __( "Error occured while running process '{}' (handle_update).", d.process.slug, ), extra={ "data_id": data_id, "api_url": "{}{}".format( getattr(settings, "RESOLWE_HOST_URL", ""), reverse("resolwe-api:data-detail", kwargs={"pk": data_id}), ), }, ) self.unlock_all_inputs(data_id) if d.status == Data.STATUS_ERROR: changeset["status"] = Data.STATUS_ERROR if not d.started: changeset["started"] = now() changeset["modified"] = now() for key, val in changeset.items(): if key in ["process_error", "process_warning", "process_info"]: # Trim process_* fields to not exceed max length of the database field. for i, entry in enumerate(val): max_length = Data._meta.get_field( key).base_field.max_length if len(entry) > max_length: val[i] = entry[:max_length - 3] + "..." getattr(d, key).extend(val) elif key != "output": setattr(d, key, val) if "output" in changeset: if not isinstance(d.output, dict): d.output = {} for key, val in changeset["output"].items(): dict_dot(d.output, key, val) try: d.save(update_fields=list(changeset.keys())) except ValidationError as exc: logger.error( __( "Validation error when saving Data object of process '{}' (handle_update):\n\n{}", d.process.slug, traceback.format_exc(), ), extra={"data_id": data_id}, ) d.refresh_from_db() d.process_error.append(exc.message) d.status = Data.STATUS_ERROR with suppress(Exception): d.save(update_fields=["process_error", "status"]) self.unlock_all_inputs(data_id) except Exception: logger.error( __( "Error when saving Data object of process '{}' (handle_update):\n\n{}", d.process.slug, traceback.format_exc(), ), extra={"data_id": data_id}, ) try: # Update referenced files. Since entire output is sent every time # just delete and recreate objects. Computing changes and updating # would probably be slower. 
if "output" in changeset: storage_location = d.location.default_storage_location ReferencedPath.objects.filter( storage_locations=storage_location).delete() referenced_paths = [ ReferencedPath(path=path) for path in referenced_files(d, include_descriptor=False) ] ReferencedPath.objects.bulk_create(referenced_paths) storage_location.files.add(*referenced_paths) except Exception: logger.error( __( "Error when saving ReferencedFile objects of process '{}' (handle_update):\n\n{}", d.process.slug, traceback.format_exc(), ), extra={"data_id": data_id}, ) if not internal_call: async_to_sync(self._send_reply)( obj, { ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK })
def create(self, request, *args, **kwargs): """Create a resource.""" collections = request.data.get('collections', []) # check that user has permissions on all collections that Data # object will be added to for collection_id in collections: try: collection = Collection.objects.get(pk=collection_id) except Collection.DoesNotExist: return Response( { 'collections': [ 'Invalid pk "{}" - object does not exist.'.format( collection_id) ] }, status=status.HTTP_400_BAD_REQUEST) if not request.user.has_perm('add_collection', obj=collection): if request.user.has_perm('view_collection', obj=collection): raise exceptions.PermissionDenied( "You don't have `ADD` permission on collection (id: {})." .format(collection_id)) else: raise exceptions.NotFound( "Collection not found (id: {}).".format(collection_id)) # translate processe's slug to id process_slug = request.data.get('process', None) process_query = Process.objects.filter(slug=process_slug) process_query = get_objects_for_user(request.user, 'view_process', process_query) try: process = process_query.latest() except Process.DoesNotExist: return Response( { 'process': [ 'Invalid process slug "{}" - object does not exist.'. format(process_slug) ] }, status=status.HTTP_400_BAD_REQUEST) request.data['process'] = process.pk # perform "get_or_create" if requested - return existing object # if found if kwargs.pop('get_or_create', False): process_input = request.data.get('input', {}) # use default values if they are not given for field_schema, fields, path in iterate_schema( process_input, process.input_schema): if 'default' in field_schema and field_schema[ 'name'] not in fields: dict_dot(process_input, path, field_schema['default']) checksum = get_data_checksum(process_input, process.slug, process.version) data_qs = Data.objects.filter( checksum=checksum, process__persistence__in=[ Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP ], ) data_qs = get_objects_for_user(request.user, 'view_data', data_qs) if data_qs.exists(): data = data_qs.order_by('created').last() serializer = self.get_serializer(data) return Response(serializer.data) # create the objects resp = super(DataViewSet, self).create(request, *args, **kwargs) # run manager manager.communicate() return resp
def register_processes(self, process_schemas, user, force=False, verbosity=1): """Read and register processors.""" log_processors = [] log_templates = [] for p in process_schemas: # TODO: Remove this when all processes are migrated to the # new syntax. if 'flow_collection' in p: if 'entity' in p: self.stderr.write( "Skip processor {}: only one of 'flow_collection' and 'entity' fields " "allowed".format(p['slug']) ) continue p['entity'] = {'type': p.pop('flow_collection')} if p['type'][-1] != ':': p['type'] += ':' if 'category' in p and not p['category'].endswith(':'): p['category'] += ':' for field in ['input', 'output']: for schema, _, _ in iterate_schema({}, p[field] if field in p else {}): if not schema['type'][-1].endswith(':'): schema['type'] += ':' # TODO: Check if schemas validate with our JSON meta schema and Processor model docs. if not self.valid(p, PROCESSOR_SCHEMA): continue if 'entity' in p: if 'type' not in p['entity']: self.stderr.write( "Skip process {}: 'entity.type' required if 'entity' defined".format(p['slug']) ) continue p['entity_type'] = p['entity']['type'] p['entity_descriptor_schema'] = p['entity'].get('descriptor_schema', p['entity_type']) p['entity_input'] = p['entity'].get('input', None) p.pop('entity') if not DescriptorSchema.objects.filter(slug=p['entity_descriptor_schema']).exists(): self.stderr.write( "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' " "field.".format(p['slug'], p['entity_descriptor_schema']) ) continue if 'persistence' in p: persistence_mapping = { 'RAW': Process.PERSISTENCE_RAW, 'CACHED': Process.PERSISTENCE_CACHED, 'TEMP': Process.PERSISTENCE_TEMP, } p['persistence'] = persistence_mapping[p['persistence']] if 'scheduling_class' in p: scheduling_class_mapping = { 'interactive': Process.SCHEDULING_CLASS_INTERACTIVE, 'batch': Process.SCHEDULING_CLASS_BATCH } p['scheduling_class'] = scheduling_class_mapping[p['scheduling_class']] if 'input' in p: p['input_schema'] = p.pop('input') if 'output' in p: p['output_schema'] = p.pop('output') slug = p['slug'] if 'run' in p: # Set default language to 'bash' if not set. p['run'].setdefault('language', 'bash') # Transform output schema using the execution engine. try: execution_engine = manager.get_execution_engine(p['run']['language']) extra_output_schema = execution_engine.get_output_schema(p) if extra_output_schema: p.setdefault('output_schema', []).extend(extra_output_schema) except InvalidEngineError: self.stderr.write("Skip processor {}: execution engine '{}' not supported".format( slug, p['run']['language'] )) continue # Validate if container image is allowed based on the configured pattern. # NOTE: This validation happens here and is not deferred to executors because the idea # is that this will be moved to a "container" requirement independent of the # executor. 
if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'): try: container_image = dict_dot(p, 'requirements.executor.docker.image') if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE, container_image): self.stderr.write("Skip processor {}: container image does not match '{}'".format( slug, settings.FLOW_CONTAINER_VALIDATE_IMAGE, )) continue except KeyError: pass version = p['version'] int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS) # `latest version` is returned as `int` so it has to be compared to `int_version` latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max'] if latest_version is not None and latest_version > int_version: self.stderr.write("Skip processor {}: newer version installed".format(slug)) continue previous_process_qs = Process.objects.filter(slug=slug) if previous_process_qs.exists(): previous_process = previous_process_qs.latest() else: previous_process = None process_query = Process.objects.filter(slug=slug, version=version) if process_query.exists(): if not force: if verbosity > 0: self.stdout.write("Skip processor {}: same version installed".format(slug)) continue process_query.update(**p) log_processors.append("Updated {}".format(slug)) else: process = Process.objects.create(contributor=user, **p) assign_contributor_permissions(process) if previous_process: copy_permissions(previous_process, process) log_processors.append("Inserted {}".format(slug)) if verbosity > 0: if log_processors: self.stdout.write("Processor Updates:") for log in log_processors: self.stdout.write(" {}".format(log)) if log_templates: self.stdout.write("Default Template Updates:") for log in log_templates: self.stdout.write(" {}".format(log))
def register_processes(self, process_schemas, user, force=False, verbosity=1): """Read and register processors.""" log_processors = [] log_templates = [] for p in process_schemas: # TODO: Remove this when all processes are migrated to the # new syntax. if "flow_collection" in p: if "entity" in p: self.stderr.write( "Skip processor {}: only one of 'flow_collection' and 'entity' fields " "allowed".format(p["slug"])) continue p["entity"] = {"type": p.pop("flow_collection")} if p["type"][-1] != ":": p["type"] += ":" if "category" in p and not p["category"].endswith(":"): p["category"] += ":" for field in ["input", "output"]: for schema, _, _ in iterate_schema( {}, p[field] if field in p else {}): if not schema["type"][-1].endswith(":"): schema["type"] += ":" # TODO: Check if schemas validate with our JSON meta schema and Processor model docs. if not self.valid(p, PROCESSOR_SCHEMA): continue if "entity" in p: if "type" not in p["entity"]: self.stderr.write( "Skip process {}: 'entity.type' required if 'entity' defined" .format(p["slug"])) continue if "input" in p["entity"] and p["entity"].get( "always_create", False): self.stderr.write( "Skip process {}: 'entity.input' will not be considered if 'entity.always_create' " "is set to true.".format(p["slug"])) continue p["entity_type"] = p["entity"]["type"] p["entity_descriptor_schema"] = p["entity"].get( "descriptor_schema", p["entity_type"]) p["entity_input"] = p["entity"].get("input", None) p["entity_always_create"] = p["entity"].get( "always_create", False) p.pop("entity") if not DescriptorSchema.objects.filter( slug=p["entity_descriptor_schema"]).exists(): self.stderr.write( "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' " "field.".format(p["slug"], p["entity_descriptor_schema"])) continue if "persistence" in p: persistence_mapping = { "RAW": Process.PERSISTENCE_RAW, "CACHED": Process.PERSISTENCE_CACHED, "TEMP": Process.PERSISTENCE_TEMP, } p["persistence"] = persistence_mapping[p["persistence"]] if "scheduling_class" in p: scheduling_class_mapping = { "interactive": Process.SCHEDULING_CLASS_INTERACTIVE, "batch": Process.SCHEDULING_CLASS_BATCH, } p["scheduling_class"] = scheduling_class_mapping[ p["scheduling_class"]] if "input" in p: p["input_schema"] = p.pop("input") if "output" in p: p["output_schema"] = p.pop("output") slug = p["slug"] if "run" in p: # Set default language to 'bash' if not set. p["run"].setdefault("language", "bash") # Transform output schema using the execution engine. try: execution_engine = manager.get_execution_engine( p["run"]["language"]) extra_output_schema = execution_engine.get_output_schema(p) if extra_output_schema: p.setdefault("output_schema", []).extend(extra_output_schema) except InvalidEngineError: self.stderr.write( "Skip processor {}: execution engine '{}' not supported" .format(slug, p["run"]["language"])) continue # Validate if container image is allowed based on the configured pattern. # NOTE: This validation happens here and is not deferred to executors because the idea # is that this will be moved to a "container" requirement independent of the # executor. 
if hasattr(settings, "FLOW_CONTAINER_VALIDATE_IMAGE"): try: container_image = dict_dot( p, "requirements.executor.docker.image") if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE, container_image): self.stderr.write( "Skip processor {}: container image does not match '{}'" .format( slug, settings.FLOW_CONTAINER_VALIDATE_IMAGE, )) continue except KeyError: pass version = p["version"] int_version = convert_version_string_to_int( version, VERSION_NUMBER_BITS) # `latest version` is returned as `int` so it has to be compared to `int_version` latest_version = Process.objects.filter(slug=slug).aggregate( Max("version"))["version__max"] if latest_version is not None and latest_version > int_version: self.stderr.write( "Skip processor {}: newer version installed".format(slug)) continue previous_process_qs = Process.objects.filter(slug=slug) if previous_process_qs.exists(): previous_process = previous_process_qs.latest() else: previous_process = None process_query = Process.objects.filter(slug=slug, version=version) if process_query.exists(): if not force: if verbosity > 0: self.stdout.write( "Skip processor {}: same version installed".format( slug)) continue process_query.update(**p) log_processors.append("Updated {}".format(slug)) else: process = Process.objects.create(contributor=user, **p) assign_contributor_permissions(process) if previous_process: copy_permissions(previous_process, process) log_processors.append("Inserted {}".format(slug)) if verbosity > 0: if log_processors: self.stdout.write("Processor Updates:") for log in log_processors: self.stdout.write(" {}".format(log)) if log_templates: self.stdout.write("Default Template Updates:") for log in log_templates: self.stdout.write(" {}".format(log))