Example #1
File: data.py  Project: tjanez/resolwe
    def perform_create(self, serializer):
        """Create a resource."""
        process = serializer.validated_data.get('process')
        if not process.is_active:
            raise exceptions.ParseError(
                'Process retired (id: {}, slug: {}/{}).'.format(
                    process.id, process.slug, process.version))

        with transaction.atomic():
            instance = serializer.save()

            assign_contributor_permissions(instance)

            # The entity is added to the collection only when it has just been
            # created, i.e. when it contains exactly one Data object.
            entities = Entity.objects.annotate(num_data=Count('data')).filter(
                data=instance, num_data=1)

            # Assign data object to all specified collections.
            collection_pks = self.request.data.get('collections', [])
            for collection in Collection.objects.filter(pk__in=collection_pks):
                collection.data.add(instance)
                copy_permissions(collection, instance)

                # Add the entities that the data object belongs to into the collection.
                for entity in entities:
                    entity.collections.add(collection)
                    copy_permissions(collection, entity)
Example #2
File: data.py  Project: genialis/resolwe
    def create(self, subprocess_parent=None, **kwargs):
        """Create new object with the given kwargs."""
        obj = super().create(**kwargs)

        # Data dependencies
        obj.save_dependencies(obj.input, obj.process.input_schema)
        if subprocess_parent:
            DataDependency.objects.create(
                parent=subprocess_parent,
                child=obj,
                kind=DataDependency.KIND_SUBPROCESS,
            )
            # Data was from a workflow / spawned process
            if not obj.in_container():
                copy_permissions(subprocess_parent, obj)

        # Entity, Collection assignment
        entity_operation = self._handle_entity(obj)
        self._handle_collection(obj, entity_operation=entity_operation)

        # Assign contributor permission only if Data is not in the container.
        if not obj.in_container():
            assign_contributor_permissions(obj)

        return obj
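A minimal usage sketch (not taken from the resolwe source), assuming `child_process` and `parent_data` already exist: it shows how the manager above records a `KIND_SUBPROCESS` dependency and, if the new object ends up outside a container, copies the parent's permissions. The field values are illustrative only.

    # Hypothetical call into the manager shown above.
    child = Data.objects.create(
        process=child_process,               # assumed: a Process instance resolved earlier
        contributor=parent_data.contributor,
        input={'src': parent_data.pk},       # assumed input schema
        subprocess_parent=parent_data,       # triggers the DataDependency + copy_permissions branch
    )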
Example #3
    def perform_create(self, serializer):
        """Create a resource."""
        process = serializer.validated_data.get('process')
        if not process.is_active:
            raise exceptions.ParseError(
                'Process retired (id: {}, slug: {}/{}).'.format(process.id, process.slug, process.version)
            )

        with transaction.atomic():
            instance = serializer.save()

            assign_contributor_permissions(instance)

            # The entity is added to the collection only when it has just been
            # created, i.e. when it contains exactly one Data object.
            entities = Entity.objects.annotate(num_data=Count('data')).filter(data=instance, num_data=1)

            # Assign data object to all specified collections.
            collection_pks = self.request.data.get('collections', [])
            for collection in Collection.objects.filter(pk__in=collection_pks):
                collection.data.add(instance)
                copy_permissions(collection, instance)

                # Add the entities that the data object belongs to into the collection.
                for entity in entities:
                    entity.collections.add(collection)
                    copy_permissions(collection, entity)
Example #4
    def test_copy_different_ctype(self):
        assign_perm('view_collection', self.contributor, self.collection)
        assign_perm('add_collection', self.contributor, self.collection)

        copy_permissions(self.collection, self.dst_process)

        # Only 'view' is copied as process has no 'add' permission.
        self.assertEqual(UserObjectPermission.objects.count(), 3)
Example #5
 def move_to_collection(self, destination_collection):
     """Move data object to collection."""
     self.validate_change_collection(destination_collection)
     self.collection = destination_collection
     if destination_collection:
         self.tags = destination_collection.tags
         copy_permissions(destination_collection, self)
     self.save()
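A hedged usage sketch for the method above; `data` and `target_collection` are hypothetical objects that are assumed to pass `validate_change_collection`.

    # Hypothetical call: the destination collection's tags and permissions
    # are applied to the data object before it is saved.
    data.move_to_collection(target_collection)
    assert data.collection == target_collection
    assert data.tags == target_collection.tags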
Example #6
    def duplicate(self, contributor=None, inherit_entity=False, inherit_collection=False):
        """Duplicate (make a copy)."""
        if self.status not in [self.STATUS_DONE, self.STATUS_ERROR]:
            raise ValidationError('Data object must have done or error status to be duplicated')

        duplicate = Data.objects.get(id=self.id)
        duplicate.pk = None
        duplicate.slug = None
        duplicate.name = 'Copy of {}'.format(self.name)
        duplicate.duplicated = now()
        if contributor:
            duplicate.contributor = contributor

        duplicate.entity = None
        if inherit_entity:
            if not contributor.has_perm('add_entity', self.entity):
                raise ValidationError("You do not have add permission on entity {}.".format(self.entity))
            duplicate.entity = self.entity

        duplicate.collection = None
        if inherit_collection:
            if not contributor.has_perm('add_collection', self.collection):
                raise ValidationError("You do not have add permission on collection {}.".format(self.collection))
            duplicate.collection = self.collection

        duplicate._perform_save(force_insert=True)  # pylint: disable=protected-access

        # Override fields that are automatically set on create.
        duplicate.created = self.created
        duplicate._perform_save()  # pylint: disable=protected-access

        if self.location:
            self.location.data.add(duplicate)  # pylint: disable=no-member

        duplicate.storages.set(self.storages.all())  # pylint: disable=no-member

        for migration in self.migration_history.order_by('created'):  # pylint: disable=no-member
            migration.pk = None
            migration.data = duplicate
            migration.save(force_insert=True)

        # Inherit existing child dependencies.
        DataDependency.objects.bulk_create([
            DataDependency(child=duplicate, parent=dependency.parent, kind=dependency.kind)
            for dependency in DataDependency.objects.filter(child=self)
        ])
        # Inherit existing parent dependencies.
        DataDependency.objects.bulk_create([
            DataDependency(child=dependency.child, parent=duplicate, kind=dependency.kind)
            for dependency in DataDependency.objects.filter(parent=self)
        ])

        # Permissions
        assign_contributor_permissions(duplicate)
        copy_permissions(duplicate.entity, duplicate)
        copy_permissions(duplicate.collection, duplicate)

        return duplicate
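A hedged sketch of calling the `duplicate` method above; `data` and `request.user` are assumed to exist, and the contributor must hold the relevant 'add' permissions, otherwise a ValidationError is raised.

    # Hypothetical call: duplicate a finished Data object into the same
    # entity and collection.
    copy = data.duplicate(
        contributor=request.user,
        inherit_entity=True,
        inherit_collection=True,
    )
    # The copy keeps the original 'created' timestamp but gets a fresh slug,
    # a "Copy of ..." name, and permissions copied from its containers.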
Example #7
    def test_copy_permissions(self):
        self.src_process.set_permission(Permission.VIEW, self.contributor)
        self.src_process.set_permission(Permission.VIEW, self.group)

        copy_permissions(self.src_process, self.dst_process)
        self.assertTrue(
            self.contributor.has_perm(Permission.VIEW, self.dst_process))
        # User inherits permission from group
        self.assertTrue(self.user.has_perm(Permission.VIEW, self.dst_process))
Example #8
    def test_copy_permissions(self):
        assign_perm('view_process', self.contributor, self.src_process)
        assign_perm('view_process', self.group, self.src_process)

        copy_permissions(self.src_process, self.dst_process)

        self.assertEqual(GroupObjectPermission.objects.count(), 2)
        self.assertEqual(UserObjectPermission.objects.count(), 2)

        self.assertTrue(self.contributor.has_perm('flow.view_process', self.dst_process))
        # User inherits permission from group
        self.assertTrue(self.user.has_perm('flow.view_process', self.dst_process))
Example #9
    def register_descriptors(self, descriptor_schemas, user, force=False, verbosity=1):
        """Read and register descriptors."""
        log_descriptors = []

        for descriptor_schema in descriptor_schemas:
            for schema, _, _ in iterate_schema({}, descriptor_schema.get('schema', {})):
                if not schema['type'][-1].endswith(':'):
                    schema['type'] += ':'

            if 'schema' not in descriptor_schema:
                descriptor_schema['schema'] = []

            if not self.valid(descriptor_schema, DESCRIPTOR_SCHEMA):
                continue

            slug = descriptor_schema['slug']
            version = descriptor_schema.get('version', '0.0.0')
            int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = DescriptorSchema.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write("Skip descriptor schema {}: newer version installed".format(slug))
                continue

            previous_descriptor_qs = DescriptorSchema.objects.filter(slug=slug)
            if previous_descriptor_qs.exists():
                previous_descriptor = previous_descriptor_qs.latest()
            else:
                previous_descriptor = None

            descriptor_query = DescriptorSchema.objects.filter(slug=slug, version=version)
            if descriptor_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write("Skip descriptor schema {}: same version installed".format(slug))
                    continue

                descriptor_query.update(**descriptor_schema)
                log_descriptors.append("Updated {}".format(slug))
            else:
                descriptor = DescriptorSchema.objects.create(contributor=user, **descriptor_schema)
                assign_contributor_permissions(descriptor)
                if previous_descriptor:
                    copy_permissions(previous_descriptor, descriptor)
                log_descriptors.append("Inserted {}".format(slug))

        if log_descriptors and verbosity > 0:
            self.stdout.write("Descriptor schemas Updates:")
            for log in log_descriptors:
                self.stdout.write("  {}".format(log))
Example #10
    def move_to_collection(self, source_collection, destination_collection):
        """Move entity to destination collection."""
        if source_collection == destination_collection:
            return

        self.collection = destination_collection
        if destination_collection:
            self.tags = destination_collection.tags
            copy_permissions(destination_collection, self)
        self.save()

        for datum in self.data.all():
            datum.collection = destination_collection
            if destination_collection:
                datum.tags = destination_collection.tags
                copy_permissions(destination_collection, datum)
            datum.save()
Example #11
    def _handle_collection(obj, entity_operation=None):
        """Correctly assign Collection to Data and it's Entity.

        There are 2 x 4 possible scenarios how to handle collection
        assignment. One dimension in "decision matrix" is Data.collection:

            1.x Data.collection = None
            2.x Data.collection != None

        Second dimension is about Data.entity:

            x.1 Data.entity is None
            x.2 Data.entity was just created
            x.3 Data.entity already exists and Data.entity.collection = None
            x.4 Data.entity already exists and Data.entity.collection != None
        """
        # 1.2 and 1.3 require no action.

        # 1.1 and 2.1:
        if not obj.entity:
            return
        if entity_operation == HandleEntityOperation.ADD and obj.collection:
            # 2.3
            if not obj.entity.collection:
                raise ValueError(
                    "Created Data has collection {} assigned, but it is added to entity {} that is not "
                    "inside this collection.".format(obj.collection,
                                                     obj.entity))
            # 2.4
            assert obj.collection == obj.entity.collection

        # 1.4
        if not obj.collection and obj.entity and obj.entity.collection:
            obj.collection = obj.entity.collection
            obj.tags = obj.entity.collection.tags
            obj.save()
        # 2.2
        if entity_operation == HandleEntityOperation.CREATE and obj.collection:
            obj.entity.collection = obj.collection
            obj.entity.tags = obj.collection.tags
            obj.entity.save()
            copy_permissions(obj.collection, obj.entity)
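To make the decision matrix above concrete, a comment-only walkthrough (an illustration, not resolwe source) of the two branches that actually change state:

    # Case 1.4 - the Data object has no collection but its pre-existing entity
    # does: the data object inherits the entity's collection and tags.
    #     before: obj.collection is None, obj.entity.collection == c
    #     after _handle_collection(obj): obj.collection == c, obj.tags == c.tags
    #
    # Case 2.2 - the Data object has a collection and its entity was just
    # created: the entity inherits the collection, tags and permissions.
    #     before: obj.collection == c, entity_operation == HandleEntityOperation.CREATE
    #     after _handle_collection(obj, entity_operation): obj.entity.collection == c
    #     and copy_permissions(c, obj.entity) has been applied.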
Example #12
File: data.py  Project: tjanez/resolwe
    def create_entity(self):
        """Create entity if `flow_collection` is defined in process.

        Following rules applies for adding `Data` object to `Entity`:
        * Only add `Data object` to `Entity` if process has defined
        `flow_collwection` field
        * Add object to existing `Entity`, if all parents that are part
        of it (but not necessary all parents), are part of the same
        `Entity`
        * If parents belong to different `Entities` or do not belong to
        any `Entity`, create new `Entity`

        """
        ds_slug = self.process.flow_collection  # pylint: disable=no-member
        if ds_slug:
            entity_query = Entity.objects.filter(
                data__in=self.parents.all()).distinct()  # pylint: disable=no-member

            if entity_query.count() == 1:
                entity = entity_query.first()

                copy_permissions(entity, self)
            else:

                descriptor_schema = DescriptorSchema.objects.filter(
                    slug=ds_slug).latest()
                entity = Entity.objects.create(
                    contributor=self.contributor,
                    descriptor_schema=descriptor_schema,
                    name=self.name,
                    tags=self.tags,
                )

                assign_contributor_permissions(entity)

            entity.data.add(self)

            # Inherit collections from the entity.
            for collection in entity.collections.all():
                collection.data.add(self)
Example #13
    def perform_create(self, serializer):
        """Create a resource."""
        with transaction.atomic():
            instance = serializer.save()

            assign_contributor_permissions(instance)

            # The entity is added to the collection only when it has just been
            # created, i.e. when it contains exactly one Data object.
            entities = Entity.objects.annotate(num_data=Count('data')).filter(
                data=instance, num_data=1)

            # Assign data object to all specified collections.
            collection_pks = self.request.data.get('collections', [])
            for collection in Collection.objects.filter(pk__in=collection_pks):
                collection.data.add(instance)
                copy_permissions(collection, instance)

                # Add the entities that the data object belongs to into the collection.
                for entity in entities:
                    entity.collections.add(collection)
                    copy_permissions(collection, entity)
Example #14
    def duplicate(self, contributor=None, inherit_collection=False):
        """Duplicate (make a copy)."""
        duplicate = Entity.objects.get(id=self.id)
        duplicate.pk = None
        duplicate.slug = None
        duplicate.name = "Copy of {}".format(self.name)
        duplicate.duplicated = now()
        if contributor:
            duplicate.contributor = contributor

        duplicate.collection = None
        if inherit_collection:
            if not contributor.has_perm("edit_collection", self.collection):
                raise ValidationError(
                    "You do not have edit permission on collection {}.".format(
                        self.collection))
            duplicate.collection = self.collection

        duplicate.save(force_insert=True)

        assign_contributor_permissions(duplicate)

        # Override fields that are automatically set on create.
        duplicate.created = self.created
        duplicate.save()

        # Duplicate entity's data objects.
        data = get_objects_for_user(contributor, "view_data", self.data.all())
        duplicated_data = data.duplicate(contributor,
                                         inherit_collection=inherit_collection)
        duplicate.data.add(*duplicated_data)

        # Permissions
        assign_contributor_permissions(duplicate)
        copy_permissions(duplicate.collection, duplicate)

        return duplicate
Example #15
    def duplicate(self, contributor=None, inherit_collections=False):
        """Duplicate (make a copy)."""
        duplicate = Entity.objects.get(id=self.id)
        duplicate.pk = None
        duplicate.slug = None
        duplicate.name = 'Copy of {}'.format(self.name)
        duplicate.duplicated = now()
        if contributor:
            duplicate.contributor = contributor

        duplicate.save(force_insert=True)

        assign_contributor_permissions(duplicate)

        # Override fields that are automatically set on create.
        duplicate.created = self.created
        duplicate.save()

        # Duplicate entity's data objects.
        data = get_objects_for_user(contributor, 'view_data', self.data.all())  # pylint: disable=no-member
        duplicated_data = data.duplicate(contributor)
        duplicate.data.add(*duplicated_data)

        if inherit_collections:
            collections = get_objects_for_user(
                contributor,
                'add_collection',
                self.collections.all()  # pylint: disable=no-member
            )
            for collection in collections:
                collection.entity_set.add(duplicate)
                copy_permissions(collection, duplicate)
                collection.data.add(*duplicated_data)
                for datum in duplicated_data:
                    copy_permissions(collection, datum)

        return duplicate
Example #16
    def _handle_entity(obj):
        """Create entity if `entity.type` is defined in process.

        Following rules applies for adding `Data` object to `Entity`:
        * Only add `Data object` to `Entity` if process has defined
        `entity.type` field
        * Create new entity if parents do not belong to any `Entity`
        * Add object to existing `Entity`, if all parents that are part
        of it (but not necessary all parents), are part of the same
        `Entity`
        * If parents belong to different `Entities` don't do anything

        """
        entity_type = obj.process.entity_type
        entity_descriptor_schema = obj.process.entity_descriptor_schema
        entity_input = obj.process.entity_input
        entity_always_create = obj.process.entity_always_create
        operation = HandleEntityOperation.PASS

        if entity_type:
            data_filter = {}
            if entity_input:
                input_id = dict_dot(obj.input,
                                    entity_input,
                                    default=lambda: None)
                if input_id is None:
                    logger.warning(
                        "Skipping creation of entity due to missing input.")
                    return
                if isinstance(input_id, int):
                    data_filter["data__pk"] = input_id
                elif isinstance(input_id, list):
                    data_filter["data__pk__in"] = input_id
                else:
                    raise ValueError(
                        "Cannot create entity due to invalid value of field {}."
                        .format(entity_input))
            else:
                data_filter["data__in"] = obj.parents.all()

            entity_query = Entity.objects.filter(type=entity_type,
                                                 **data_filter).distinct()
            entity_count = entity_query.count()

            if entity_count == 0 or entity_always_create:
                descriptor_schema = DescriptorSchema.objects.filter(
                    slug=entity_descriptor_schema).latest()
                entity = Entity.objects.create(
                    contributor=obj.contributor,
                    descriptor_schema=descriptor_schema,
                    type=entity_type,
                    name=obj.name,
                    tags=obj.tags,
                )
                assign_contributor_permissions(entity)
                operation = HandleEntityOperation.CREATE

            elif entity_count == 1:
                entity = entity_query.first()
                obj.tags = entity.tags
                copy_permissions(entity, obj)
                operation = HandleEntityOperation.ADD

            else:
                logger.info(
                    "Skipping creation of entity due to multiple entities found."
                )
                entity = None

            if entity:
                obj.entity = entity
                obj.save()

            return operation
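For reference, a hedged sketch of the `entity` block in a process schema that feeds the fields read above; `register_processes` (see the registration examples below) splits it into `entity_type`, `entity_descriptor_schema`, `entity_input` and `entity_always_create`. The concrete slugs and values are illustrative only.

    # Illustrative process schema fragment (Python dict form of the YAML):
    process_schema = {
        'slug': 'example-process',            # hypothetical slug
        'entity': {
            'type': 'sample',                 # becomes entity_type
            'descriptor_schema': 'sample',    # becomes entity_descriptor_schema
            'input': 'src',                   # becomes entity_input, a dict_dot path into Data.input
            'always_create': False,           # becomes entity_always_create
        },
    }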
Example #17
    def register_processes(self, process_schemas, user, force=False, verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            # TODO: Remove this when all processes are migrated to the
            #       new syntax.
            if 'flow_collection' in p:
                if 'entity' in p:
                    self.stderr.write(
                        "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                        "allowed".format(p['slug'])
                    )
                    continue

                p['entity'] = {'type': p.pop('flow_collection')}

            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            for field in ['input', 'output']:
                for schema, _, _ in iterate_schema({}, p[field] if field in p else {}):
                    if not schema['type'][-1].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'entity' in p:
                if 'type' not in p['entity']:
                    self.stderr.write(
                        "Skip process {}: 'entity.type' required if 'entity' defined".format(p['slug'])
                    )
                    continue

                p['entity_type'] = p['entity']['type']
                p['entity_descriptor_schema'] = p['entity'].get('descriptor_schema', p['entity_type'])
                p['entity_input'] = p['entity'].get('input', None)
                p.pop('entity')

                if not DescriptorSchema.objects.filter(slug=p['entity_descriptor_schema']).exists():
                    self.stderr.write(
                        "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                        "field.".format(p['slug'], p['entity_descriptor_schema'])
                    )
                    continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'scheduling_class' in p:
                scheduling_class_mapping = {
                    'interactive': Process.SCHEDULING_CLASS_INTERACTIVE,
                    'batch': Process.SCHEDULING_CLASS_BATCH
                }

                p['scheduling_class'] = scheduling_class_mapping[p['scheduling_class']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema', []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write("Skip processor {}: execution engine '{}' not supported".format(
                        slug, p['run']['language']
                    ))
                    continue

            # Validate if container image is allowed based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'):
                try:
                    container_image = dict_dot(p, 'requirements.executor.docker.image')
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE, container_image):
                        self.stderr.write("Skip processor {}: container image does not match '{}'".format(
                            slug, settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                        ))
                        continue
                except KeyError:
                    pass

            version = p['version']
            int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write("Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write("Skip processor {}: same version installed".format(slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
Example #18
File: data.py  Project: jberci/resolwe
    def create_entity(self):
        """Create entity if `flow_collection` is defined in process.

        Following rules applies for adding `Data` object to `Entity`:
        * Only add `Data object` to `Entity` if process has defined
        `flow_collwection` field
        * Add object to existing `Entity`, if all parents that are part
        of it (but not necessary all parents), are part of the same
        `Entity`
        * If parents belong to different `Entities` or do not belong to
        any `Entity`, create new `Entity`

        """
        entity_type = self.process.entity_type  # pylint: disable=no-member
        entity_descriptor_schema = self.process.entity_descriptor_schema  # pylint: disable=no-member
        entity_input = self.process.entity_input  # pylint: disable=no-member

        if entity_type:
            data_filter = {}
            if entity_input:
                input_id = dict_dot(self.input,
                                    entity_input,
                                    default=lambda: None)
                if input_id is None:
                    logger.warning(
                        "Skipping creation of entity due to missing input.")
                    return
                if isinstance(input_id, int):
                    data_filter['data__pk'] = input_id
                elif isinstance(input_id, list):
                    data_filter['data__pk__in'] = input_id
                else:
                    raise ValueError(
                        "Cannot create entity due to invalid value of field {}."
                        .format(entity_input))
            else:
                data_filter['data__in'] = self.parents.all()  # pylint: disable=no-member

            entity_query = Entity.objects.filter(type=entity_type,
                                                 **data_filter).distinct()
            entity_count = entity_query.count()

            if entity_count == 0:
                descriptor_schema = DescriptorSchema.objects.filter(
                    slug=entity_descriptor_schema).latest()
                entity = Entity.objects.create(
                    contributor=self.contributor,
                    descriptor_schema=descriptor_schema,
                    type=entity_type,
                    name=self.name,
                    tags=self.tags,
                )
                assign_contributor_permissions(entity)

            elif entity_count == 1:
                entity = entity_query.first()
                copy_permissions(entity, self)

            else:
                logger.info(
                    "Skipping creation of entity due to multiple entities found."
                )
                entity = None

            if entity:
                entity.data.add(self)
                # Inherit collections from the entity.
                for collection in entity.collections.all():
                    collection.data.add(self)
Example #19
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            for field in ['input', 'output']:
                for schema, _, _ in iterate_schema(
                    {}, p[field] if field in p else {}):
                    if not schema['type'][-1].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'scheduling_class' in p:
                scheduling_class_mapping = {
                    'interactive': Process.SCHEDULING_CLASS_INTERACTIVE,
                    'batch': Process.SCHEDULING_CLASS_BATCH
                }

                p['scheduling_class'] = scheduling_class_mapping[
                    p['scheduling_class']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema',
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p['run']['language']))
                    continue

            # Validate if container image is allowed based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'):
                try:
                    container_image = dict_dot(
                        p, 'requirements.executor.docker.image')
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                                    container_image):
                        self.stderr.write(
                            "Skip processor {}: container image does not match '{}'"
                            .format(
                                slug,
                                settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                            ))
                        continue
                except KeyError:
                    pass

            version = p['version']
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
Example #20
def copy_objects(objects, contributor, name_prefix, obj_processor=None):
    """Make a copy of given queryset.

    Shallow copy the given queryset, set the contributor to the given value,
    prepend the prefix to each name, set the slug to a unique value, and set
    the ``duplicated`` date to the current time. Special attention is paid
    to keeping the ``created`` date at its original value.

    If an ``obj_processor`` function is given, each object is passed to it
    and the return value is used in its place.

    :param objects: A queryset to be copied.
    :type objects: `~resolwe.flow.models.base.BaseQuerySet`

    :param contributor: A Django user that will be assigned to copied objects
        as contributor.
    :type contributor: `~django.contrib.auth.models.User`

    :param str name_prefix: A prefix that will be prepended to the name of
        copied objects.

    """
    first = objects.first()
    if not first:
        return objects

    name_max_length = first._meta.get_field("name").max_length
    model = first._meta.model

    new_objects = []
    for obj in objects:
        new_obj = deepcopy(obj)
        new_obj.pk = None
        new_obj.slug = None
        new_obj.contributor = contributor
        new_obj.name = "{} {}".format(name_prefix, obj.name)
        new_obj._container_attributes = dict()

        if len(new_obj.name) > name_max_length:
            new_obj.name = "{}...".format(new_obj.name[:name_max_length - 3])

        if obj_processor:
            new_obj = obj_processor(new_obj)

        new_objects.append(new_obj)

    try:
        # Add another atomic block to avoid corrupting the main one.
        with transaction.atomic():
            model.objects.bulk_create(new_objects)
            # Send the bulk create custom signal, avoid circular import.
            from resolwe.flow.signals import post_duplicate

            post_duplicate.send(sender=model,
                                instances=new_objects,
                                old_instances=objects)
    except IntegrityError:
        # Probably a slug collision occurred, try to create objects one by one.
        for obj in new_objects:
            obj.slug = None
            # Call the parent method to skip pre-processing and validation.
            models.Model.save(obj)

    object_permission_group = dict()
    not_in_container = list()
    for old, new in zip(objects, new_objects):
        new.created = old.created
        new.duplicated = timezone.now()

        # Deal with permissions. When the object is in a container, point it
        # to the container's permission_group object.
        # When the object is not in a container, a new PermissionGroup proxy
        # object must be created and assigned to the new object, and the
        # permissions copied from the old object to the new one.
        if getattr(new, "collection_id", None) or getattr(
                new, "entity_id", None):
            new.permission_group = new.topmost_container.permission_group
        else:
            not_in_container.append((new, old))
            object_permission_group[new] = PermissionGroup()

    PermissionGroup.objects.bulk_create(object_permission_group.values())
    for new, old in not_in_container:
        new.permission_group = object_permission_group[new]
        copy_permissions(old, new)
        assign_contributor_permissions(new, contributor)

    model.objects.bulk_update(new_objects,
                              ["created", "duplicated", "permission_group"])
    return new_objects
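A hedged usage sketch of `copy_objects`, following the docstring above; the queryset, user, and prefix are hypothetical.

    # Hypothetical call: duplicate a queryset of Data objects for a user,
    # prefixing each name with "Copy of".
    duplicates = copy_objects(
        Data.objects.filter(pk__in=selected_pks),   # assumed queryset
        contributor=request.user,                   # assumed Django user
        name_prefix="Copy of",
    )
    # Objects outside a container get a fresh PermissionGroup with permissions
    # copied from the originals; objects inside a container reuse the
    # container's permission_group.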
Example #21
    def register_descriptors(self,
                             descriptor_schemas,
                             user,
                             force=False,
                             verbosity=1):
        """Read and register descriptors."""
        log_descriptors = []

        for descriptor_schema in descriptor_schemas:
            for schema, _, _ in iterate_schema({},
                                               descriptor_schema.get(
                                                   "schema", {})):
                if not schema["type"][-1].endswith(":"):
                    schema["type"] += ":"

            if "schema" not in descriptor_schema:
                descriptor_schema["schema"] = []

            if not self.valid(descriptor_schema, DESCRIPTOR_SCHEMA):
                continue

            slug = descriptor_schema["slug"]
            version = descriptor_schema.get("version", "0.0.0")
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = DescriptorSchema.objects.filter(
                slug=slug).aggregate(Max("version"))["version__max"]
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip descriptor schema {}: newer version installed".
                    format(slug))
                continue

            previous_descriptor_qs = DescriptorSchema.objects.filter(slug=slug)
            if previous_descriptor_qs.exists():
                previous_descriptor = previous_descriptor_qs.latest()
            else:
                previous_descriptor = None

            descriptor_query = DescriptorSchema.objects.filter(slug=slug,
                                                               version=version)
            if descriptor_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip descriptor schema {}: same version installed"
                            .format(slug))
                    continue

                descriptor_query.update(**descriptor_schema)
                log_descriptors.append("Updated {}".format(slug))
            else:
                descriptor = DescriptorSchema.objects.create(
                    contributor=user, **descriptor_schema)
                assign_contributor_permissions(descriptor)
                if previous_descriptor:
                    copy_permissions(previous_descriptor, descriptor)
                log_descriptors.append("Inserted {}".format(slug))

        if log_descriptors and verbosity > 0:
            self.stdout.write("Descriptor schemas Updates:")
            for log in log_descriptors:
                self.stdout.write("  {}".format(log))
Example #22
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            # TODO: Remove this when all processes are migrated to the
            #       new syntax.
            if "flow_collection" in p:
                if "entity" in p:
                    self.stderr.write(
                        "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                        "allowed".format(p["slug"]))
                    continue

                p["entity"] = {"type": p.pop("flow_collection")}

            if p["type"][-1] != ":":
                p["type"] += ":"

            if "category" in p and not p["category"].endswith(":"):
                p["category"] += ":"

            for field in ["input", "output"]:
                for schema, _, _ in iterate_schema(
                    {}, p[field] if field in p else {}):
                    if not schema["type"][-1].endswith(":"):
                        schema["type"] += ":"
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if "entity" in p:
                if "type" not in p["entity"]:
                    self.stderr.write(
                        "Skip process {}: 'entity.type' required if 'entity' defined"
                        .format(p["slug"]))
                    continue
                if "input" in p["entity"] and p["entity"].get(
                        "always_create", False):
                    self.stderr.write(
                        "Skip process {}: 'entity.input' will not be considered if 'entity.always_create' "
                        "is set to true.".format(p["slug"]))
                    continue

                p["entity_type"] = p["entity"]["type"]
                p["entity_descriptor_schema"] = p["entity"].get(
                    "descriptor_schema", p["entity_type"])
                p["entity_input"] = p["entity"].get("input", None)
                p["entity_always_create"] = p["entity"].get(
                    "always_create", False)
                p.pop("entity")

                if not DescriptorSchema.objects.filter(
                        slug=p["entity_descriptor_schema"]).exists():
                    self.stderr.write(
                        "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                        "field.".format(p["slug"],
                                        p["entity_descriptor_schema"]))
                    continue

            if "persistence" in p:
                persistence_mapping = {
                    "RAW": Process.PERSISTENCE_RAW,
                    "CACHED": Process.PERSISTENCE_CACHED,
                    "TEMP": Process.PERSISTENCE_TEMP,
                }

                p["persistence"] = persistence_mapping[p["persistence"]]

            if "scheduling_class" in p:
                scheduling_class_mapping = {
                    "interactive": Process.SCHEDULING_CLASS_INTERACTIVE,
                    "batch": Process.SCHEDULING_CLASS_BATCH,
                }

                p["scheduling_class"] = scheduling_class_mapping[
                    p["scheduling_class"]]

            if "input" in p:
                p["input_schema"] = p.pop("input")

            if "output" in p:
                p["output_schema"] = p.pop("output")

            slug = p["slug"]

            if "run" in p:
                # Set default language to 'bash' if not set.
                p["run"].setdefault("language", "bash")

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p["run"]["language"])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault("output_schema",
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p["run"]["language"]))
                    continue

            # Validate if container image is allowed based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, "FLOW_CONTAINER_VALIDATE_IMAGE"):
                try:
                    container_image = dict_dot(
                        p, "requirements.executor.docker.image")
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                                    container_image):
                        self.stderr.write(
                            "Skip processor {}: container image does not match '{}'"
                            .format(
                                slug,
                                settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                            ))
                        continue
                except KeyError:
                    pass

            version = p["version"]
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max("version"))["version__max"]
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
Example #23
    def evaluate(self, data):
        """Evaluate the code needed to compute a given Data object."""
        expression_engine = data.process.requirements.get('expression-engine', None)
        if expression_engine is not None:
            expression_engine = self.get_expression_engine(expression_engine)

        # Parse steps.
        steps = data.process.run.get('program', None)
        if steps is None:
            return

        if not isinstance(steps, list):
            raise ExecutionError('Workflow program must be a list of steps.')

        # Expression engine evaluation context.
        context = {
            'input': data.input,
            'steps': collections.OrderedDict(),
        }

        for index, step in enumerate(steps):
            try:
                step_id = step['id']
                step_slug = step['run']
            except KeyError as error:
                raise ExecutionError('Incorrect definition of step "{}", missing property "{}".'.format(
                    step.get('id', index), error
                ))

            # Fetch target process.
            process = Process.objects.filter(slug=step_slug).order_by('-version').first()
            if not process:
                raise ExecutionError('Incorrect definition of step "{}", invalid process "{}".'.format(
                    step_id, step_slug
                ))

            # Process all input variables.
            step_input = step.get('input', {})
            if not isinstance(step_input, dict):
                raise ExecutionError('Incorrect definition of step "{}", input must be a dictionary.'.format(
                    step_id
                ))

            data_input = self._evaluate_expressions(expression_engine, step_id, step_input, context)

            # Create the data object.
            data_object = Data.objects.create(
                process=process,
                contributor=data.contributor,
                tags=data.tags,
                input=data_input,
            )
            DataDependency.objects.create(
                parent=data,
                child=data_object,
                kind=DataDependency.KIND_SUBPROCESS,
            )

            # Copy permissions.
            copy_permissions(data, data_object)

            # Copy collections.
            for collection in data.collection_set.all():
                collection.data.add(data_object)

            context['steps'][step_id] = data_object.pk

        # Immediately set our status to done and output all data object identifiers.
        data.output = {
            'steps': list(context['steps'].values()),
        }
        data.status = Data.STATUS_DONE
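For context, a hedged sketch of the shape of `data.process.run['program']` that the evaluator above consumes; the slugs, inputs, and the Jinja-style template strings are illustrative assumptions, not taken from a real workflow.

    # Illustrative 'program' list: each step names an id, the slug of the
    # process to spawn, and an input dictionary evaluated by the expression
    # engine against the context ('input' and previously created 'steps').
    program = [
        {
            'id': 'align',                            # step_id, keyed into context['steps']
            'run': 'alignment-bwa',                   # hypothetical Process slug
            'input': {'reads': '{{ input.reads }}'},  # assumed expression-engine syntax
        },
        {
            'id': 'quantify',
            'run': 'expression-counts',
            'input': {'alignment': '{{ steps.align }}'},
        },
    ]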
Example #24
    def register_descriptors(self,
                             descriptor_schemas,
                             user,
                             force=False,
                             verbosity=1):
        """Read and register descriptors."""
        log_descriptors = []

        for descriptor_schema in descriptor_schemas:
            for field in ['var', 'schema']:
                for schema, _, _ in iterate_schema({},
                                                   descriptor_schema.get(
                                                       field, {})):
                    if not schema['type'][-1].endswith(':'):
                        schema['type'] += ':'

            # support backward compatibility
            # TODO: update .yml files and remove
            if 'slug' not in descriptor_schema:
                descriptor_schema['slug'] = slugify(
                    descriptor_schema.pop('name').replace(':', '-'))
                descriptor_schema['name'] = descriptor_schema.pop('label')

            if 'schema' not in descriptor_schema:
                descriptor_schema['schema'] = []

            if 'static' in descriptor_schema:
                descriptor_schema['schema'].extend(
                    descriptor_schema.pop('static'))
            if 'var' in descriptor_schema:
                descriptor_schema['schema'].extend(
                    descriptor_schema.pop('var'))

            if not self.valid(descriptor_schema, DESCRIPTOR_SCHEMA):
                continue

            slug = descriptor_schema['slug']
            version = descriptor_schema.get('version', '0.0.0')
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = DescriptorSchema.objects.filter(
                slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip descriptor schema {}: newer version installed".
                    format(slug))
                continue

            previous_descriptor_qs = DescriptorSchema.objects.filter(slug=slug)
            if previous_descriptor_qs.exists():
                previous_descriptor = previous_descriptor_qs.latest()
            else:
                previous_descriptor = None

            descriptor_query = DescriptorSchema.objects.filter(slug=slug,
                                                               version=version)
            if descriptor_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip descriptor schema {}: same version installed"
                            .format(slug))
                    continue

                descriptor_query.update(**descriptor_schema)
                log_descriptors.append("Updated {}".format(slug))
            else:
                descriptor = DescriptorSchema.objects.create(
                    contributor=user, **descriptor_schema)
                if previous_descriptor:
                    copy_permissions(previous_descriptor, descriptor)
                log_descriptors.append("Inserted {}".format(slug))

        if len(log_descriptors) > 0 and verbosity > 0:
            self.stdout.write("Descriptor schemas Updates:")
            for log in log_descriptors:
                self.stdout.write("  {}".format(log))
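
A minimal sketch of the normalization performed above, using a made-up legacy descriptor schema (all field names and values below are illustrative, not taken from the listing):

# Hypothetical legacy descriptor schema (illustrative only).
legacy = {
    'name': 'sample:annotation',
    'label': 'Sample annotation',
    'static': [{'name': 'organism', 'type': 'basic:string:'}],
    'var': [{'name': 'notes', 'type': 'basic:text'}],
}

# After the backward-compatibility block above, the dictionary would roughly become:
#   {
#       'slug': 'sample-annotation',
#       'name': 'Sample annotation',
#       'schema': [
#           {'name': 'organism', 'type': 'basic:string:'},
#           {'name': 'notes', 'type': 'basic:text:'},  # ':' appended by the type loop
#       ],
#   }
# and is then validated, version-checked and saved as a DescriptorSchema.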
Example #27
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            # get `data_name` from `static`
            if 'static' in p:
                for schema, _, _ in iterate_schema({}, p['static']):
                    if schema['name'] == 'name' and 'default' in schema:
                        p['data_name'] = schema['default']

            # support backward compatibility
            # TODO: update .yml files and remove
            if 'slug' not in p:
                p['slug'] = slugify(p.pop('name').replace(':', '-'))
                p['name'] = p.pop('label')

                p.pop('var', None)
                p.pop('static', None)

            for field in ['input', 'output', 'var', 'static']:
                for schema, _, _ in iterate_schema(
                    {}, p[field] if field in p else {}):
                    if not schema['type'][-1].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema',
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p['run']['language']))
                    continue

            version = p['version']
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if len(log_processors) > 0:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if len(log_templates) > 0:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
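
A minimal sketch of a process definition as the method above expects it; the slug, type, schemas and script are placeholder values, not taken from the listing:

# Hypothetical process definition (illustrative only).
p = {
    'slug': 'example-process',
    'name': 'Example process',
    'version': '1.0.0',
    'type': 'data:example',          # trailing ':' is appended by the loop above
    'persistence': 'CACHED',         # mapped to Process.PERSISTENCE_CACHED
    'input': [{'name': 'src', 'type': 'basic:file:'}],
    'output': [{'name': 'out', 'type': 'basic:file:'}],
    'run': {'program': 'echo ok'},   # 'language' defaults to 'bash'
}
# 'input'/'output' are renamed to 'input_schema'/'output_schema', the version is
# compared against the latest installed one, and the process is created with
# Process.objects.create(contributor=user, **p) (or updated when force is set
# and the same version already exists).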
Example #28
File: data.py  Project: genialis/resolwe
    def create_entity(self):
        """Create entity if `flow_collection` is defined in process.

        Following rules applies for adding `Data` object to `Entity`:
        * Only add `Data object` to `Entity` if process has defined
        `flow_collection` field
        * Add object to existing `Entity`, if all parents that are part
        of it (but not necessary all parents), are part of the same
        `Entity`
        * If parents belong to different `Entities` or do not belong to
        any `Entity`, create new `Entity`

        """
        entity_type = self.process.entity_type  # pylint: disable=no-member
        entity_descriptor_schema = self.process.entity_descriptor_schema  # pylint: disable=no-member
        entity_input = self.process.entity_input  # pylint: disable=no-member

        if entity_type:
            data_filter = {}
            if entity_input:
                input_id = dict_dot(self.input, entity_input, default=lambda: None)
                if input_id is None:
                    logger.warning("Skipping creation of entity due to missing input.")
                    return
                if isinstance(input_id, int):
                    data_filter['data__pk'] = input_id
                elif isinstance(input_id, list):
                    data_filter['data__pk__in'] = input_id
                else:
                    raise ValueError(
                        "Cannot create entity due to invalid value of field {}.".format(entity_input)
                    )
            else:
                data_filter['data__in'] = self.parents.all()  # pylint: disable=no-member

            entity_query = Entity.objects.filter(type=entity_type, **data_filter).distinct()
            entity_count = entity_query.count()

            if entity_count == 0:
                descriptor_schema = DescriptorSchema.objects.filter(
                    slug=entity_descriptor_schema
                ).latest()
                entity = Entity.objects.create(
                    contributor=self.contributor,
                    descriptor_schema=descriptor_schema,
                    type=entity_type,
                    name=self.name,
                    tags=self.tags,
                )
                assign_contributor_permissions(entity)

            elif entity_count == 1:
                entity = entity_query.first()
                copy_permissions(entity, self)

            else:
                logger.info("Skipping creation of entity due to multiple entities found.")
                entity = None

            if entity:
                entity.data.add(self)
                # Inherit collections from entity.
                for collection in entity.collections.all():
                    collection.data.add(self)
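
A short, hypothetical walk-through of the rules in the docstring above (the field values are invented for illustration):

# Hypothetical values (illustrative only):
#   self.process.entity_type == 'sample'
#   self.process.entity_input == 'reads'
#   self.input == {'reads': 42}
input_id = 42                          # what dict_dot(self.input, 'reads') would return
data_filter = {'data__pk': input_id}
# Entity.objects.filter(type='sample', **data_filter) then yields:
#   0 matches  -> a new Entity is created and contributor permissions assigned
#   1 match    -> the Data object joins it and inherits its permissions
#   2+ matches -> entity creation is skipped (logged at info level)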
Example #29
    def test_copy_perms_wrong_ctype(self):
        with self.assertRaises(AssertionError):
            copy_permissions(self.src_process, self.collection)
Example #30
    def evaluate(self, data):
        """Evaluate the code needed to compute a given Data object."""
        expression_engine = data.process.requirements.get('expression-engine', None)
        if expression_engine is not None:
            expression_engine = self.get_expression_engine(expression_engine)

        # Parse steps.
        steps = data.process.run.get('program', None)
        if steps is None:
            return

        if not isinstance(steps, list):
            raise ExecutionError('Workflow program must be a list of steps.')

        # Expression engine evaluation context.
        context = {
            'input': data.input,
            'steps': collections.OrderedDict(),
        }

        for index, step in enumerate(steps):
            try:
                step_id = step['id']
                step_slug = step['run']
            except KeyError as error:
                raise ExecutionError('Incorrect definition of step "{}", missing property "{}".'.format(
                    step.get('id', index), error
                ))

            # Fetch target process.
            process = Process.objects.filter(slug=step_slug).order_by('-version').first()
            if not process:
                raise ExecutionError('Incorrect definition of step "{}", invalid process "{}".'.format(
                    step_id, step_slug
                ))

            # Process all input variables.
            step_input = step.get('input', {})
            if not isinstance(step_input, dict):
                raise ExecutionError('Incorrect definition of step "{}", input must be a dictionary.'.format(
                    step_id
                ))

            data_input = self._evaluate_expressions(expression_engine, step_id, step_input, context)

            # Create the data object.
            data_object = Data.objects.create(
                process=process,
                contributor=data.contributor,
                input=data_input,
            )
            DataDependency.objects.create(
                parent=data,
                child=data_object,
                kind=DataDependency.KIND_SUBPROCESS,
            )

            # Copy permissions.
            copy_permissions(data, data_object)

            # Copy collections.
            for collection in data.collection_set.all():
                collection.data.add(data_object)

            context['steps'][step_id] = data_object.pk

        # Immediately set our status to done and output all data object identifiers.
        data.output = {
            'steps': list(context['steps'].values()),
        }
        data.status = Data.STATUS_DONE
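
For context, a sketch of the kind of workflow `run` section that evaluate() above walks over; the step ids, process slugs and expression syntax are assumptions for illustration only:

# Hypothetical workflow definition (illustrative only).
run = {
    'language': 'workflow',
    'program': [
        {
            'id': 'align',
            'run': 'alignment-bwa',                       # Process slug
            'input': {'reads': '{{ input.reads }}'},      # resolved by the expression engine
        },
        {
            'id': 'quantify',
            'run': 'expression-counts',
            'input': {'alignment': '{{ steps.align }}'},  # pk of the 'align' step's Data object
        },
    ],
}
# Each step creates one Data object linked to the workflow Data object through a
# DataDependency of kind KIND_SUBPROCESS; the created pk is stored in
# context['steps'][step_id] so later steps can reference it.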
Example #31
    def run(self, data_id, script, verbosity=1):
        """Execute the script and save results."""
        if verbosity >= 1:
            print('RUN: {} {}'.format(data_id, script))

        self.data_id = data_id
        self.process_failed = False

        # Fetch data instance to get any executor requirements.
        self.process = Data.objects.get(pk=data_id).process
        requirements = self.process.requirements
        self.requirements = requirements.get('executor', {}).get(self.name, {})
        self.resources = requirements.get('resources', {})

        data_dir = settings.FLOW_EXECUTOR['DATA_DIR']
        dir_mode = getattr(settings, 'FLOW_EXECUTOR',
                           {}).get('DATA_DIR_MODE', 0o755)

        output_path = os.path.join(data_dir, str(data_id))

        os.mkdir(output_path)
        # os.mkdir is not guaranteed to set the given mode
        os.chmod(output_path, dir_mode)
        os.chdir(output_path)

        log_file = open('stdout.txt', 'w+')
        json_file = open('jsonout.txt', 'w+')

        proc_pid = self.start()

        self.update_data_status(status=Data.STATUS_PROCESSING,
                                started=now(),
                                process_pid=proc_pid)

        # Run processor and handle intermediate results
        self.run_script(script)
        spawn_processors = []
        output = {}
        process_error, process_warning, process_info = [], [], []
        process_progress, process_rc = 0, 0

        # read processor output
        try:
            stdout = self.get_stdout()
            while True:
                line = stdout.readline()
                if not line:
                    break

                try:
                    if line.strip().startswith('run'):
                        # Save processor and spawn if no errors
                        log_file.write(line)
                        log_file.flush()

                        for obj in iterjson(line[3:].strip()):
                            spawn_processors.append(obj)
                    elif line.strip().startswith('export'):
                        file_name = line[6:].strip()

                        export_folder = settings.FLOW_EXECUTOR['UPLOAD_DIR']
                        unique_name = 'export_{}'.format(uuid.uuid4().hex)
                        export_path = os.path.join(export_folder, unique_name)

                        self.exported_files_mapper[self.data_id][file_name] = unique_name

                        shutil.move(file_name, export_path)
                    else:
                        # If JSON, save to MongoDB
                        updates = {}
                        for obj in iterjson(line):
                            for key, val in six.iteritems(obj):
                                if key.startswith('proc.'):
                                    if key == 'proc.error':
                                        process_error.append(val)
                                        if not process_rc:
                                            process_rc = 1
                                            updates['process_rc'] = process_rc
                                        updates['process_error'] = process_error
                                        updates['status'] = Data.STATUS_ERROR
                                    elif key == 'proc.warning':
                                        process_warning.append(val)
                                        updates['process_warning'] = process_warning
                                    elif key == 'proc.info':
                                        process_info.append(val)
                                        updates['process_info'] = process_info
                                    elif key == 'proc.rc':
                                        process_rc = int(val)
                                        updates['process_rc'] = process_rc
                                        if process_rc != 0:
                                            updates['status'] = Data.STATUS_ERROR
                                    elif key == 'proc.progress':
                                        process_progress = int(float(val) * 100)
                                        updates['process_progress'] = process_progress
                                else:
                                    dict_dot(output, key, val)
                                    updates['output'] = output

                        if updates:
                            updates['modified'] = now()
                            self.update_data_status(**updates)

                        if process_rc > 0:
                            log_file.close()
                            json_file.close()
                            os.chdir(CWD)
                            return

                        # Debug output
                        # Not referenced in Data object
                        json_file.write(line)
                        json_file.flush()

                except ValueError as ex:
                    # Ignore if not JSON
                    log_file.write(line)
                    log_file.flush()

        except MemoryError as ex:
            logger.error(__("Out of memory: {}", ex))

        except IOError as ex:
            # TODO: if ex.errno == 28: no more free space
            raise ex
        finally:
            # Store results
            log_file.close()
            json_file.close()
            os.chdir(CWD)

        return_code = self.end()

        if process_rc < return_code:
            process_rc = return_code

        # This transaction is needed to make sure that processing of
        # current data object is finished before manager for spawned
        # processes is triggered.
        with transaction.atomic():
            if spawn_processors and process_rc == 0:
                parent_data = Data.objects.get(pk=self.data_id)

                # Spawn processors
                for d in spawn_processors:
                    d['contributor'] = parent_data.contributor
                    d['process'] = Process.objects.filter(
                        slug=d['process']).latest()

                    for field_schema, fields in iterate_fields(
                            d.get('input', {}), d['process'].input_schema):
                        type_ = field_schema['type']
                        name = field_schema['name']
                        value = fields[name]

                        if type_ == 'basic:file:':
                            fields[name] = self.hydrate_spawned_files(
                                value, data_id)
                        elif type_ == 'list:basic:file:':
                            fields[name] = [
                                self.hydrate_spawned_files(fn, data_id)
                                for fn in value
                            ]

                    with transaction.atomic():
                        d = Data.objects.create(**d)
                        DataDependency.objects.create(
                            parent=parent_data,
                            child=d,
                            kind=DataDependency.KIND_SUBPROCESS,
                        )

                        # Copy permissions.
                        copy_permissions(parent_data, d)

                        # Entity is added to the collection only when it is
                        # created - when it only contains 1 Data object.
                        entities = Entity.objects.filter(data=d).annotate(
                            num_data=Count('data')).filter(num_data=1)

                        # Copy collections.
                        for collection in parent_data.collection_set.all():
                            collection.data.add(d)

                            # Add entities to which data belongs to the collection.
                            for entity in entities:
                                entity.collections.add(collection)

            if process_rc == 0 and not self.process_failed:
                self.update_data_status(status=Data.STATUS_DONE,
                                        process_progress=100,
                                        finished=now())
            else:
                self.update_data_status(status=Data.STATUS_ERROR,
                                        process_progress=100,
                                        process_rc=process_rc,
                                        finished=now())

            try:
                # Cleanup after processor
                data_purge(data_ids=[data_id],
                           delete=True,
                           verbosity=verbosity)
            except:  # pylint: disable=bare-except
                logger.error(__("Purge error:\n\n{}", traceback.format_exc()))
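
The reader loop above dispatches on a line-oriented protocol written to the process's stdout. A sketch of such lines, with invented values, is shown below:

# Hypothetical stdout emitted by a process (illustrative only).
example_stdout = [
    'run {"process": "child-process", "input": {}}',  # collected into spawn_processors
    'export result.txt',                              # file moved to UPLOAD_DIR under a unique name
    '{"proc.progress": 0.5}',                         # stored as process_progress = 50
    '{"proc.rc": 0}',                                 # process return code
    '{"out_file": {"file": "result.txt"}}',           # merged into output via dict_dot
]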
Example #32
    def handle_finish(self, obj):
        """Handle an incoming ``Data`` finished processing request.

        :param obj: The Channels message object. Command object format:

            .. code-block:: none

                {
                    'command': 'finish',
                    'data_id': [id of the :class:`~resolwe.flow.models.Data` object
                               this command changes],
                    'process_rc': [exit status of the processing],
                    'spawn_processes': [optional; list of spawn dictionaries],
                    'exported_files_mapper': [if spawn_processes present]
                }
        """
        data_id = obj[ExecutorProtocol.DATA_ID]
        logger.debug(__("Finishing Data with id {} (handle_finish).", data_id),
                     extra={
                         'data_id': data_id,
                         'packet': obj
                     })

        with transaction.atomic():
            # Spawn any new jobs in the request.
            spawned = False
            if ExecutorProtocol.FINISH_SPAWN_PROCESSES in obj:
                if is_testing():
                    # NOTE: This is a work-around for Django issue #10827
                    # (https://code.djangoproject.com/ticket/10827), same as in
                    # TestCaseHelpers._pre_setup(). Because the listener is running
                    # independently, it must clear the cache on its own.
                    ContentType.objects.clear_cache()

                spawned = True
                exported_files_mapper = obj[ExecutorProtocol.FINISH_EXPORTED_FILES]
                logger.debug(__(
                    "Spawning new Data objects for Data with id {} (handle_finish).",
                    data_id),
                             extra={'data_id': data_id})

                try:
                    # This transaction is needed because we're running
                    # asynchronously with respect to the main Django code
                    # here; the manager can get nudged from elsewhere.
                    with transaction.atomic():
                        parent_data = Data.objects.get(pk=data_id)

                        # Spawn processes.
                        for d in obj[ExecutorProtocol.FINISH_SPAWN_PROCESSES]:
                            d['contributor'] = parent_data.contributor
                            d['process'] = Process.objects.filter(
                                slug=d['process']).latest()

                            for field_schema, fields in iterate_fields(
                                    d.get('input', {}),
                                    d['process'].input_schema):
                                type_ = field_schema['type']
                                name = field_schema['name']
                                value = fields[name]

                                if type_ == 'basic:file:':
                                    fields[name] = self.hydrate_spawned_files(
                                        exported_files_mapper, value, data_id)
                                elif type_ == 'list:basic:file:':
                                    fields[name] = [
                                        self.hydrate_spawned_files(
                                            exported_files_mapper, fn, data_id)
                                        for fn in value
                                    ]

                            with transaction.atomic():
                                d = Data.objects.create(**d)
                                DataDependency.objects.create(
                                    parent=parent_data,
                                    child=d,
                                    kind=DataDependency.KIND_SUBPROCESS,
                                )

                                # Copy permissions.
                                copy_permissions(parent_data, d)

                                # Entity is added to the collection only when it is
                                # created - when it only contains 1 Data object.
                                entities = Entity.objects.filter(data=d).annotate(
                                    num_data=Count('data')).filter(num_data=1)

                                # Copy collections.
                                for collection in parent_data.collection_set.all():
                                    collection.data.add(d)

                                    # Add entities to which data belongs to the collection.
                                    for entity in entities:
                                        entity.collections.add(collection)

                except Exception:  # pylint: disable=broad-except
                    logger.error(__(
                        "Error while preparing spawned Data objects of process '{}' (handle_finish):\n\n{}",
                        parent_data.process.slug, traceback.format_exc()),
                                 extra={'data_id': data_id})

            # Data wrap up happens last, so that any triggered signals
            # already see the spawned children. What the children themselves
            # see is guaranteed by the transaction we're in.
            if ExecutorProtocol.FINISH_PROCESS_RC in obj:
                process_rc = obj[ExecutorProtocol.FINISH_PROCESS_RC]

                try:
                    d = Data.objects.get(pk=data_id)
                except Data.DoesNotExist:
                    logger.warning(
                        "Data object does not exist (handle_finish).",
                        extra={
                            'data_id': data_id,
                        })
                    async_to_sync(self._send_reply)(obj, {
                        ExecutorProtocol.RESULT:
                        ExecutorProtocol.RESULT_ERROR
                    })
                    return

                if process_rc == 0 and not d.status == Data.STATUS_ERROR:
                    changeset = {
                        'status': Data.STATUS_DONE,
                        'process_progress': 100,
                        'finished': now()
                    }
                else:
                    changeset = {
                        'status': Data.STATUS_ERROR,
                        'process_progress': 100,
                        'process_rc': process_rc,
                        'finished': now()
                    }
                obj[ExecutorProtocol.UPDATE_CHANGESET] = changeset
                self.handle_update(obj, internal_call=True)

                if not getattr(settings, 'FLOW_MANAGER_KEEP_DATA', False):
                    try:
                        # Clean up after process
                        data_purge(data_ids=[data_id],
                                   delete=True,
                                   verbosity=self._verbosity)
                    except Exception:  # pylint: disable=broad-except
                        logger.error(__("Purge error:\n\n{}",
                                        traceback.format_exc()),
                                     extra={'data_id': data_id})

        # Notify the executor that we're done.
        async_to_sync(self._send_reply)(
            obj, {
                ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK
            })

        # Now nudge the main manager to perform final cleanup. This is
        # needed even if there was no spawn baggage, since the manager
        # may need to know when executors have finished, to keep count
        # of them and manage synchronization.
        async_to_sync(consumer.send_event)({
            WorkerProtocol.COMMAND:
            WorkerProtocol.FINISH,
            WorkerProtocol.DATA_ID:
            data_id,
            WorkerProtocol.FINISH_SPAWNED:
            spawned,
            WorkerProtocol.FINISH_COMMUNICATE_EXTRA: {
                'executor':
                getattr(settings, 'FLOW_EXECUTOR',
                        {}).get('NAME', 'resolwe.flow.executors.local'),
            },
        })
Example #33
    def handle_finish(self, obj):
        """Handle an incoming ``Data`` finished processing request.

        :param obj: The Channels message object. Command object format:

            .. code-block:: none

                {
                    'command': 'finish',
                    'data_id': [id of the :class:`~resolwe.flow.models.Data` object
                               this command changes],
                    'process_rc': [exit status of the processing],
                    'spawn_processes': [optional; list of spawn dictionaries],
                    'exported_files_mapper': [if spawn_processes present]
                }
        """
        data_id = obj[ExecutorProtocol.DATA_ID]
        logger.debug(
            __("Finishing Data with id {} (handle_finish).", data_id),
            extra={
                'data_id': data_id,
                'packet': obj
            }
        )
        spawning_failed = False
        with transaction.atomic():
            # Spawn any new jobs in the request.
            spawned = False
            if ExecutorProtocol.FINISH_SPAWN_PROCESSES in obj:
                if is_testing():
                    # NOTE: This is a work-around for Django issue #10827
                    # (https://code.djangoproject.com/ticket/10827), same as in
                    # TestCaseHelpers._pre_setup(). Because the listener is running
                    # independently, it must clear the cache on its own.
                    ContentType.objects.clear_cache()

                spawned = True
                exported_files_mapper = obj[ExecutorProtocol.FINISH_EXPORTED_FILES]
                logger.debug(
                    __("Spawning new Data objects for Data with id {} (handle_finish).", data_id),
                    extra={
                        'data_id': data_id
                    }
                )

                try:
                    # This transaction is needed because we're running
                    # asynchronously with respect to the main Django code
                    # here; the manager can get nudged from elsewhere.
                    with transaction.atomic():
                        parent_data = Data.objects.get(pk=data_id)

                        # Spawn processes.
                        for d in obj[ExecutorProtocol.FINISH_SPAWN_PROCESSES]:
                            d['contributor'] = parent_data.contributor
                            d['process'] = Process.objects.filter(slug=d['process']).latest()
                            d['tags'] = parent_data.tags

                            for field_schema, fields in iterate_fields(d.get('input', {}), d['process'].input_schema):
                                type_ = field_schema['type']
                                name = field_schema['name']
                                value = fields[name]

                                if type_ == 'basic:file:':
                                    fields[name] = self.hydrate_spawned_files(
                                        exported_files_mapper, value, data_id
                                    )
                                elif type_ == 'list:basic:file:':
                                    fields[name] = [self.hydrate_spawned_files(exported_files_mapper, fn, data_id)
                                                    for fn in value]

                            with transaction.atomic():
                                d = Data.objects.create(**d)
                                DataDependency.objects.create(
                                    parent=parent_data,
                                    child=d,
                                    kind=DataDependency.KIND_SUBPROCESS,
                                )

                                # Copy permissions.
                                copy_permissions(parent_data, d)

                                # Entity is added to the collection only when it is
                                # created - when it only contains 1 Data object.
                                entities = Entity.objects.filter(data=d).annotate(num_data=Count('data')).filter(
                                    num_data=1)

                                # Copy collections.
                                for collection in parent_data.collection_set.all():
                                    collection.data.add(d)

                                    # Add entities to which data belongs to the collection.
                                    for entity in entities:
                                        entity.collections.add(collection)

                except Exception:  # pylint: disable=broad-except
                    logger.error(
                        __(
                            "Error while preparing spawned Data objects of process '{}' (handle_finish):\n\n{}",
                            parent_data.process.slug,
                            traceback.format_exc()
                        ),
                        extra={
                            'data_id': data_id
                        }
                    )
                    spawning_failed = True

            # Data wrap up happens last, so that any triggered signals
            # already see the spawned children. What the children themselves
            # see is guaranteed by the transaction we're in.
            if ExecutorProtocol.FINISH_PROCESS_RC in obj:
                process_rc = obj[ExecutorProtocol.FINISH_PROCESS_RC]

                try:
                    d = Data.objects.get(pk=data_id)
                except Data.DoesNotExist:
                    logger.warning(
                        "Data object does not exist (handle_finish).",
                        extra={
                            'data_id': data_id,
                        }
                    )
                    async_to_sync(self._send_reply)(obj, {ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR})
                    return

                changeset = {
                    'process_progress': 100,
                    'finished': now(),
                }

                if spawning_failed:
                    changeset['status'] = Data.STATUS_ERROR
                    changeset['process_error'] = ["Error while preparing spawned Data objects"]

                elif process_rc == 0 and not d.status == Data.STATUS_ERROR:
                    changeset['status'] = Data.STATUS_DONE

                else:
                    changeset['status'] = Data.STATUS_ERROR
                    changeset['process_rc'] = process_rc

                obj[ExecutorProtocol.UPDATE_CHANGESET] = changeset
                self.handle_update(obj, internal_call=True)

        if not getattr(settings, 'FLOW_MANAGER_KEEP_DATA', False):
            # Purge worker is not running in test runner, so we should skip triggering it.
            if not is_testing():
                channel_layer = get_channel_layer()
                try:
                    async_to_sync(channel_layer.send)(
                        CHANNEL_PURGE_WORKER,
                        {
                            'type': TYPE_PURGE_RUN,
                            'location_id': d.location.id,
                            'verbosity': self._verbosity,
                        }
                    )
                except ChannelFull:
                    logger.warning(
                        "Cannot trigger purge because channel is full.",
                        extra={'data_id': data_id}
                    )

        # Notify the executor that we're done.
        async_to_sync(self._send_reply)(obj, {ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK})

        # Now nudge the main manager to perform final cleanup. This is
        # needed even if there was no spawn baggage, since the manager
        # may need to know when executors have finished, to keep count
        # of them and manage synchronization.
        async_to_sync(consumer.send_event)({
            WorkerProtocol.COMMAND: WorkerProtocol.FINISH,
            WorkerProtocol.DATA_ID: data_id,
            WorkerProtocol.FINISH_SPAWNED: spawned,
            WorkerProtocol.FINISH_COMMUNICATE_EXTRA: {
                'executor': getattr(settings, 'FLOW_EXECUTOR', {}).get('NAME', 'resolwe.flow.executors.local'),
            },
        })
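
Putting the docstring above into concrete terms, a sketch of a 'finish' command object as handle_finish() might receive it; all values are invented for illustration and the exported-files mapping structure is an assumption:

# Hypothetical 'finish' command object (illustrative only).
obj = {
    'command': 'finish',
    'data_id': 123,
    'process_rc': 0,
    'spawn_processes': [
        {'process': 'child-process', 'input': {'src': 'result.txt'}},
    ],
    'exported_files_mapper': {'result.txt': 'export_0f3c9a1b'},  # mapping structure assumed
}
# With process_rc == 0 and no spawning errors, the Data object's status is set
# to STATUS_DONE; otherwise STATUS_ERROR is recorded together with the return
# code, and the purge worker and the main manager are notified afterwards.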