def test_checksum_consistency(self):
    """Check that the checksum is the same for both key orders of the input."""
    expected = 'ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c'
    process = Process(version='1.0.0', slug='my-process')
    data = Data()

    # Same keys and values, inserted in a different order.
    orderings = (
        {'tss': 0, 'genome': 'HG19'},
        {'genome': 'HG19', 'tss': 0},
    )
    for ordering in orderings:
        data.input = ordering
        checksum = get_data_checksum(data.input, process.slug, process.version)
        self.assertEqual(checksum, expected)
def test_checksum_consistency(self):
    """Check that reordering the input keys leaves the checksum unchanged."""
    process = Process(version='1.0.0', slug='my-process')
    data = Data()
    expected_checksum = 'ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c'

    def current_checksum():
        """Return the checksum of whatever input ``data`` currently holds."""
        return get_data_checksum(data.input, process.slug, process.version)

    data.input = {'tss': 0, 'genome': 'HG19'}
    self.assertEqual(current_checksum(), expected_checksum)

    # Same content, keys given in the opposite order.
    data.input = {'genome': 'HG19', 'tss': 0}
    self.assertEqual(current_checksum(), expected_checksum)
def test_checksum_consistency(self):
    """Check that the checksum does not change when input keys are reordered."""
    process = Process(version="1.0.0", slug="my-process")
    data = Data()
    expected = "ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c"

    # Both dicts hold the same items; only the insertion order differs.
    for process_input in (
        {"tss": 0, "genome": "HG19"},
        {"genome": "HG19", "tss": 0},
    ):
        data.input = process_input
        actual = get_data_checksum(data.input, process.slug, process.version)
        self.assertEqual(actual, expected)
def save(self, render_name=False, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
    """Save the data model.

    When the object is being created (``pk`` is ``None``) the input is
    filled with defaults from the process input schema, the name is
    rendered if none was given and the checksum is computed from the
    input, the process slug and the process version. The input, the
    descriptor and (unless the status is ``STATUS_ERROR``) the output
    are validated before the object is saved inside a transaction.

    :param render_name: re-render the name of an already saved object;
        not consulted when the object is being created
    :param args: positional arguments forwarded to ``_perform_save``
    :param kwargs: keyword arguments forwarded to ``_perform_save``; if
        ``update_fields`` is given and is not ``None``, it is replaced
        by a new list that additionally contains ``'size'``
    :raises ValueError: if ``descriptor`` is set but
        ``descriptor_schema`` is not
    """
    if self.name != self._original_name:
        self.named_by_user = True

    create = self.pk is None
    if create:
        fill_with_defaults(self.input, self.process.input_schema)  # pylint: disable=no-member

        if not self.name:
            self._render_name()
        else:
            self.named_by_user = True

        self.checksum = get_data_checksum(
            self.input, self.process.slug, self.process.version)  # pylint: disable=no-member

    elif render_name:
        self._render_name()

    self.save_storage(self.output, self.process.output_schema)  # pylint: disable=no-member

    if self.status != Data.STATUS_ERROR:
        hydrate_size(self)
        # If only specified fields are updated (e.g. in executor), size needs to be added.
        # Build a new list instead of appending in place: the caller's object is left
        # untouched and ``None`` or non-list iterables (e.g. tuples) no longer break.
        update_fields = kwargs.get('update_fields')
        if update_fields is not None:
            kwargs['update_fields'] = list(update_fields) + ['size']

    # Input Data objects are validated only upon creation as they can be deleted later.
    skip_missing_data = not create
    validate_schema(
        self.input, self.process.input_schema, skip_missing_data=skip_missing_data  # pylint: disable=no-member
    )

    render_descriptor(self)

    if self.descriptor_schema:
        try:
            validate_schema(self.descriptor, self.descriptor_schema.schema)  # pylint: disable=no-member
            self.descriptor_dirty = False
        except DirtyError:
            # A DirtyError is recorded on the object instead of aborting the save.
            self.descriptor_dirty = True
    elif self.descriptor and self.descriptor != {}:
        raise ValueError("`descriptor_schema` must be defined if `descriptor` is given")

    if self.status != Data.STATUS_ERROR:
        output_schema = self.process.output_schema  # pylint: disable=no-member
        if self.status == Data.STATUS_DONE:
            validate_schema(
                self.output, output_schema, data_location=self.location, skip_missing_data=True
            )
        else:
            # Any status other than done/error: required fields are not enforced.
            validate_schema(
                self.output, output_schema, data_location=self.location, test_required=False
            )

    with transaction.atomic():
        self._perform_save(*args, **kwargs)
def save(self, render_name=False, *args, **kwargs):
    """Save the data model.

    ``process_resources`` is validated against the ``process_resources``
    validation schema first. When the object is being created (``pk`` is
    ``None``) the input is filled with defaults, the name is rendered if
    none was given, the checksum is computed, the input is validated and
    the size is hydrated. The descriptor is rendered and validated in
    both cases, and the object is saved inside a transaction.

    :param render_name: re-render the name of an already saved object;
        not consulted when the object is being created
    :param args: positional arguments forwarded to ``_perform_save``
    :param kwargs: keyword arguments forwarded to ``_perform_save``; on
        creation, if ``update_fields`` is given and is not ``None``, it
        is replaced by a new list that additionally contains ``"size"``
    :raises ValidationError: if ``process_resources`` does not match
        its JSON schema
    :raises ValueError: if ``descriptor`` is set but
        ``descriptor_schema`` is not
    """
    if self.name != self._original_name:
        self.named_by_user = True

    try:
        jsonschema.validate(
            self.process_resources, validation_schema("process_resources")
        )
    except jsonschema.exceptions.ValidationError as exception:
        # Re-raise as Django ValidationError, keeping the original as the cause.
        raise ValidationError(exception.message) from exception

    create = self.pk is None
    if create:
        fill_with_defaults(self.input, self.process.input_schema)

        if not self.name:
            self._render_name()
        else:
            self.named_by_user = True

        self.checksum = get_data_checksum(
            self.input, self.process.slug, self.process.version
        )

        validate_schema(self.input, self.process.input_schema)

        hydrate_size(self)
        # If only specified fields are updated (e.g. in executor), size needs to be added.
        # Build a new list instead of appending in place: the caller's object is left
        # untouched and ``None`` or non-list iterables (e.g. tuples) no longer break.
        update_fields = kwargs.get("update_fields")
        if update_fields is not None:
            kwargs["update_fields"] = list(update_fields) + ["size"]

    elif render_name:
        self._render_name()

    render_descriptor(self)

    if self.descriptor_schema:
        try:
            validate_schema(self.descriptor, self.descriptor_schema.schema)
            self.descriptor_dirty = False
        except DirtyError:
            # A DirtyError is recorded on the object instead of aborting the save.
            self.descriptor_dirty = True
    elif self.descriptor and self.descriptor != {}:
        raise ValueError(
            "`descriptor_schema` must be defined if `descriptor` is given"
        )

    with transaction.atomic():
        self._perform_save(*args, **kwargs)

    # Remember the output as of this save.
    self._original_output = self.output
def perform_get_or_create(self, request, *args, **kwargs):
    """Perform "get_or_create" - return existing object if found.

    The request data is validated with the serializer, the given input
    is filled with defaults from the process input schema and a checksum
    is computed from it. ``Data`` objects with that checksum whose
    process persistence is cached or temporary, and on which the user
    has ``view_data`` permission, are considered matches.

    :param request: request whose ``data`` holds the process and input
    :param args: unused
    :param kwargs: unused
    :return: ``Response`` with the serialized match that sorts last by
        ``created``, or ``None`` if there is no match
    """
    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    process = serializer.validated_data.get('process')
    process_input = request.data.get('input', {})

    fill_with_defaults(process_input, process.input_schema)

    checksum = get_data_checksum(process_input, process.slug, process.version)
    data_qs = Data.objects.filter(
        checksum=checksum,
        process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
    )
    data_qs = get_objects_for_user(request.user, 'view_data', data_qs)

    # ``last()`` returns ``None`` for an empty queryset, so a single query both
    # answers "is there a match?" and fetches it (no separate ``exists()`` call).
    data = data_qs.order_by('created').last()
    if data is None:
        return None

    serializer = self.get_serializer(data)
    return Response(serializer.data)
def save(self, render_name=False, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
    """Save the data model.

    When the object is being created (``pk`` is ``None``) the input is
    filled with defaults from the process input schema, the name is
    rendered if none was given and the checksum is computed from the
    input, the process slug and the process version. The input, the
    descriptor and (unless the status is ``STATUS_ERROR``) the output
    are validated before the object is saved. On creation, dependencies
    are saved and ``create_entity`` is called in the same transaction.

    :param render_name: re-render the name of an already saved object;
        not consulted when the object is being created
    :param args: positional arguments forwarded to the parent ``save``
    :param kwargs: keyword arguments forwarded to the parent ``save``;
        if ``update_fields`` is given and is not ``None``, it is
        replaced by a new list that additionally contains ``'size'``
    :raises ValueError: if ``descriptor`` is set but
        ``descriptor_schema`` is not
    """
    # A name that differs from ``_original_name`` is flagged as set by the user.
    if self.name != self._original_name:
        self.named_by_user = True

    create = self.pk is None
    if create:
        fill_with_defaults(self.input, self.process.input_schema)  # pylint: disable=no-member

        if not self.name:
            self._render_name()
        else:
            self.named_by_user = True

        self.checksum = get_data_checksum(self.input, self.process.slug, self.process.version)  # pylint: disable=no-member

    elif render_name:
        self._render_name()

    self.save_storage(self.output, self.process.output_schema)  # pylint: disable=no-member

    if self.status != Data.STATUS_ERROR:
        hydrate_size(self)
        # If only specified fields are updated (e.g. in executor), size needs to be added.
        # Build a new list instead of appending in place: the caller's object is left
        # untouched and ``None`` or non-list iterables (e.g. tuples) no longer break.
        update_fields = kwargs.get('update_fields')
        if update_fields is not None:
            kwargs['update_fields'] = list(update_fields) + ['size']

    # Input Data objects are validated only upon creation as they can be deleted later.
    skip_missing_data = not create
    validate_schema(
        self.input, self.process.input_schema, skip_missing_data=skip_missing_data  # pylint: disable=no-member
    )

    render_descriptor(self)

    if self.descriptor_schema:
        try:
            validate_schema(self.descriptor, self.descriptor_schema.schema)  # pylint: disable=no-member
            self.descriptor_dirty = False
        except DirtyError:
            # A DirtyError is recorded on the object instead of aborting the save.
            self.descriptor_dirty = True
    elif self.descriptor and self.descriptor != {}:
        raise ValueError(
            "`descriptor_schema` must be defined if `descriptor` is given")

    if self.status != Data.STATUS_ERROR:
        path_prefix = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(self.pk))
        output_schema = self.process.output_schema  # pylint: disable=no-member
        if self.status == Data.STATUS_DONE:
            validate_schema(self.output, output_schema, path_prefix=path_prefix)
        else:
            # Any status other than done/error: required fields are not enforced.
            validate_schema(self.output, output_schema, path_prefix=path_prefix, test_required=False)

    with transaction.atomic():
        super().save(*args, **kwargs)

        # We can only save dependencies after the data object has been saved. This
        # is why a transaction block is needed and the save method must be called first.
        if create:
            self.save_dependencies(self.input, self.process.input_schema)  # pylint: disable=no-member
            self.create_entity()
def create(self, request, *args, **kwargs):
    """Create a resource.

    The user must have ``add_collection`` permission on every collection
    listed in the request. The process slug from the request is resolved
    to the primary key of the latest process with that slug that the
    user can view, and written back into ``request.data``.

    If the ``get_or_create`` keyword argument is truthy, an existing
    ``Data`` object with the same checksum (cached or temporary process
    persistence, viewable by the user) is returned instead of creating a
    new one.

    :param request: request describing the object to create
    :param args: positional arguments forwarded to the parent ``create``
    :param kwargs: keyword arguments forwarded to the parent ``create``;
        ``get_or_create`` is popped before forwarding
    :return: response with the existing or newly created object, or a
        400 response for an unknown collection pk or process slug
    :raises exceptions.PermissionDenied: if the user can view a
        collection but lacks ``add_collection`` permission on it
    :raises exceptions.NotFound: if the user has neither permission on
        a collection
    """
    collections = request.data.get('collections', [])

    # check that user has permissions on all collections that Data
    # object will be added to
    for collection_id in collections:
        try:
            collection = Collection.objects.get(pk=collection_id)
        except Collection.DoesNotExist:
            return Response(
                {
                    'collections': [
                        'Invalid pk "{}" - object does not exist.'.format(collection_id)
                    ]
                },
                status=status.HTTP_400_BAD_REQUEST)

        if not request.user.has_perm('add_collection', obj=collection):
            if request.user.has_perm('view_collection', obj=collection):
                raise exceptions.PermissionDenied(
                    "You don't have `ADD` permission on collection (id: {}).".format(collection_id))
            else:
                # Users who cannot even view the collection get the same
                # answer as for a collection that does not exist.
                raise exceptions.NotFound(
                    "Collection not found (id: {}).".format(collection_id))

    # translate process's slug to id
    process_slug = request.data.get('process', None)
    process_query = Process.objects.filter(slug=process_slug)
    process_query = get_objects_for_user(request.user, 'view_process', process_query)
    try:
        process = process_query.latest()
    except Process.DoesNotExist:
        return Response(
            {
                'process': [
                    'Invalid process slug "{}" - object does not exist.'.format(process_slug)
                ]
            },
            status=status.HTTP_400_BAD_REQUEST)
    request.data['process'] = process.pk

    # perform "get_or_create" if requested - return existing object
    # if found
    if kwargs.pop('get_or_create', False):
        process_input = request.data.get('input', {})

        # use default values if they are not given
        for field_schema, fields, path in iterate_schema(process_input, process.input_schema):
            if 'default' in field_schema and field_schema['name'] not in fields:
                dict_dot(process_input, path, field_schema['default'])

        checksum = get_data_checksum(process_input, process.slug, process.version)
        data_qs = Data.objects.filter(
            checksum=checksum,
            process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
        )
        data_qs = get_objects_for_user(request.user, 'view_data', data_qs)

        # ``last()`` returns ``None`` for an empty queryset, so a single query
        # both checks for a match and fetches it (no separate ``exists()``).
        data = data_qs.order_by('created').last()
        if data is not None:
            serializer = self.get_serializer(data)
            return Response(serializer.data)

    # create the objects
    resp = super(DataViewSet, self).create(request, *args, **kwargs)

    # run manager
    manager.communicate()

    return resp
def save(self, render_name=False, *args, **kwargs): """Save the data model.""" # Generate the descriptor if one is not already set. if self.name != self._original_name: self.named_by_user = True create = self.pk is None if create: # Default values for INPUT input_schema = self.process.input_schema # pylint: disable=no-member for field_schema, fields, path in iterate_schema( self.input, input_schema): if 'default' in field_schema and field_schema[ 'name'] not in fields: dict_dot(self.input, path, field_schema['default']) if not self.name: self._render_name() else: self.named_by_user = True self.checksum = get_data_checksum(self.input, self.process.slug, self.process.version) # pylint: disable=no-member elif render_name: self._render_name() self.save_storage(self.output, self.process.output_schema) # pylint: disable=no-member if self.status != Data.STATUS_ERROR: hydrate_size(self) if create: validate_schema(self.input, self.process.input_schema) # pylint: disable=no-member render_descriptor(self) if self.descriptor_schema: try: validate_schema(self.descriptor, self.descriptor_schema.schema) # pylint: disable=no-member self.descriptor_dirty = False except DirtyError: self.descriptor_dirty = True elif self.descriptor and self.descriptor != {}: raise ValueError( "`descriptor_schema` must be defined if `descriptor` is given") if self.status != Data.STATUS_ERROR: path_prefix = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(self.pk)) output_schema = self.process.output_schema # pylint: disable=no-member if self.status == Data.STATUS_DONE: validate_schema(self.output, output_schema, path_prefix=path_prefix) else: validate_schema(self.output, output_schema, path_prefix=path_prefix, test_required=False) with transaction.atomic(): super(Data, self).save(*args, **kwargs) # We can only save dependencies after the data object has been saved. This # is why a transaction block is needed and the save method must be called first. 
if create: self.save_dependencies(self.input, self.process.input_schema) # pylint: disable=no-member if create: self.create_entity()
def calculate_checksum(apps, schema_editor):
    """Recompute and store the checksum of every ``Data`` object.

    The checksum is computed from the object's input and the slug and
    version of its process, and the object is then saved.

    :param apps: app registry used to look up the ``flow.Data`` model
    :param schema_editor: unused
    """
    Data = apps.get_model("flow", "Data")

    # Load each object's process in the same query instead of issuing one
    # extra query per object when ``data.process`` is accessed.
    for data in Data.objects.select_related("process"):
        data.checksum = get_data_checksum(data.input, data.process.slug, data.process.version)
        data.save()
def create(self, request, *args, **kwargs):
    """Create a resource.

    The user must have ``add_collection`` permission on every collection
    listed in the request and ``view_process`` permission on the
    process. The process slug from the request is resolved to the
    primary key of the process with that slug that sorts last by
    ``version``, and written back into ``request.data``.

    If the ``get_or_create`` keyword argument is truthy, an existing
    ``Data`` object with the same checksum (cached or temporary process
    persistence, viewable by the user) is returned instead of creating a
    new one.

    :param request: request describing the object to create
    :param args: positional arguments forwarded to the parent ``create``
    :param kwargs: keyword arguments forwarded to the parent ``create``;
        ``get_or_create`` is popped before forwarding
    :return: response with the existing or newly created object, or a
        400 response for an unknown collection pk or process slug
    :raises exceptions.PermissionDenied: if an authenticated user lacks
        a required permission
    :raises exceptions.NotFound: if an unauthenticated user lacks a
        required permission
    """
    collections = request.data.get('collections', [])

    # check that user has permissions on all collections that Data
    # object will be added to
    for collection_id in collections:
        try:
            collection = Collection.objects.get(pk=collection_id)
        except Collection.DoesNotExist:
            return Response({'collections': ['Invalid pk "{}" - object does not exist.'.format(collection_id)]},
                            status=status.HTTP_400_BAD_REQUEST)

        if not request.user.has_perm('add_collection', obj=collection):
            if request.user.is_authenticated():
                raise exceptions.PermissionDenied
            else:
                raise exceptions.NotFound

    # translate process's slug to id
    process_slug = request.data.get('process', None)
    process_query = Process.objects.filter(slug=process_slug).order_by('version')

    # ``last()`` returns ``None`` for an empty queryset, so a single query
    # both checks for existence and fetches the process.
    process = process_query.last()
    if process is None:
        # XXX: security - is it ok to reveal which processes (don't) exist?
        return Response({'process': ['Invalid process slug "{}" - object does not exist.'.format(process_slug)]},
                        status=status.HTTP_400_BAD_REQUEST)
    request.data['process'] = process.pk

    # check that user has permission on the process
    if not request.user.has_perm('view_process', obj=process):
        if request.user.is_authenticated():
            raise exceptions.PermissionDenied
        else:
            raise exceptions.NotFound

    # perform "get_or_create" if requested - return existing object
    # if found
    if kwargs.pop('get_or_create', False):
        process_input = request.data.get('input', {})

        # use default values if they are not given
        for field_schema, fields, path in iterate_schema(process_input, process.input_schema):
            if 'default' in field_schema and field_schema['name'] not in fields:
                dict_dot(process_input, path, field_schema['default'])

        checksum = get_data_checksum(process_input, process.slug, process.version)
        data_qs = Data.objects.filter(
            checksum=checksum,
            process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
        )
        data_qs = get_objects_for_user(request.user, 'view_data', data_qs)

        # Single query: fetch the match that sorts last by ``created``, if any.
        data = data_qs.order_by('created').last()
        if data is not None:
            serializer = self.get_serializer(data)
            return Response(serializer.data)

    # create the objects
    resp = super(ResolweCreateDataModelMixin, self).create(request, *args, **kwargs)

    # run manager
    manager.communicate()

    return resp
def save(self, render_name=False, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
    """Save the data model.

    When the object is being created (``pk`` is ``None``) the input is
    filled with defaults from the process input schema, the name is
    rendered if none was given and the checksum is computed from the
    input, the process slug and the process version. The input, the
    descriptor and (unless the status is ``STATUS_ERROR``) the output
    are validated before the object is saved. On creation, dependencies
    are saved and ``create_entity`` is called in the same transaction.

    :param render_name: re-render the name of an already saved object;
        not consulted when the object is being created
    :param args: positional arguments forwarded to ``_perform_save``
    :param kwargs: keyword arguments forwarded to ``_perform_save``; if
        ``update_fields`` is given and is not ``None``, it is replaced
        by a new list that additionally contains ``'size'``
    :raises ValueError: if ``descriptor`` is set but
        ``descriptor_schema`` is not
    """
    if self.name != self._original_name:
        self.named_by_user = True

    create = self.pk is None
    if create:
        fill_with_defaults(self.input, self.process.input_schema)  # pylint: disable=no-member

        if not self.name:
            self._render_name()
        else:
            self.named_by_user = True

        self.checksum = get_data_checksum(
            self.input, self.process.slug, self.process.version)  # pylint: disable=no-member

    elif render_name:
        self._render_name()

    self.save_storage(self.output, self.process.output_schema)  # pylint: disable=no-member

    if self.status != Data.STATUS_ERROR:
        hydrate_size(self)
        # If only specified fields are updated (e.g. in executor), size needs to be added.
        # Build a new list instead of appending in place: the caller's object is left
        # untouched and ``None`` or non-list iterables (e.g. tuples) no longer break.
        update_fields = kwargs.get('update_fields')
        if update_fields is not None:
            kwargs['update_fields'] = list(update_fields) + ['size']

    # Input Data objects are validated only upon creation as they can be deleted later.
    skip_missing_data = not create
    validate_schema(
        self.input, self.process.input_schema, skip_missing_data=skip_missing_data  # pylint: disable=no-member
    )

    render_descriptor(self)

    if self.descriptor_schema:
        try:
            validate_schema(self.descriptor, self.descriptor_schema.schema)  # pylint: disable=no-member
            self.descriptor_dirty = False
        except DirtyError:
            # A DirtyError is recorded on the object instead of aborting the save.
            self.descriptor_dirty = True
    elif self.descriptor and self.descriptor != {}:
        raise ValueError("`descriptor_schema` must be defined if `descriptor` is given")

    if self.status != Data.STATUS_ERROR:
        output_schema = self.process.output_schema  # pylint: disable=no-member
        if self.status == Data.STATUS_DONE:
            validate_schema(
                self.output, output_schema, data_location=self.location, skip_missing_data=True
            )
        else:
            # Any status other than done/error: required fields are not enforced.
            validate_schema(
                self.output, output_schema, data_location=self.location, test_required=False
            )

    with transaction.atomic():
        self._perform_save(*args, **kwargs)

        # We can only save dependencies after the data object has been saved. This
        # is why a transaction block is needed and the save method must be called first.
        if create:
            self.save_dependencies(self.input, self.process.input_schema)  # pylint: disable=no-member
            self.create_entity()