def testGetByValue_file(self):
    """get_by_value resolves every supported file-identifier form."""
    do = DataObject.create_and_initialize_file_resource(
        filename=filename_1, md5=md5_1, source_type='result')
    # Identifier grammar: name, $hash, @uuid, and their combinations.
    hash_part = '$%s' % md5_1
    uuid_part = '@%s' % do.uuid
    identifiers = [
        filename_1,
        hash_part,
        uuid_part,
        filename_1 + uuid_part,
        filename_1 + hash_part,
        filename_1 + hash_part + uuid_part,
        hash_part + uuid_part,
    ]
    for identifier in identifiers:
        fetched = DataObject.get_by_value(identifier, 'file')
        self.assertEqual(do.uuid, fetched.uuid)
def testCalculateContentsFingerprint_integer(self):
    """Model fingerprint for an integer agrees with the standalone helper."""
    contents = {'type': 'integer', 'value': 17}
    data_object = DataObject.get_by_value(contents['value'], contents['type'])
    expected = calculate_contents_fingerprint(contents)
    self.assertEqual(data_object.calculate_contents_fingerprint(), expected)
def _create_file_data_object():
    """Build an imported-file DataObject fixture (md5 081d...d0a1)."""
    return DataObject.create_and_initialize_file_resource(
        filename='myfile.dat',
        source_type='imported',
        imported_from_url='file:///data/myfile.dat',
        import_comments='Test data',
        md5='081deeb1218a094526005f5f00ffd0a1')
def _create_file_data_object_2():
    """Build a second imported-file fixture; differs from the first only by md5."""
    return DataObject.create_and_initialize_file_resource(
        filename='myfile.dat',
        source_type='imported',
        imported_from_url='file:///data/myfile.dat',
        import_comments='Test data',
        md5='4175ed4ee06b828ff008949e28a61bf2')
def testUpdateUploadStatus(self):
    """upload_status can be changed via DataObjectUpdateSerializer."""
    file_data = fixtures.data_objects.file_data_object['value']
    data_object = DataObject.create_and_initialize_file_resource(**file_data)
    create_serializer = DataObjectSerializer(
        data_object, context=get_mock_context())
    create_serializer.save()
    update_serializer = DataObjectUpdateSerializer(data_object)
    update_serializer.update(
        data_object, {'value': {'upload_status': 'error'}})
    self.assertEqual(
        update_serializer.data['value']['upload_status'], 'error')
def testUpdateProtectedValue(self):
    """Attempting to update the protected 'type' field raises ValidationError."""
    file_data = fixtures.data_objects.file_data_object['value']
    data_object = DataObject.create_and_initialize_file_resource(**file_data)
    create_serializer = DataObjectSerializer(
        data_object, context=get_mock_context())
    create_serializer.save()
    update_serializer = DataObjectUpdateSerializer(data_object)
    with self.assertRaises(ValidationError):
        update_serializer.update(data_object, {'type': 'string'})
def _process_preexisting_and_unsaved_data_nodes(self):
    """Resolve cached DataNode contents into saved or unsaved DataObjects.

    Steps:
      1. Move root DataNodes that already exist in the DB from
         self._unsaved_root_data_nodes to self._preexisting_data_nodes.
      2. Expand each remaining root node's cached tree contents into
         nested nodes, one scalar value per node.
      3. For each expanded node, either build an unsaved DataObject
         (dict contents), queue a file reference for batched lookup
         (string contents on a 'file' node), or wrap a plain value.
      4. Resolve all file references in one query; exactly one match
         per reference is required, otherwise ValidationError.
      5. Move DataObjects that already exist in the DB from the
         unsaved set to self._preexisting_data_objects.
    """
    # All DataNodes are in self._unsaved_data_nodes, but
    # we need to remove any of these that are preexisting.
    data_node_uuids = self._unsaved_root_data_nodes.keys()
    preexisting_data_nodes = DataNode.objects.filter(uuid__in=data_node_uuids)
    for node in preexisting_data_nodes:
        self._preexisting_data_nodes[node.uuid] = node
        self._unsaved_root_data_nodes.pop(node.uuid)
    # This step converts from a root node with tree data cached (if parallel)
    # to nested nodes, each with cached contents for one scalar data object.
    for data_node in self._unsaved_root_data_nodes.values():
        self._expand_data_node_contents(data_node, data_node._cached_contents)
    file_references = {}
    for data_node in self._unsaved_data_nodes.values():
        # Process DataObject
        contents = data_node._cached_expanded_contents
        if not contents:
            continue
        if isinstance(contents, dict):
            # Full DataObject representation: build an unsaved instance.
            data_node.data_object = self._create_unsaved_data_object(contents)
            self._unsaved_data_objects[
                data_node.data_object.uuid] = data_node.data_object
        elif data_node.type == 'file':
            # String reference to an existing file; batch the lookups so
            # all references resolve in a single query below.
            nodes_with_this_reference = file_references.setdefault(
                contents, [])
            nodes_with_this_reference.append(data_node)
        else:
            # For non-file values, create a new node
            data_node.data_object = DataObject(
                type=data_node.type, data={'value': contents})
            self._unsaved_data_objects[
                data_node.data_object.uuid] = data_node.data_object
    file_lookups = DataObject.filter_multiple_by_name_or_id_or_tag_or_hash(
        file_references.keys())
    for reference, data_objects in file_lookups.iteritems():
        if len(data_objects) == 0:
            raise serializers.ValidationError(
                'No file found for reference "%s"' % reference)
        if len(data_objects) > 1:
            # BUGFIX: was 'dataobjects' (undefined name), which raised
            # NameError instead of the intended ValidationError whenever
            # a reference matched more than one file.
            raise serializers.ValidationError(
                'Found %s files for reference "%s", expected 1.'
                % (len(data_objects), reference))
        for data_node in file_references[reference]:
            data_node.data_object = data_objects[0]
            self._preexisting_data_objects[
                data_objects[0].uuid] = data_objects[0]
    data_object_uuids = self._unsaved_data_objects.keys()
    preexisting_data_objects = DataObject.objects.filter(
        uuid__in=data_object_uuids)
    for data_object in preexisting_data_objects:
        self._preexisting_data_objects[data_object.uuid] = data_object
        self._unsaved_data_objects.pop(data_object.uuid)
def testRender_file(self):
    """Serialized file DataObject echoes all input fields in 'value'."""
    file_data = fixtures.data_objects.file_data_object['value']
    data_object = DataObject.create_and_initialize_file_resource(**file_data)
    serializer = DataObjectSerializer(data_object, context=get_mock_context())
    value = serializer.data['value']
    # Each input field should round-trip into the rendered value dict.
    for field in ('filename', 'md5', 'source_type',
                  'import_comments', 'imported_from_url'):
        self.assertEqual(value[field], file_data[field])
def testCreateAndInitializeFileResource_import(self):
    """Import metadata is stored on the newly initialized file resource."""
    imported_from_url = 'file:///data/' + filename_1
    comments = 'Test data'
    # NOTE(review): source_type is 'result' even though this test
    # exercises the import fields — possibly should be 'imported';
    # preserved as-is, confirm intent.
    do = DataObject.create_and_initialize_file_resource(
        filename=filename_1,
        md5=md5_1,
        source_type='result',
        imported_from_url=imported_from_url,
        import_comments=comments)
    resource = do.file_resource
    self.assertEqual(resource.md5, md5_1)
    self.assertEqual(resource.upload_status, 'incomplete')
    self.assertTrue('work' in resource.file_url)
    self.assertEqual(resource.import_comments, comments)
    self.assertEqual(resource.imported_from_url, imported_from_url)
def testCalculateContentsFingerprint_file(self):
    """Model fingerprint for a file agrees with the standalone helper."""
    contents = {
        'type': 'file',
        'value': {
            'md5': md5_1,
            'filename': filename_1
        }
    }
    value = contents['value']
    do = DataObject.create_and_initialize_file_resource(
        filename=value['filename'],
        md5=value['md5'],
        source_type='result')
    expected = calculate_contents_fingerprint(contents)
    self.assertEqual(do.calculate_contents_fingerprint(), expected)
def testGetByValue_twoMatches(self):
    """An ambiguous filename (two matching files) raises ValidationError."""
    do = DataObject.create_and_initialize_file_resource(
        filename=filename_1, md5=md5_1, source_type='result')
    # With a single match, lookup by filename succeeds.
    fetched = DataObject.get_by_value(filename_1, 'file')
    self.assertEqual(do.uuid, fetched.uuid)
    # A second file with the same name makes the lookup ambiguous.
    DataObject.create_and_initialize_file_resource(
        filename=filename_1, md5=md5_1, source_type='result')
    with self.assertRaises(ValidationError):
        DataObject.get_by_value(filename_1, 'file')
def testCreate_AlreadyExists(self):
    """Deserializing an already-saved file does not create a duplicate."""
    file_data = copy.deepcopy(fixtures.data_objects.file_data_object)['value']
    data_object = DataObject.create_and_initialize_file_resource(**file_data)
    serializer = DataObjectSerializer(data_object, context=get_mock_context())
    rendered = serializer.data
    count_before = DataObject.objects.count()
    # Feed the rendered representation back through the serializer.
    round_trip = DataObjectSerializer(data=rendered)
    round_trip.is_valid(raise_exception=True)
    data_object = round_trip.save()
    # Verify that no new object was created
    self.assertEqual(count_before, DataObject.objects.count())
def testCreate_ErrorAlreadyExistsWithMismatch(self):
    """Deserializing an existing file with a mismatched md5 is rejected.

    Also verifies the failed validation does not create a new object.
    """
    file_data = copy.deepcopy(fixtures.data_objects.file_data_object)['value']
    data_object = DataObject.create_and_initialize_file_resource(**file_data)
    s = DataObjectSerializer(data_object, context=get_mock_context())
    data_object_count_before = DataObject.objects.count()
    rendered_1 = s.data
    # Tamper with the md5 so it no longer matches the saved object.
    rendered_1['value']['md5'] = '192f08c86f675deca469ea50ffac38e0'
    s = DataObjectSerializer(data=rendered_1)
    with self.assertRaises(ValidationError):
        s.is_valid(raise_exception=True)
        # Removed dead `s.save()` call here: is_valid raises first, so
        # the save was unreachable and only obscured the test's intent.
    # Verify that no new object was created
    data_object_count_after = DataObject.objects.count()
    self.assertEqual(data_object_count_before, data_object_count_after)
def testRoundTrip_file(self):
    """Render -> deserialize with fresh UUID -> render preserves file fields."""
    file_data = fixtures.data_objects.file_data_object['value']
    data_object = DataObject.create_and_initialize_file_resource(**file_data)
    serializer = DataObjectSerializer(data_object, context=get_mock_context())
    rendered_1 = serializer.data
    # update UUID to avoid collision
    input_2 = copy.deepcopy(rendered_1)
    input_2['uuid'] = str(uuid.uuid4())
    serializer = DataObjectSerializer(data=input_2)
    serializer.is_valid(raise_exception=True)
    data_object = serializer.save()
    serializer = DataObjectSerializer(data_object, context=get_mock_context())
    rendered_2 = serializer.data
    # Identity fields differ because we forced a new UUID...
    self.assertNotEqual(rendered_1['uuid'], rendered_2['uuid'])
    self.assertNotEqual(rendered_1['url'], rendered_2['url'])
    # ...while type and timestamp carry over unchanged.
    self.assertEqual(rendered_1['type'], rendered_2['type'])
    self.assertEqual(rendered_1['datetime_created'],
                     rendered_2['datetime_created'])
    # All file-value fields survive the round trip.
    for field in ('filename', 'md5', 'import_comments', 'imported_from_url',
                  'upload_status', 'source_type', 'file_url'):
        self.assertEqual(rendered_1['value'][field],
                         rendered_2['value'][field])
def _validate_and_cache_file(self, data):
    """Validate file data and cache the DataObject it resolves to.

    Side effects: sets self._cached_data_object and
    self._do_create_new_data_object for later use by create/save.
    Returns the (possibly mutated — '_value_info' may be popped) data
    dict. Raises serializers.ValidationError on failed lookup or on
    model validation errors.
    """
    value = data.get('_value_info')
    if value is not None and not isinstance(value, dict):
        # If it's a string, treat it as a data_object identifier and
        # look it up.
        data_objects = DataObject.filter_by_name_or_id_or_tag_or_hash(value)
        if data_objects.count() == 0:
            raise serializers.ValidationError(
                'No matching DataObject found for "%s"' % value)
        elif data_objects.count() > 1:
            raise serializers.ValidationError(
                'Multiple matching DataObjects found for "%s"' % value)
        # Exactly one match: reuse it, no new object needed.
        self._cached_data_object = data_objects.first()
        self._do_create_new_data_object = False
    else:
        if data.get('uuid'):
            try:
                # UUID given: reuse the existing object if present, but
                # only after confirming the payload matches it.
                self._cached_data_object = DataObject.objects.get(
                    uuid=data.get('uuid'))
                self._do_create_new_data_object=False
                self._verify_data_object_matches_data(
                    self._cached_data_object, data)
                # Early return skips full_clean: the object is already
                # persisted and verified against the payload.
                return data
            except DataObject.DoesNotExist:
                # Create new, with given UUID
                data.pop('_value_info')
                self._cached_data_object = self.Meta.model(**data)
                self._do_create_new_data_object = True
        else:
            # Otherwise, create new.
            data.pop('_value_info')
            self._cached_data_object = self.Meta.model(**data)
            self._do_create_new_data_object = True
    # Run Django model validation on whichever object was cached above;
    # surface model errors as serializer errors.
    try:
        self._cached_data_object.full_clean()
    except django.core.exceptions.ValidationError as e:
        raise serializers.ValidationError(e.message_dict)
    return data
def _extend_all_paths_and_add_data_at_leaves(
        self, data_node, contents, path, data_type):
    """Recursively walk 'contents', attaching a DataObject at each leaf.

    'path' is the partial path to an intermediate node; 'contents'
    describes every branch and leaf below it. Each path step is an
    (index, degree) pair. For example, given path (0,2) and contents
    [['10','20'],['30','40']], the data lands at:
      10 at [(0,2), (0,2), (0,2)]
      20 at [(0,2), (0,2), (1,2)]
      30 at [(0,2), (1,2), (0,2)]
      40 at [(0,2), (1,2), (1,2)]
    and the other root-level branch (1,2) is untouched.
    """
    if isinstance(contents, list):
        if not contents:
            # An empty list represents a node of degree 0.
            data_node.setattrs_and_save_with_retries({'degree': 0})
            return
        degree = len(contents)
        for index, branch in enumerate(contents):
            # Tuples in 'path' are immutable, so a shallow extension
            # is equivalent to copying the whole path.
            self._extend_all_paths_and_add_data_at_leaves(
                data_node, branch, path + [(index, degree)], data_type)
        return
    # Leaf node: resolve contents to a DataObject and attach it.
    if isinstance(contents, dict):
        serializer = DataObjectSerializer(data=contents, context=self.context)
        serializer.is_valid(raise_exception=True)
        data_object = serializer.save()
    else:
        data_object = DataObject.get_by_value(contents, data_type)
    data_node.add_data_object(path, data_object, save=True)
def testGetByValue_invalidValue(self):
    """Each non-None invalid (type, value) pair raises ValidationError.

    Fix: the None-guard previously sat INSIDE the assertRaises block,
    so a None invalid_value meant nothing ran inside the context and
    assertRaises itself failed the test. Skip None entries up front.
    """
    for (type, invalid_value) in self.INVALID_VALUE_SETS:
        if invalid_value is None:
            continue
        with self.assertRaises(ValidationError):
            DataObject.get_by_value(invalid_value, type)
def testGetByValue(self):
    """Each valid (type, value) pair round-trips through get_by_value."""
    for (type, value) in self.VALUE_SETS:
        data_object = DataObject.get_by_value(value, type)
        self.assertEqual(data_object.value, value)
def testSubstitutionValue(self):
    """substitution_value is the string form of the underlying value."""
    for (type, value) in self.VALUE_SETS:
        data_object = DataObject.get_by_value(value, type)
        self.assertEqual(data_object.substitution_value, str(value))
def testIsReady(self):
    """Non-file DataObjects are ready as soon as they exist."""
    for (type, value) in self.VALUE_SETS:
        data_object = DataObject.get_by_value(value, type)
        self.assertTrue(data_object.is_ready)
def testGetByValue_noMatch(self):
    """Looking up a nonexistent file raises ValidationError."""
    with self.assertRaises(ValidationError):
        DataObject.get_by_value('noMatch', 'file')
def testSubstitutionValue_file(self):
    """A file's substitution_value is its filename."""
    data_object = DataObject.create_and_initialize_file_resource(
        filename=filename_1, md5=md5_1, source_type='result')
    self.assertEqual(data_object.substitution_value, filename_1)
def testValue_file(self):
    """A file DataObject's value is its FileResource."""
    data_object = DataObject.create_and_initialize_file_resource(
        filename=filename_1, md5=md5_1, source_type='result')
    self.assertEqual(data_object.value, data_object.file_resource)
def testGetByValue_invalidType(self):
    """An unknown data type raises ValidationError."""
    with self.assertRaises(ValidationError):
        DataObject.get_by_value('cantelope', 'fruit')
def testFilterByNameOrId(self):
    """filter_by_name_or_id_or_tag finds exactly the file by its filename.

    Fix: renamed from TestFilterByNameOrId — the leading capital 'T'
    meant unittest's 'test*' discovery never ran this method.
    """
    file_data_object = _create_file_data_object()
    # NOTE(review): assumes DataObject exposes .filename directly;
    # elsewhere the filename lives on file_resource — confirm.
    query_string = file_data_object.filename
    results = DataObject.filter_by_name_or_id_or_tag(query_string)
    self.assertEqual(results.count(), 1)
    self.assertEqual(results.first().id, file_data_object.id)