def test_get_project__returns_none(self):
    """``get_project`` yields ``None`` for a map that holds no entities."""
    # given
    entity_map = EntityMap()

    # when
    output = entity_map.get_project()

    # then
    # Identity check: assertEqual(output, None) would also pass for any
    # object whose __eq__ claims equality with None.
    self.assertIsNone(output)
def test_get_project__returns_project(self):
    """``get_project`` hands back the project entity previously added to the map."""
    # given
    project_entity = Entity('project', 'project_0', {})
    entity_map = EntityMap()
    entity_map.add_entity(project_entity)

    # when
    found = entity_map.get_project()

    # then
    self.assertEqual(found, project_entity)
def test_load(self):
    """``EntityMap.load`` exposes the entity types, contents and links of the fixture JSON."""
    # given:
    source_json = _create_spreadsheet_json()

    # when:
    loaded_map = EntityMap.load(source_json)

    # then:
    expected_types = ['project', 'biomaterial', 'file', 'protocol']
    self.assertEqual(expected_types, list(loaded_map.get_entity_types()))

    # and:
    # TODO shouldn't entity id's be unique and that there's no need to specify entity type?
    first_biomaterial = loaded_map.get_entity('biomaterial', 'biomaterial_id_1')
    self._assert_correct_entity(
        first_biomaterial,
        entity_id='biomaterial_id_1',
        entity_type='biomaterial',
        content={'key': 'biomaterial_1'},
    )

    # and:
    expected_links = {
        'biomaterial': ['biomaterial_id_1'],
        'process': ['process_id_1'],
    }
    second_biomaterial = loaded_map.get_entity('biomaterial', 'biomaterial_id_2')
    self._assert_correct_entity(
        second_biomaterial,
        entity_id='biomaterial_id_2',
        entity_type='biomaterial',
        content={'key': 'biomaterial_2'},
        links=expected_links,
    )

    # and:
    first_protocol = loaded_map.get_entity('protocol', 'protocol_id_1')
    self.assertEqual({'key': 'protocol_1'}, first_protocol.content)
def _create_test_entity_map(self) -> EntityMap:
    """Build the fixture map: a product, a project, one plain user and two
    users created with ``is_reference=True``.
    """

    def referenced_user(user_id):
        # Every referenced user receives its own, freshly built dict as the
        # fourth positional Entity argument (nothing is shared between them).
        resource = {
            'content': {'k': 'v0'},
            '_links': {'self': {'href': 'url'}},
        }
        return Entity('user', user_id, {'k': 'v'}, resource, is_reference=True)

    product = Entity('product', 'product_1', {'k': 'v'})
    project = Entity('project', 'id', {'k': 'v'})
    plain_user = Entity('user', 'user_1', {'k': 'v'})

    return EntityMap(
        product,
        plain_user,
        referenced_user('user_2'),
        referenced_user('user_3'),
        project,
    )
def test_generate_direct_links_link_not_found_error(self):
    """A file row linking to a biomaterial id that the spreadsheet JSON does
    not define makes the linker raise ``LinkedEntityNotFound``.
    """
    # given
    project_rows = {'dummy-project-id': {'content': {'key': 'project_1'}}}
    file_rows = {
        'file_id_1': {
            'content': {'key': 'file_1'},
            'links_by_entity': {
                'biomaterial': ['biomaterial_id_1'],
                'protocol': ['protocol_id_1', 'protocol_id_2'],
            },
        },
    }
    spreadsheet_json = {'project': project_rows, 'file': file_rows}
    linker = EntityLinker(self.mocked_template_manager,
                          EntityMap.load(spreadsheet_json))

    # when
    with self.assertRaises(LinkedEntityNotFound) as raised:
        linker.handle_links_from_spreadsheet()

    # then
    error = raised.exception
    self.assertEqual('biomaterial', error.entity)
    self.assertEqual('biomaterial_id_1', error.id)
def add_entities(self, entity_map: EntityMap, submission_url: str) -> Submission:
    """Create a ``Submission`` for *submission_url* and register new entities.

    The manifest is defined from *entity_map* first; then every entity
    returned by ``entity_map.get_new_entities()`` is passed to
    ``self.add_entity`` and added to the submission, in that order.

    :param entity_map: source of the entities and of the manifest counts.
    :param submission_url: URL handed to ``Submission`` and ``add_entity``.
    :return: the populated ``Submission``.
    """
    new_submission = Submission(self.ingest_api, submission_url)
    new_submission.define_manifest(entity_map)

    pending_entities = entity_map.get_new_entities()
    for entity in pending_entities:
        self.add_entity(entity, submission_url)
        new_submission.add_entity(entity)

    return new_submission
def dry_run_import_file(self, file_path, project_uuid=None):
    """Generate the spreadsheet JSON, load it and resolve its links.

    Nothing in this method creates or updates a submission.

    :return: ``(None, errors)`` when ``generate_json`` reports errors,
        otherwise ``(entity_map, [])``.
    """
    # NOTE(review): ``import_file`` passes ``is_update`` in this same
    # positional slot -- confirm that ``generate_json`` really expects
    # ``project_uuid`` as its second positional argument.
    generated = self.generate_json(file_path, project_uuid)
    spreadsheet_json, template_mgr, errors = generated

    if errors:
        return None, errors

    entity_map = EntityMap.load(spreadsheet_json)
    EntityLinker(template_mgr, entity_map).handle_links_from_spreadsheet()
    return entity_map, []
def link_entities(self, entity_map: EntityMap, submission: Submission):
    """Create every direct link recorded on the entities of *entity_map*.

    For each entry in ``entity.direct_links`` the target is looked up with
    ``entity_map.get_entity`` and handed to ``self.link_entity``.  After
    each successful link the running count is compared against
    ``self.PROGRESS_CTR`` and the manifest's ``expectedLinks``; when either
    matches, ``actualLinks`` is patched on the manifest and progress is
    logged.

    :param entity_map: entities whose ``direct_links`` are to be created.
    :param submission: submission whose ``manifest`` tracks link progress.
    :raises Exception: any error raised while linking (or while reporting
        progress) is logged and re-raised unchanged.
    """
    progress = 0
    for entity in entity_map.get_entities():
        for link in entity.direct_links:
            to_entity = entity_map.get_entity(link['entity'], link['id'])
            try:
                self.link_entity(entity, to_entity,
                                 relationship=link['relationship'],
                                 is_collection=link.get('is_collection', True))
                progress += 1

                expected_links = int(submission.manifest.get('expectedLinks', 0))
                if progress % self.PROGRESS_CTR == 0 or progress == expected_links:
                    manifest_url = self.ingest_api.get_link_from_resource(submission.manifest, 'self')
                    self.ingest_api.patch(manifest_url, {'actualLinks': progress})
                    self.logger.info(f"links progress: {progress}/ {submission.manifest.get('expectedLinks')}")
            except Exception as link_error:
                # The lookup may have produced no target; fall back to the
                # link's own keys so that building the message cannot raise
                # and hide the original error.
                to_type = getattr(to_entity, 'type', link['entity'])
                to_id = getattr(to_entity, 'id', link['id'])
                error_message = (f'The {entity.type} with id {entity.id} could not be linked '
                                 f'to {to_type} with id {to_id}.')
                self.logger.error(error_message)
                self.logger.error(str(link_error))
                raise
def test_handle_links_from_spreadsheet__with_external_links(self):
    """Checks the ``is_reference`` / ``is_linking_reference`` flags (and one
    process content) produced by the linker for the external-links fixture.
    """
    # given
    fixture_path = os.path.dirname(__file__) + '/spreadsheet_with_external_links.json'
    with open(fixture_path) as fixture:
        spreadsheet_json = json.load(fixture)

    template_manager = MagicMock(name='template_manager')
    template_manager.get_schema_url = MagicMock(return_value='url')
    self.mocked_template_manager = template_manager

    linker = EntityLinker(self.mocked_template_manager,
                          EntityMap.load(spreadsheet_json))

    # when
    output = linker.handle_links_from_spreadsheet()

    # then
    # (entity type, entity id, expected truthiness of is_linking_reference)
    flag_expectations = (
        ('protocol', 'librep-protocol-uuid', True),
        ('protocol', 'seq-protocol-uuid', True),
        ('biomaterial', 'cell-suspension-uuid', True),
        ('file', 'seq-file-uuid-1', False),
        ('file', 'seq-file-uuid-2', False),
    )
    for entity_type, entity_id, linking_expected in flag_expectations:
        entity = output.get_entity(entity_type, entity_id)
        if linking_expected:
            self.assertTrue(entity.is_linking_reference)
        else:
            self.assertFalse(entity.is_linking_reference)
        self.assertTrue(entity.is_reference)

    assay_process = output.get_entity('process', 'assay_process-uuid')
    self.assertTrue(assay_process.is_linking_reference)
    self.assertTrue(assay_process.is_reference)

    expected_process_content = {
        'process_core': {
            'process_description': 'desc',
            'process_id': 'assay_process',
        },
        'schema_type': 'process',
        'describedBy': 'url',
    }
    self.assertEqual(assay_process.content, expected_process_content)
def test_load__is_reference(self):
    """Loading a row that carries ``is_reference: True`` yields a map whose
    only entity type is ``biomaterial``.
    """
    # NOTE(review): only the entity types are asserted; the loaded entity's
    # is_reference flag itself is not checked here.
    # given:
    flagged_row = {
        'content': {'key': 'value'},
        'is_reference': True,
    }
    spreadsheet_json = {'biomaterial': {'biomaterial_uuid': flagged_row}}

    # when:
    loaded_map = EntityMap.load(spreadsheet_json)

    # then:
    entity_types = list(loaded_map.get_entity_types())
    self.assertEqual(['biomaterial'], entity_types)
def test_generate_direct_links_multiple_process_links(self):
    """A biomaterial row that links to two processes makes the linker raise
    ``MultipleProcessesFound`` carrying both process ids.
    """
    # given
    project_rows = {'dummy-project-id': {'content': {'key': 'project_1'}}}
    biomaterial_rows = {
        'biomaterial_id_1': {
            'content': {'key': 'biomaterial_1'},
            'links_by_entity': {
                'process': ['process_id_1', 'process_id_2'],
            },
        },
    }
    process_rows = {
        'process_id_1': {'content': {'key': 'process_1'}},
        'process_id_2': {'content': {'key': 'process_2'}},
    }
    spreadsheet_json = {
        'project': project_rows,
        'biomaterial': biomaterial_rows,
        'process': process_rows,
    }
    linker = EntityLinker(self.mocked_template_manager,
                          EntityMap.load(spreadsheet_json))

    # when
    with self.assertRaises(MultipleProcessesFound) as raised:
        linker.handle_links_from_spreadsheet()

    # then
    error = raised.exception
    self.assertEqual('biomaterial', error.from_entity.type)
    self.assertEqual(['process_id_1', 'process_id_2'], error.process_ids)
def test_submit(self, submission_constructor):
    """``add_entities`` builds the submission for the given URL, defines its
    manifest from the map and adds the product and the user to it.
    """
    # given:
    submission = self._mock_submission(submission_constructor)

    # and:
    product = Entity('product', 'product_1', {})
    project = Entity('project', 'id', {})
    user = Entity('user', 'user_1', {})
    entity_map = EntityMap(product, user, project)

    # when:
    submitter = IngestSubmitter(self.ingest_api)
    submitter.add_entity = MagicMock()
    submitter.add_entities(entity_map, submission_url='url')

    # then:
    submission_constructor.assert_called_with(self.ingest_api, 'url')
    submission.define_manifest.assert_called_with(entity_map)
    expected_calls = [call(product), call(user)]
    submission.add_entity.assert_has_calls(expected_calls, any_order=True)
def test_generate_direct_links_invalid_spreadsheet_link(self):
    """A biomaterial row linking to a file makes the linker raise
    ``InvalidLinkInSpreadsheet`` describing both ends of the link.
    """
    # given
    project_rows = {'dummy-project-id': {'content': {'key': 'project_1'}}}
    biomaterial_rows = {
        'biomaterial_id_1': {
            'content': {'key': 'biomaterial_1'},
            'links_by_entity': {
                'file': ['file_id_1'],
            },
        },
    }
    file_rows = {'file_id_1': {'content': {'key': 'file_1'}}}
    spreadsheet_json = {
        'project': project_rows,
        'biomaterial': biomaterial_rows,
        'file': file_rows,
    }
    linker = EntityLinker(self.mocked_template_manager,
                          EntityMap.load(spreadsheet_json))

    # when
    with self.assertRaises(InvalidLinkInSpreadsheet) as raised:
        linker.handle_links_from_spreadsheet()

    # then
    error = raised.exception
    self.assertEqual('biomaterial', error.from_entity.type)
    self.assertEqual('file', error.link_entity_type)
    self.assertEqual('biomaterial_id_1', error.from_entity.id)
    self.assertEqual('file_id_1', error.link_entity_id)
def import_file(self, file_path, submission_url, is_update=False, project_uuid=None,
                update_project=False) -> Tuple[Submission, TemplateManager]:
    """Import the spreadsheet at *file_path* into the submission at *submission_url*.

    Steps, in order: optionally link the submission to *project_uuid*,
    generate the spreadsheet JSON, load it into an ``EntityMap``, clear the
    submission's previous errors, then either report the generation errors,
    update existing entities (*is_update*), or link and submit new entities.

    :return: ``(submission, template_mgr)``; ``submission`` stays ``None``
        unless new entities were submitted.  When any exception is raised
        it is recorded through ``ingest_api.create_submission_error`` and
        ``(None, template_mgr)`` is returned instead.
    """
    # Bound before the ``try`` so that both ``except`` handlers can return
    # ``template_mgr`` even when the very first call (linking the submission
    # to the project) is the one that fails.
    submission = None
    template_mgr = None
    try:
        if project_uuid:
            self.submitter.link_submission_to_project(project_uuid, submission_url)

        spreadsheet_json, template_mgr, errors = self.generate_json(file_path, is_update,
                                                                    project_uuid=project_uuid,
                                                                    update_project=update_project)
        entity_map = EntityMap.load(spreadsheet_json)
        self.ingest_api.delete_submission_errors(submission_url)

        if errors:
            self.report_errors(submission_url, errors)
        elif is_update:
            self.submitter.update_entities(entity_map)
        else:
            entity_linker = EntityLinker(template_mgr, entity_map)
            entity_linker.handle_links_from_spreadsheet()
            submission = self._submit_new_entities(entity_map, submission_url)
            # NOTE(review): the project update is kept in the new-submission
            # branch -- confirm this nesting against the intended flow.
            project = entity_map.get_project()
            if project and project_uuid and update_project:
                self.submitter.update_entity(project)
    except HTTPError as httpError:
        self.logger.exception(httpError)
        status = httpError.response.status_code
        text = httpError.response.text
        importer_error = ImporterError(f'Received an HTTP {status} from {httpError.request.url}: {text}')
        self.ingest_api.create_submission_error(submission_url, importer_error.getJSON())
        return None, template_mgr
    except Exception as e:
        self.ingest_api.create_submission_error(submission_url, ImporterError(str(e)).getJSON())
        self.logger.error(str(e), exc_info=True)
        return None, template_mgr
    finally:
        # Logged on every path, including the early returns above.
        self.logger.info(f'Submission in {submission_url} is done!')

    return submission, template_mgr
def test_load__is_linking_reference(self):
    """Loading a row that has both internal and external links yields a map
    whose only entity type is ``biomaterial``.
    """
    # NOTE(review): only the entity types are asserted; the loaded entity's
    # is_linking_reference flag itself is not checked here.
    # given:
    biomaterial_row = {
        'content': {'key': 'biomaterial_3'},
        'links_by_entity': {
            'biomaterial': ['biomaterial_id_2'],
            'process': ['process_id_2'],
        },
        'external_links_by_entity': {
            'biomaterial': ['biomaterial_uuid'],
        },
    }
    spreadsheet_json = {'biomaterial': {'biomaterial_id': biomaterial_row}}

    # when:
    loaded_map = EntityMap.load(spreadsheet_json)

    # then:
    entity_types = list(loaded_map.get_entity_types())
    self.assertEqual(['biomaterial'], entity_types)
def define_manifest(self, entity_map: EntityMap):
    """Create the submission manifest from the counts in *entity_map*.

    The payload holds the total entity count, one ``expected...`` count per
    entity type, the expected link count and ``actualLinks`` set to 0.  The
    value returned by ``ingest_api.create_submission_manifest`` is stored on
    ``self.manifest`` and returned.
    """
    # TODO provide a better way to serialize
    per_type_fields = (
        ('expectedBiomaterials', 'biomaterial'),
        ('expectedProcesses', 'process'),
        ('expectedFiles', 'file'),
        ('expectedProtocols', 'protocol'),
        ('expectedProjects', 'project'),
    )

    manifest_json = {'totalCount': entity_map.count_total()}
    for field_name, entity_type in per_type_fields:
        manifest_json[field_name] = entity_map.count_entities_of_type(entity_type)
    manifest_json['expectedLinks'] = entity_map.count_links()
    manifest_json['actualLinks'] = 0

    self.manifest = self.ingest_api.create_submission_manifest(self.submission_url, manifest_json)
    return self.manifest
def test_submit_linked_entity(self, submission_constructor):
    """``add_entities`` adds both the user and the product that links to it
    to the submission.
    """
    # given:
    submission = self._mock_submission(submission_constructor)

    # and:
    user = Entity('user', 'user_1', {})
    link_to_user = {
        'entity': 'user',
        'id': 'user_1',
        'relationship': 'wish_list',
    }
    linked_product = Entity('product', 'product_1', {},
                            direct_links=[link_to_user],
                            is_reference=False,
                            is_linking_reference=False)
    project = Entity('project', 'id', {},
                     is_reference=False,
                     is_linking_reference=False)

    # and:
    entity_map = EntityMap(user)
    for extra_entity in (linked_product, project):
        entity_map.add_entity(extra_entity)

    # when:
    submitter = IngestSubmitter(self.ingest_api)
    submitter.add_entity = MagicMock()
    submitter.link_submission_to_project = MagicMock()
    submitter.PROGRESS_CTR = 1
    submitter.add_entities(entity_map, submission_url='url')

    # then:
    submission_constructor.assert_called_with(self.ingest_api, 'url')
    submission.define_manifest.assert_called_with(entity_map)
    expected_calls = [call(user), call(linked_product)]
    submission.add_entity.assert_has_calls(expected_calls, any_order=True)
def test_generate_direct_links_file_to_file_no_process(self):
    """A file linked to another file (plus two protocols) without an explicit
    process: the expected output contains a ``process_id_1`` entry that both
    files and both protocols are linked through.
    """

    def link(entity_type, entity_id, relationship):
        # Fresh dict per call so that no link object is shared.
        return {'entity': entity_type, 'id': entity_id, 'relationship': relationship}

    def project_link():
        single = link('project', 'dummy-project-id', 'project')
        single['is_collection'] = False
        return single

    # given
    spreadsheet_json = {
        'project': {
            'dummy-project-id': {'content': {'key': 'project_1'}},
        },
        'file': {
            'file_id_1': {'content': {'key': 'file_1'}},
            'file_id_2': {
                'content': {'key': 'file_2'},
                'links_by_entity': {
                    'file': ['file_id_1'],
                    'protocol': ['protocol_id_1', 'protocol_id_2'],
                },
            },
        },
        'protocol': {
            'protocol_id_1': {'content': {'key': 'protocol_1'}},
            'protocol_id_2': {'content': {'key': 'protocol_2'}},
        },
    }

    expected_json = {
        'project': {
            'dummy-project-id': {'content': {'key': 'project_1'}},
        },
        'file': {
            'file_id_1': {
                'content': {'key': 'file_1'},
                'direct_links': [
                    link('process', 'process_id_1', 'inputToProcesses'),
                    project_link(),
                ],
            },
            'file_id_2': {
                'content': {'key': 'file_2'},
                'links_by_entity': {
                    'file': ['file_id_1'],
                },
                'direct_links': [
                    link('process', 'process_id_1', 'derivedByProcesses'),
                    project_link(),
                ],
            },
        },
        'process': {
            'process_id_1': {
                'content': {'key': 'process_1'},
                'direct_links': [
                    link('project', 'dummy-project-id', 'projects'),
                    project_link(),
                    link('protocol', 'protocol_id_1', 'protocols'),
                    link('protocol', 'protocol_id_2', 'protocols'),
                ],
            },
        },
        'protocol': {
            'protocol_id_1': {
                'content': {'key': 'protocol_1'},
                'direct_links': [project_link()],
            },
            'protocol_id_2': {
                'content': {'key': 'protocol_2'},
                'direct_links': [project_link()],
            },
        },
    }

    linker = EntityLinker(self.mocked_template_manager,
                          EntityMap.load(spreadsheet_json))

    # when
    output = linker.handle_links_from_spreadsheet()

    # then
    self._assert_equal_direct_links(expected_json, output)
def update_entities(self, entity_map: EntityMap):
    """Call ``self.update_entity`` on every entity of *entity_map* whose
    ``is_reference`` is truthy.

    :return: list of the ``update_entity`` results, in iteration order.
    """
    results = []
    for candidate in entity_map.get_entities():
        if not candidate.is_reference:
            continue
        results.append(self.update_entity(candidate))
    return results
def test_count_links(self):
    """``count_links`` sums the ``direct_links`` of all entities in the map."""
    entity_map = EntityMap()

    # no element
    self.assertEqual(entity_map.count_links(), 0)

    # (entity id, number of direct links or None to omit the argument,
    #  expected running total after adding the entity)
    steps = (
        ('product_0', None, 0),  # has 1 element without links
        ('product_1', 3, 3),     # has 1 element with links
        ('product_2', 4, 7),     # has many element with links
    )
    for entity_id, link_count, running_total in steps:
        if link_count is None:
            extra_arguments = {}
        else:
            extra_arguments = {'direct_links': [{} for _ in range(link_count)]}
        entity_map.add_entity(Entity('product', entity_id, {}, **extra_arguments))
        self.assertEqual(entity_map.count_links(), running_total)
def test_count_total(self):
    """``count_total`` reports 0, 1 and 3 for maps holding that many entities."""
    # given:
    empty_map = EntityMap()

    # and:
    single_map = EntityMap()
    single_map.add_entity(Entity('product', 'product_1', {}))

    # and:
    triple_map = EntityMap()
    triple_map.add_entity(Entity('profile', 'profile_1', {}))
    for suffix in (0, 1):
        triple_map.add_entity(Entity('product', f'product_{suffix}', {}))

    # expect:
    expectations = ((0, empty_map), (1, single_map), (3, triple_map))
    for expected_total, subject in expectations:
        self.assertEqual(expected_total, subject.count_total())