def upload_spreadsheet():
    """Save the uploaded spreadsheet and submit its data to ingest.

    Returns the upload-success response built from the new submission URL.
    A ``SpreadsheetUploadError`` is turned into a failure response carrying
    its own HTTP code, message and details; any other exception is logged
    with its traceback and reported as a generic 500 failure response.
    """
    try:
        logger.info("Uploading spreadsheet")
        auth_token = _check_token()
        spreadsheet_path = _save_spreadsheet()

        api_client = IngestApi()
        api_client.set_token(auth_token)
        xls_importer = XlsImporter(api_client)

        # The project UUID is optional: it is only resolved when a project
        # was found and it carries a nested ``uuid`` entry.
        found_project = _check_for_project(api_client)
        uuid_entry = found_project.get('uuid') if found_project else None
        project_uuid = uuid_entry.get('uuid') if uuid_entry else None

        submission_url = api_client.createSubmission(auth_token)
        _submit_spreadsheet_data(xls_importer, spreadsheet_path,
                                 submission_url, project_uuid)
        return create_upload_success_response(submission_url)
    except SpreadsheetUploadError as upload_error:
        return create_upload_failure_response(
            upload_error.http_code,
            upload_error.message,
            upload_error.details,
        )
    except Exception as unexpected_error:
        logger.error(traceback.format_exc())
        failure_message = "We experienced a problem while uploading your spreadsheet"
        return create_upload_failure_response(
            500, failure_message, str(unexpected_error))
class TestIngest(unittest.TestCase):
    """Ingest helper routines run against the deployment named by the
    ``DEPLOYMENT_ENV`` environment variable."""

    def setUp(self):
        """Read the target deployment and build the API clients and agents."""
        deployment = os.environ.get('DEPLOYMENT_ENV', None)
        self.deployment = deployment
        if deployment not in DEPLOYMENTS:
            raise RuntimeError(f'DEPLOYMENT_ENV environment variable must be one of {DEPLOYMENTS}')

        api_url = f"https://api.ingest.{deployment}.data.humancellatlas.org"
        self.ingest_client_api = IngestApi(url=api_url)

        # The service-to-service token client is configured from the
        # credentials file named by GOOGLE_APPLICATION_CREDENTIALS.
        self.s2s_token_client = S2STokenClient()
        credentials_path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
        self.s2s_token_client.setup_from_file(credentials_path)
        self.token_manager = TokenManager(self.s2s_token_client)

        self.ingest_broker = IngestUIAgent(deployment)
        self.ingest_api = IngestApiAgent(deployment=deployment)

    def ingest_and_upload_only(self, dataset_name):
        """Perform a ``valid_run`` of the named dataset; return the runner."""
        fixture = DatasetFixture(dataset_name, self.deployment)
        dataset_runner = DatasetRunner(self.deployment)
        dataset_runner.valid_run(fixture)
        return dataset_runner

    def ingest(self, dataset_name):
        """Perform a ``complete_run`` of the named dataset; return the runner."""
        fixture = DatasetFixture(dataset_name, self.deployment)
        dataset_runner = DatasetRunner(self.deployment)
        dataset_runner.complete_run(fixture)
        return dataset_runner

    def _create_submission_envelope(self):
        """Create a submission through the client API and wrap it as an envelope."""
        bearer = self.token_manager.get_token()
        self.ingest_client_api.set_token(f'Bearer {bearer}')
        created = self.ingest_client_api.create_submission()
        self_link = created["_links"]["self"]["href"]
        return self.ingest_api.envelope(envelope_id=None, url=self_link)

    # TODO move this to ingest client api
    def _get_entities(self, url, entity_type):
        """GET ``url`` and return the embedded ``entity_type`` list.

        Returns an empty list when the response has no (or an empty)
        ``_embedded`` section or no entries of the requested type.
        Raises for non-success HTTP statuses via ``raise_for_status``.
        """
        reply = requests.get(url, headers={'Content-type': 'application/json'})
        reply.raise_for_status()
        payload = reply.json()

        embedded = payload.get('_embedded')
        if not embedded:
            return []
        entities = embedded.get(entity_type)
        return entities if entities else []

    def ingest_analysis(self, dataset_name):
        """Run an analysis submission for the named dataset and check its links.

        Asserts that the analysis process is attached to an input bundle
        manifest and that its derived files, input files and input bundle
        manifests match what the runner reports. Returns the runner.
        """

        def uuids_of(file_documents):
            # Every file document carries its UUID nested as uuid.uuid.
            return [document['uuid']['uuid'] for document in file_documents]

        analysis_fixture = AnalysisSubmissionFixture()
        runner = AnalysisSubmissionRunner(
            self.deployment,
            self.ingest_broker,
            self.ingest_api,
            self.token_manager,
            self.ingest_client_api,
        )
        dataset_fixture = DatasetFixture(dataset_name, self.deployment)
        runner.run(dataset_fixture, analysis_fixture)

        self.assertTrue(
            runner.bundle_manifest_uuid,
            'The analysis process should be attached to an input bundle manifest')

        process_links = runner.analysis_process['_links']

        # Derived files of the process versus files of the analysis submission.
        linked_derived = self._get_entities(
            process_links['derivedFiles']['href'], 'files')
        submitted_analysis = runner.analysis_submission.get_files()
        derived_file_uuids = uuids_of(linked_derived)
        analysis_file_uuids = uuids_of(submitted_analysis)
        self.assertTrue(
            derived_file_uuids,
            'There must be files in the analysis submission')
        self.assertEqual(
            derived_file_uuids, analysis_file_uuids,
            'The analyses files must be linked to the analyses process.')

        # Input files of the process versus files of the primary submission.
        linked_inputs = self._get_entities(
            process_links['inputFiles']['href'], 'files')
        submitted_primary = runner.primary_submission.get_files()
        input_file_uuids = uuids_of(linked_inputs)
        primary_submission_file_uuids = uuids_of(submitted_primary)
        self.assertTrue(
            input_file_uuids,
            'There must be files from the primary submission')
        self.assertEqual(
            input_file_uuids, primary_submission_file_uuids,
            'The primary submission files must be linked to the analyses process.')

        # Exactly one input bundle manifest, and it must be the runner's.
        manifests = self._get_entities(
            process_links['inputBundleManifests']['href'], 'bundleManifests')
        self.assertEqual(
            len(manifests), 1,
            'There should only be one input bundle manifest for the analyses process')
        self.assertEqual(
            manifests[0]['bundleUuid'], runner.bundle_manifest_uuid,
            'The input bundle manifest for the analyses process is incorrect')

        return runner

    def ingest_big_submission(self):
        """Run a ``BigSubmissionRunner`` with a fresh ``MetadataFixture``."""
        fixture = MetadataFixture()
        big_runner = BigSubmissionRunner(
            self.deployment, self.ingest_client_api, self.token_manager)
        big_runner.run(fixture)

    def ingest_updates(self):
        """Run an update submission and assert exactly one bundle was updated."""
        update_runner = UpdateSubmissionRunner(
            self.deployment,
            self.ingest_broker,
            self.ingest_api,
            self.ingest_client_api,
        )
        update_runner.run()
        updated_count = len(update_runner.updated_bundle_fqids)
        self.assertEqual(updated_count, 1, "There should be 1 bundle updated.")
class IngestApiAgent:
    """Agent for the Ingest API of a single deployment.

    Wraps an ``IngestApi`` client and issues raw ``requests`` calls, each
    authenticated with a header obtained from ``IngestAuthAgent``.
    """

    def __init__(self, deployment):
        """Resolve the API URL for ``deployment`` and set up the client."""
        self.deployment = deployment
        self.ingest_api_url = self._ingest_api_url()
        self.ingest_auth_agent = IngestAuthAgent()
        self._set_up_ingest_client()

    def _set_up_ingest_client(self):
        """Create the ``IngestApi`` client and hand it the Authorization value."""
        self.ingest_api = IngestApi(url=self.ingest_api_url)
        auth_header = self.ingest_auth_agent.make_auth_header()
        self.ingest_api.set_token(auth_header['Authorization'])

    def project(self, project_id):
        """Return a ``Project`` loaded from the API by its ID."""
        return IngestApiAgent.Project(project_id=project_id,
                                      ingest_api_agent=self)

    def submission(self, submission_id):
        """Return a ``SubmissionEnvelope`` loaded from the API by its ID."""
        return IngestApiAgent.SubmissionEnvelope(envelope_id=submission_id,
                                                 ingest_api_agent=self)

    def new_submission(self, is_update=False):
        """Create a submission via the client and wrap the returned data."""
        submission_data = self.ingest_api.create_submission(
            update_submission=is_update)
        return IngestApiAgent.SubmissionEnvelope(ingest_api_agent=self,
                                                 data=submission_data)

    def iter_submissions(self):
        """Yield a ``SubmissionEnvelope`` for every envelope, page by page."""
        for page in self.iter_pages('/submissionEnvelopes', page_size=500):
            for submission_data in page['submissionEnvelopes']:
                yield IngestApiAgent.SubmissionEnvelope(
                    data=submission_data, ingest_api_agent=self)

    def get_all(self, path_or_url, result_element_we_are_interested_in):
        """Get a collection resource.

        Iterates through all pages gathering results and returns a list.
        """
        results = []
        for page in self.iter_pages(path_or_url):
            results += page[result_element_we_are_interested_in]
        return results

    def iter_pages(self, path_or_url, page_size=100):
        """Iterate through a collection using HATEOAS pagination, yielding pages.

        Each yielded page is the ``_embedded`` section of one response.
        Iteration stops at the first response without ``_embedded`` or
        without a ``next`` link.
        """
        path_or_url += f"?size={page_size}"
        while True:
            data = self.get(path_or_url)
            if '_embedded' not in data:
                # An empty collection comes back without `_embedded`.
                break
            yield data['_embedded']
            if 'next' in data['_links']:
                path_or_url = data['_links']['next']['href']
            else:
                break

    def get(self, path_or_url):
        """Get a singleton resource.

        ``path_or_url`` is used as-is when it starts with ``http``;
        otherwise it is appended to the deployment's API URL.

        Raises:
            RuntimeError: if the response status is not OK.
        """
        if path_or_url.startswith('http'):
            url = path_or_url
        else:
            url = f"{self.ingest_api_url}{path_or_url}"

        response = requests.get(
            url, headers=self.ingest_auth_agent.make_auth_header())

        if response.ok:
            return response.json()
        else:
            raise RuntimeError(f"GET {url} got {response}")

    def post(self, url, content, params=None):
        """POST ``content`` as JSON to ``url`` and return the decoded reply.

        ``params`` are optional query parameters; ``None`` means none.
        Raises for non-success HTTP statuses via ``raise_for_status``.
        """
        # `None` replaces the former shared `{}` default, which was one dict
        # object reused across every call.
        query_params = {} if params is None else params
        auth_header = self.ingest_auth_agent.make_auth_header()
        response = requests.post(url, json=content, headers=auth_header,
                                 params=query_params)
        response.raise_for_status()
        return response.json()

    def put(self, url, content=None):
        """PUT to ``url`` (with a JSON body when ``content`` is truthy).

        Returns the decoded JSON reply; raises for non-success HTTP statuses
        via ``raise_for_status``.
        """
        auth_header = self.ingest_auth_agent.make_auth_header()
        if content:
            response = requests.put(url, json=content, headers=auth_header)
        else:
            response = requests.put(url, headers=auth_header)
        response.raise_for_status()
        return response.json()

    def _ingest_api_url(self):
        """Return the API base URL; ``prod`` has no deployment infix."""
        if self.deployment == 'prod':
            return "https://api.ingest.data.humancellatlas.org"
        else:
            return f"https://api.ingest.{self.deployment}.data.humancellatlas.org"

    class Project:
        """A project resource, loaded from the API on construction."""

        def __init__(self, project_id, ingest_api_agent):
            self.project_id = project_id
            self.api = ingest_api_agent
            self.data = None
            self._load()

        @property
        def uuid(self):
            """The ``uuid`` entry of the project data."""
            return self.data['uuid']

        def submission_envelopes(self):
            """Return the project's submission envelopes from the linked resource.

            Returns an empty list when the response carries no embedded
            envelopes (responses for empty collections omit ``_embedded``,
            as ``IngestApiAgent.iter_pages`` also accounts for).
            """
            data = self.api.get(
                self.data['_links']['submissionEnvelopes']['href'])
            envelopes = data.get('_embedded', {}).get('submissionEnvelopes', [])
            return [
                IngestApiAgent.SubmissionEnvelope(data=subm_data,
                                                  ingest_api_agent=self.api)
                for subm_data in envelopes
            ]

        def _load(self):
            self.data = self.api.get(f"/projects/{self.project_id}")

    class SubmissionEnvelope:
        """A submission envelope resource and operations on its links."""

        # May be primed with data, or if you supply an ID, we will go get
        # the data.
        def __init__(self, ingest_api_agent, envelope_id=None, data=None):
            """Build from ``data``, or load by ``envelope_id`` when given.

            Raises:
                RuntimeError: if neither ``envelope_id`` nor ``data`` is given.
            """
            if not envelope_id and not data:
                raise RuntimeError(
                    "either envelope_id or data must be provided")
            self.api = ingest_api_agent
            self.data = None
            if envelope_id:
                self.envelope_id = envelope_id
                self._load()
            else:
                self.data = data
                # The ID is the last path segment of the envelope's self link.
                self.envelope_id = data['_links']['self']['href'].split(
                    '/')[-1]

        def __str__(self):
            return f"SubmissionEnvelope(id={self.envelope_id}, uuid={self.uuid}, " \
                   f"status={self.status})"

        def _link_to(self, endpoint_path):
            """Return the href of the named entry in the envelope's ``_links``."""
            return self.data['_links'][endpoint_path]['href']

        def files(self):
            """Return all file documents of this envelope as a list."""
            return self.api.get_all(self._link_to('files'), 'files')

        def metadata_documents(self, metadata_type: str = None):
            """Return all documents of ``metadata_type`` in this envelope.

            Raises:
                RuntimeError: if ``metadata_type`` is not specified.
                KeyError: if ``metadata_type`` is not a known type.
            """
            self._check_metadata_type(metadata_type)
            result_type = _pluralized_type[metadata_type]
            metadata_link = self._link_to(result_type)
            return self.api.get_all(metadata_link, result_type)

        def add_biomaterial(self, biomaterial_content,
                            update_target_uuid: str = None):
            """POST a biomaterial document to this envelope."""
            return self._add_metadata('biomaterial', biomaterial_content,
                                      update_target_uuid=update_target_uuid)

        def _add_metadata(self, metadata_type, metadata_content,
                          update_target_uuid: str = None):
            """POST ``metadata_content`` to the envelope's link for the type.

            When ``update_target_uuid`` is given it is sent as the
            ``updatingUuid`` query parameter.
            """
            self._check_metadata_type(metadata_type)
            endpoint_path = _pluralized_type[metadata_type]
            metadata_link = self._link_to(endpoint_path)
            params = {
                'updatingUuid': update_target_uuid
            } if update_target_uuid else {}
            return self.api.post(metadata_link, metadata_content,
                                 params=params)

        @staticmethod
        def _check_metadata_type(metadata_type):
            """Validate ``metadata_type`` against the known pluralized types.

            Raises:
                RuntimeError: if ``metadata_type`` is empty or ``None``.
                KeyError: if ``metadata_type`` is unknown.
            """
            if not metadata_type:
                raise RuntimeError('`metadata_type` must be specified')
            if metadata_type not in _pluralized_type:
                raise KeyError(f'Unknown metadata type [{metadata_type}].')

        def iter_files(self):
            """Yield the envelope's file documents one at a time, page by page."""
            url = self._link_to('files')
            for page in self.api.iter_pages(url):
                for file in page['files']:
                    yield file

        def reload(self):
            """Re-fetch the envelope data and return ``self``."""
            self._load()
            return self

        def check_validation(self):
            """Reload and report whether the status is Valid or Submitted.

            Raises:
                Exception: if the reloaded status is ``Invalid``.
            """
            self._load()
            if self.status == 'Invalid':
                raise Exception("envelope status is Invalid")
            return self.status in ['Valid', 'Submitted']

        def check_status(self):
            """Reload and return the current status."""
            self._load()
            return self.status

        @property
        def status(self):
            """The ``submissionState`` of the currently loaded data."""
            return self.data['submissionState']

        @property
        def uuid(self):
            """The nested ``uuid.uuid`` value of the currently loaded data."""
            return self.data['uuid']['uuid']

        def upload_credentials(self):
            """Return upload area credentials, or None if this envelope
            doesn't have an upload area yet."""
            staging_details = self.data.get('stagingDetails', None)
            if staging_details and 'stagingAreaLocation' in staging_details:
                return staging_details.get('stagingAreaLocation', {}).get(
                    'value', None)
            return None

        def bundles(self):
            """Return the bundle UUIDs of all bundle manifests of this envelope."""
            url = self._link_to('bundleManifests')
            manifests = self.api.get_all(url, 'bundleManifests')
            return [manifest['bundleUuid'] for manifest in manifests]

        def complete(self):
            """PUT to the envelope's ``submit`` link."""
            completion_endpoint = self._link_to('submit')
            self.api.put(completion_endpoint)

        def _load(self):
            self.data = self.api.get(
                f"/submissionEnvelopes/{self.envelope_id}")