def _stage_data_files_using_s3_sync(self, files):
    """Upload ``files`` between selecting and forgetting the upload area.

    Reports progress, calls ``select_upload_area()``, hands ``files`` to
    ``upload_files()`` and always calls ``forget_about_upload_area()``
    afterwards, whether or not the upload succeeded.

    Args:
        files: Passed through unchanged to ``self.upload_files``.

    Raises:
        Nothing is caught here; any exception raised by the three calls
        propagates to the caller.
    """
    Progress.report("STAGING FILES using hca cli...")
    self.select_upload_area()
    try:
        self.upload_files(files)
    finally:
        # Forget the area even when the upload fails, so a failed run does
        # not leave a selected upload area behind.
        # NOTE(review): assumes select/forget form an acquire/release pair.
        self.forget_about_upload_area()
def get_upload_area_credentials(self):
    """Obtain the upload-area credentials and keep them on the instance.

    ``self._get_upload_area_credentials`` is handed to ``WaitFor``, which is
    asked for a value other than ``None`` with a timeout of ``2 * MINUTE``.
    The result is stored in ``self.upload_credentials``. Progress messages
    are reported before and after the wait.
    """
    Progress.report("WAITING FOR STAGING AREA...")
    credentials_waiter = WaitFor(self._get_upload_area_credentials)
    self.upload_credentials = credentials_waiter.to_return_a_value_other_than(
        other_than_value=None,
        timeout_seconds=2 * MINUTE,
    )
    Progress.report(" credentials received.\n")
def stage_data_files(self, files):
    """Report that staging has started, then stage ``files``.

    The work itself is delegated to ``_stage_data_files_using_s3_sync``,
    which receives ``files`` unchanged.
    """
    Progress.report("STAGING FILES...\n")
    self._stage_data_files_using_s3_sync(files)
def create_analysis_submission(self):
    """Create the secondary (analysis) submission and upload its data files.

    Steps, in order:

    1. Create a new submission and wrap it as ``self.analysis_submission``.
    2. Create the analysis process and protocol from ``self.analysis_fixture``
       (stored as ``self.analysis_process`` / ``self.analysis_protocol``) and
       link the protocol to the process.
    3. Register ``self.bundle_manifest_uuid`` as an input bundle and every
       file returned by ``self.primary_submission.get_files()`` as an input
       file of the process.
    4. Add one file reference to the process per entry of
       ``self.analysis_fixture.files``.
    5. Create ``self.submission_manager``, fetch upload-area credentials and
       upload the analysis data files from the integration-test S3 bucket.

    Raises:
        Whatever ``raise_for_status()`` raises when one of the HTTP requests
        fails (assumes ``self.session`` is a requests-style session -- TODO
        confirm). Exceptions from the upload calls propagate as well; the
        upload area is forgotten first.
    """
    submission = self.ingest_client_api.create_submission()
    # Keep only the part of the self link that precedes the first "{".
    submission_url = submission["_links"]["self"]["href"].rsplit("{")[0]
    Progress.report(f"SECONDARY submission ID is {submission_url}\n")
    self.analysis_submission = self.ingest_api.envelope(
        envelope_id=None, url=submission_url)

    # --- process and protocol -------------------------------------------
    process = self.ingest_client_api.create_entity(
        submission_url, self.analysis_fixture.analysis_process, 'processes')
    protocol = self.ingest_client_api.create_entity(
        submission_url, self.analysis_fixture.analysis_protocol, 'protocols')
    input_files = self.primary_submission.get_files()
    self.analysis_process = process
    self.analysis_protocol = protocol
    self.ingest_client_api.link_entity(process, protocol, 'protocols')

    # --- input bundle ------------------------------------------------------
    add_input_bundle_url = process['_links']['add-input-bundles']['href']
    response = self.session.post(
        add_input_bundle_url,
        headers=self._get_headers(),
        json={'bundleUuids': [self.bundle_manifest_uuid]},
    )
    response.raise_for_status()

    # --- input files -------------------------------------------------------
    analysis_files = self.analysis_fixture.files
    add_input_file_url = process['_links']['inputFiles']['href']
    # Collect all UUIDs up front so a malformed entry fails before any
    # input file has been registered.
    input_file_uuids = [
        input_file['uuid']['uuid'] for input_file in input_files
    ]
    for file_uuid in input_file_uuids:
        response = self.session.post(
            add_input_file_url,
            json.dumps({"inputFileUuid": file_uuid}),
            headers=self._get_headers(),
        )
        response.raise_for_status()

    # --- file references ---------------------------------------------------
    add_reference_files_url = process['_links']['add-file-reference']['href']
    for file_content in analysis_files:
        file_reference = {
            'fileName': file_content['file_core']['file_name'],
            'content': file_content,
        }
        response = self.session.put(
            add_reference_files_url,
            json.dumps(file_reference),
            headers=self._get_headers(),
        )
        response.raise_for_status()

    # --- data file upload --------------------------------------------------
    self.submission_manager = SubmissionManager(self.analysis_submission)
    self.submission_manager.get_upload_area_credentials()

    # TODO restrict permission in the s3 bucket
    # FIXME The following is a workaround because of the issue when uploading
    # files from an s3 bucket. This is very slow as it's uploading files one
    # at a time, fix this. The intended replacement is a single
    # stage_data_files() call on the bucket prefix.
    analysis_data_prefix = (
        's3://org-humancellatlas-ingest-integration-test/analysis-data'
    )
    # Every file is listed, and therefore uploaded, exactly once.
    analysis_data_files = (
        'metrics_summary.csv',
        'filtered_gene_bc_matrices_h5.h5',
        'molecule_info.h5',
        'genes.tsv',
        'barcodes.tsv',
        'matrix.mtx',
        'possorted_genome_bam.bam.bai',
        'raw_genes.tsv',
        'raw_gene_bc_matrices_h5.h5',
        'web_summary.html',
        'raw_matrix.mtx',
        'possorted_genome_bam.bam',
        'raw_barcodes.tsv',
    )

    self.submission_manager.select_upload_area()
    try:
        for file_name in analysis_data_files:
            self.submission_manager.upload_files(
                f'{analysis_data_prefix}/{file_name}')
    finally:
        # Forget the area even if an upload fails part-way through.
        # NOTE(review): assumes select/forget form an acquire/release pair.
        self.submission_manager.forget_about_upload_area()