def _stage_data_files_using_s3_sync(self, files):
     """Upload *files* to the upload area via the hca cli helpers.

     Selects the upload area, uploads ``files`` with ``self.upload_files``
     and finally calls ``self.forget_about_upload_area``.

     Args:
         files: Passed through unchanged to ``self.upload_files``.

     Raises:
         Whatever ``select_upload_area``, ``upload_files`` or
         ``forget_about_upload_area`` raise.
     """
     Progress.report("STAGING FILES using hca cli...")
     self.select_upload_area()
     try:
         self.upload_files(files)
     finally:
         # Run the clean-up even when the upload fails, so a failed run does
         # not leave the area selected for whatever runs next.
         self.forget_about_upload_area()
 def get_upload_area_credentials(self):
     """Wait for the upload-area credentials and store them on the instance.

     Hands ``self._get_upload_area_credentials`` to ``WaitFor`` and asks it
     for a value other than ``None``, with a timeout of ``2 * MINUTE``. The
     result is kept in ``self.upload_credentials``.
     """
     Progress.report("WAITING FOR STAGING AREA...")
     credentials_waiter = WaitFor(self._get_upload_area_credentials)
     self.upload_credentials = credentials_waiter.to_return_a_value_other_than(
         other_than_value=None,
         timeout_seconds=2 * MINUTE,
     )
     Progress.report(" credentials received.\n")
 def stage_data_files(self, files):
     """Stage *files*, delegating to ``_stage_data_files_using_s3_sync``.

     Args:
         files: Forwarded unchanged to the s3-sync based implementation.
     """
     Progress.report("STAGING FILES...\n")
     self._stage_data_files_using_s3_sync(files)
# Example #4
# 0
    def create_analysis_submission(self):
        """Create the secondary (analysis) submission and upload its data files.

        Steps, in order:

        1. Create a new submission and wrap it as ``self.analysis_submission``.
        2. Create the analysis process and protocol entities from
           ``self.analysis_fixture`` and link the protocol to the process.
        3. Attach the bundle ``self.bundle_manifest_uuid`` and every file of
           ``self.primary_submission`` as inputs of the process.
        4. Register each fixture file as a file reference of the process.
        5. Upload the analysis data files from the integration-test s3 bucket
           through a new ``SubmissionManager``.

        Side effects: sets ``analysis_submission``, ``analysis_process``,
        ``analysis_protocol`` and ``submission_manager`` on ``self``.

        Raises:
            Whatever ``raise_for_status()`` raises when one of the HTTP calls
            fails, plus anything raised by the ingest/upload helpers.
        """
        submission = self.ingest_client_api.create_submission()
        # Keep only the part of the self link before the first "{"
        # (presumably a URI-template suffix -- confirm against the ingest API).
        submission_url = submission["_links"]["self"]["href"].partition("{")[0]
        Progress.report(f"SECONDARY submission ID is {submission_url}\n")
        self.analysis_submission = self.ingest_api.envelope(
            envelope_id=None, url=submission_url)

        process = self.ingest_client_api.create_entity(
            submission_url, self.analysis_fixture.analysis_process,
            'processes')
        protocol = self.ingest_client_api.create_entity(
            submission_url, self.analysis_fixture.analysis_protocol,
            'protocols')
        input_files = self.primary_submission.get_files()
        self.analysis_process = process
        self.analysis_protocol = protocol
        self.ingest_client_api.link_entity(process, protocol, 'protocols')

        # Declare the primary bundle as an input of the analysis process.
        add_input_bundle_url = process['_links']['add-input-bundles']['href']
        response = self.session.post(
            add_input_bundle_url,
            headers=self._get_headers(),
            json={'bundleUuids': [self.bundle_manifest_uuid]})
        response.raise_for_status()
        analysis_files = self.analysis_fixture.files

        # Declare every file of the primary submission as an input file.
        # The uuids are collected up front so a malformed entry fails before
        # any request is sent.
        add_input_file_url = process['_links']['inputFiles']['href']
        input_file_uuids = [
            input_file['uuid']['uuid'] for input_file in input_files
        ]
        for file_uuid in input_file_uuids:
            response = self.session.post(
                add_input_file_url,
                json.dumps({"inputFileUuid": file_uuid}),
                headers=self._get_headers())
            response.raise_for_status()

        # Register the analysis output files described by the fixture.
        add_reference_files_url = process['_links']['add-file-reference'][
            'href']
        for file_content in analysis_files:
            file_reference = {
                'fileName': file_content['file_core']['file_name'],
                'content': file_content,
            }
            response = self.session.put(
                add_reference_files_url,
                json.dumps(file_reference),
                headers=self._get_headers())
            response.raise_for_status()

        self.submission_manager = SubmissionManager(self.analysis_submission)
        self.submission_manager.get_upload_area_credentials()
        # TODO restrict permission in the s3 bucket
        # FIXME Uploading the files one at a time is a workaround for an issue
        #  when uploading files from an s3 bucket in a single
        #  stage_data_files() call. This is very slow, fix this.
        analysis_data_url = (
            's3://org-humancellatlas-ingest-integration-test/analysis-data')
        # Upload order is kept from the original call sequence; a second,
        # redundant upload of barcodes.tsv has been dropped.
        analysis_data_files = (
            'metrics_summary.csv',
            'filtered_gene_bc_matrices_h5.h5',
            'molecule_info.h5',
            'genes.tsv',
            'barcodes.tsv',
            'matrix.mtx',
            'possorted_genome_bam.bam.bai',
            'raw_genes.tsv',
            'raw_gene_bc_matrices_h5.h5',
            'web_summary.html',
            'raw_matrix.mtx',
            'possorted_genome_bam.bam',
            'raw_barcodes.tsv',
        )

        self.submission_manager.select_upload_area()
        try:
            for file_name in analysis_data_files:
                self.submission_manager.upload_files(
                    f"{analysis_data_url}/{file_name}")
        finally:
            # Run the clean-up even when an upload fails, so the area does
            # not stay selected after an aborted run.
            self.submission_manager.forget_about_upload_area()