Exemplo n.º 1
0
def get_pipeline_inputs(job, input_flag, input_file, encryption_key=None, per_file_encryption=False,
                        gdc_download_token=None):
    """
    Get the input file from a URL, S3, the GDC or local disk and write it to the file store.

    The source is chosen from the prefix of `input_file`: 'http'/'ftp' values go through
    get_file_from_url, 'S3'/'s3' through get_file_from_s3, 'GDC'/'gdc' through
    get_file_from_gdc, and anything else is treated as a path on the local disk.

    :param job: The job whose fileStore is used for logging and for storing a local file
    :param str input_flag: The name of the flag
    :param str input_file: The value passed in the config file
    :param str encryption_key: Path to the encryption key if encrypted with sse-c
    :param bool per_file_encryption: If encrypted, was the file encrypted using the per-file method?
    :param str gdc_download_token: The download token to obtain files from the GDC
    :return: fsID for the file
    :rtype: toil.fileStore.FileID
    :raises AssertionError: If `input_file` is treated as a local path that does not exist
    """
    job.fileStore.logToMaster('Obtaining file (%s) to the file job store' % input_flag)
    # 'http' is a prefix of 'https', so one entry covers both schemes.
    if input_file.startswith(('http', 'ftp')):
        return get_file_from_url(job, input_file, encryption_key=encryption_key,
                                 per_file_encryption=per_file_encryption,
                                 write_to_jobstore=True)
    if input_file.startswith(('S3', 's3')):
        return get_file_from_s3(job, input_file, encryption_key=encryption_key,
                                per_file_encryption=per_file_encryption,
                                write_to_jobstore=True)
    if input_file.startswith(('GDC', 'gdc')):
        return get_file_from_gdc(job, input_file, gdc_download_token=gdc_download_token,
                                 write_to_jobstore=True)
    # Raise explicitly instead of using an `assert` statement so the check is not stripped when
    # Python runs with -O.  AssertionError is kept so existing callers see the same exception.
    if not os.path.exists(input_file):
        raise AssertionError('Bogus Input : ' + input_file)
    return job.fileStore.writeGlobalFile(input_file)
    def _get_MHC_file(job):
        """
        Fetch the MHC pathway genes archive from S3 and write it to the jobstore.

        :param job: The job passed to get_file_from_s3 and whose fileStore stores the archive
        :return: dict with a single key, 'genes_file', holding the FSID of the stored archive
        """
        mhc_url = 'S3://cgl-protect-data/hg19_references/mhc_pathway_genes.json.tar.gz'
        # write_to_jobstore=False: the write to the jobstore is done explicitly below.
        downloaded = get_file_from_s3(job, mhc_url, write_to_jobstore=False)
        genes_fsid = job.fileStore.writeGlobalFile(downloaded)
        return {'genes_file': genes_fsid}
Exemplo n.º 3
0
    def _get_test_rsem_file(job, test_src_folder):
        """
        Fetch the test rsem results file from S3 and write it to the jobstore.

        :param job: The job passed to get_file_from_s3 and whose fileStore stores the file
        :param test_src_folder: Not used by this function
        :return: FSID for the rsem file
        """
        rsem_url = ('S3://cgl-pipeline-inputs/protect/unit_results/expression/'
                    'rsem.genes.results')
        # write_to_jobstore=False: the write to the jobstore is done explicitly below.
        downloaded = get_file_from_s3(job, rsem_url, write_to_jobstore=False)
        return job.fileStore.writeGlobalFile(downloaded)
    def _get_MHC_file(job):
        """
        Retrieve the MHC pathway genes archive from S3 and store it in the jobstore.

        :param job: The job passed to get_file_from_s3 and whose fileStore stores the archive
        :return: dict mapping 'genes_file' to the FSID of the stored archive
        """
        archive = get_file_from_s3(
            job,
            'S3://cgl-protect-data/hg19_references/mhc_pathway_genes.json.tar.gz',
            write_to_jobstore=False)
        result = {}
        result['genes_file'] = job.fileStore.writeGlobalFile(archive)
        return result
Exemplo n.º 5
0
 def _download_files(job):
     """
     Attempts to download an unencrypted file, a file encrypted with a key, and a file encrypted
     with a hash of a master key, then exercises a series of downloads that are expected to fail.

     A key file named 'test.key' is written to the current working directory and used as the
     encryption key for the encrypted downloads.

     NOTE(review): for the expected-failure cases, a call that raises no exception at all is
     accepted silently; only a RuntimeError with an unexpected message is re-raised.

     :param job: The job passed through to get_file_from_s3
     """
     def _expect_failure(expected_fragment, url, **kwargs):
         """
         Call get_file_from_s3 and tolerate a RuntimeError whose text contains
         `expected_fragment`; any other RuntimeError is re-raised.
         """
         try:
             get_file_from_s3(job, url, write_to_jobstore=False, **kwargs)
         except RuntimeError as err:
             # str(err) instead of err.message: the `message` attribute was deprecated in
             # Python 2.6 and does not exist in Python 3.
             if expected_fragment not in str(err):
                 raise

     keyfile = os.path.abspath('test.key')
     with open(keyfile, 'w') as k_f:
         k_f.write('protectwillhelpwithimmunotherapy')
     http_base = 'https://s3-us-west-2.amazonaws.com/pimmuno-test-data/CI_test_input/'
     s3_base = 'S3://pimmuno-test-data/CI_test_input/'
     unencrypted = 'unencrypted.file'
     encrypted_with_key = 'encrypted_with_test_key.file'
     encrypted_with_hash = 'encrypted_with_key_hash.file'
     # Download with https schema
     get_file_from_s3(job, http_base + unencrypted, write_to_jobstore=False)
     get_file_from_s3(job, http_base + encrypted_with_key, encryption_key=keyfile,
                      per_file_encryption=False, write_to_jobstore=False)
     get_file_from_s3(job, http_base + encrypted_with_hash, encryption_key=keyfile,
                      write_to_jobstore=False)
     # Download with S3 schema
     get_file_from_s3(job, s3_base + unencrypted, write_to_jobstore=False)
     # Test wrong schema (the extra leading 's' turns 'S3://' into 'sS3://')
     _expect_failure('Unexpected url scheme', 's' + s3_base + encrypted_with_hash,
                     encryption_key=keyfile)
     # Test downloading encrypted file without key
     _expect_failure('400', s3_base + encrypted_with_hash)
     # Test downloading file encrypted with hash using the master (this emulates downloading
     # file with the wrong key)
     _expect_failure('403', s3_base + encrypted_with_hash, encryption_key=keyfile,
                     per_file_encryption=False)
     # Test downloading unencrypted with a key
     _expect_failure('400', s3_base + unencrypted, encryption_key=keyfile,
                     per_file_encryption=False)
     # Test downloading non-existent file
     _expect_failure('exist on s3?', s3_base + unencrypted + 'xx')