Пример #1
0
 def test_benchmark_none1(self):
     # With raise_error=True, B.benchmark is expected to raise
     # AppNameUnavailableException for the app name 'some_weird_name'.
     sizes = {'fastq1': 93520, 'fastq2': 97604, 'bwa_index': 3364568}
     payload = {'input_size_in_bytes': sizes}
     with self.assertRaises(B.AppNameUnavailableException):
         B.benchmark('some_weird_name', payload, raise_error=True)
Пример #2
0
 def get_benchmarking(self, input_size_in_bytes):
     """Ask the benchmark module for instance settings for this run.

     The benchmark parameters are a deep copy of
     ``self.args.input_parameters`` updated with
     ``self.args.additional_benchmarking_parameters``, so
     ``self.args.input_parameters`` itself is left untouched.

     Args:
         input_size_in_bytes: passed through unchanged as the
             ``'input_size_in_bytes'`` entry of the benchmark input.

     Returns:
         dict with keys ``'instance_type'``, ``'EBS_optimized'`` and
         ``'ebs_size'``. When ``B.benchmark`` returns ``None`` the values
         are the placeholders ``''``, ``''`` and ``0``.

     Raises:
         Exception: if ``B.benchmark`` raises; the message ends with the
             text of the underlying error.
     """
     benchmark_parameters = copy.deepcopy(self.args.input_parameters)
     benchmark_parameters.update(
         self.args.additional_benchmarking_parameters)
     benchmark_input = {
         'input_size_in_bytes': input_size_in_bytes,
         'parameters': benchmark_parameters
     }
     try:
         res = B.benchmark(self.args.app_name, benchmark_input)
     except Exception as e:
         # When the call fails `res` is never bound, so the former nested
         # try/except that tried to report `res` could only ever produce
         # this message. Raise it directly instead of via a bare except.
         raise Exception("Benchmarking not working. : None. %s" % str(e))

     if res is None:
         return {'instance_type': '', 'EBS_optimized': '', 'ebs_size': 0}

     logger.info(str(res))
     total_size_in_gb = res['total_size_in_GB']
     # Never go below 10; otherwise truncate the total and add one.
     ebs_size = 10 if total_size_in_gb < 10 else int(total_size_in_gb) + 1
     return {
         'instance_type': res['aws']['recommended_instance_type'],
         'EBS_optimized': res['aws']['EBS_optimized'],
         'ebs_size': ebs_size
     }
Пример #3
0
 def test_benchmark10(self):
     # Benchmark 'pairs-patch' with one 'input_pairs' size and check the
     # recommended AWS instance type.
     sizes = {'input_pairs': 1000000000}
     result = B.benchmark('pairs-patch', {'input_size_in_bytes': sizes})
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.micro'
Пример #4
0
 def test_benchmark6(self):
     # Benchmark 'pairsam-markasdup' with one 'input_pairsam' size and
     # check the instance type it recommends.
     payload = {'input_size_in_bytes': {'input_pairsam': 1000000000}}
     outcome = B.benchmark('pairsam-markasdup', payload)
     print(outcome)
     assert 'aws' in outcome
     aws_part = outcome['aws']
     assert 'recommended_instance_type' in aws_part
     assert aws_part['recommended_instance_type'] == 'r5a.large'
Пример #5
0
def update_config(config, app_name, input_files, parameters):
    """Fill unset instance settings in ``config`` from a benchmark result.

    ``config`` is updated in place and nothing is returned. A setting
    counts as unset when ``instance_type`` or ``EBS_optimized`` is ``''``
    or ``ebs_size`` is ``0``. When all three are already set, no file-size
    lookup or benchmark call is made. Settings that are already set are
    never overwritten.

    Args:
        config: dict holding ``'instance_type'``, ``'ebs_size'`` and
            ``'EBS_optimized'``.
        app_name: forwarded to ``B.benchmark`` as the app name.
        input_files: mapping of argument name to a dict with
            ``'bucket_name'`` and ``'object_key'`` (one key or a list of
            keys).
        parameters: forwarded to ``B.benchmark`` under ``'parameters'``.

    Raises:
        Exception: if a file size cannot be obtained, if the benchmark
            call fails, or if the benchmark returns ``None`` while a
            setting is still unset.
    """
    if (config['instance_type'] != '' and config['ebs_size'] != 0
            and config['EBS_optimized'] != ''):
        return

    input_size_in_bytes = dict()
    # .items() instead of .iteritems(): the latter does not exist on
    # Python 3 dicts, while .items() works on both Python 2 and 3.
    for argname, f in input_files.items():
        bucket = f['bucket_name']
        s3 = s3_utils.s3Utils(bucket, bucket, bucket)
        object_key = f['object_key']
        try:
            if isinstance(object_key, list):
                size = [s3.get_file_size(key, bucket) for key in object_key]
            else:
                size = s3.get_file_size(object_key, bucket)
        except Exception:
            # `except Exception` rather than a bare except, so that
            # KeyboardInterrupt / SystemExit are not converted.
            raise Exception("Can't get input file size")
        input_size_in_bytes[str(argname)] = size

    print({"input_size_in_bytes": input_size_in_bytes})
    try:
        res = B.benchmark(
            app_name, {
                'input_size_in_bytes': input_size_in_bytes,
                'parameters': parameters
            })
    except Exception:
        # `res` is unbound when the call fails, so the former nested
        # try/except could only ever produce this message.
        raise Exception("Benchmarking not working. : None")

    if res is None:
        # No recommendation available: every unset field is an error.
        if config['instance_type'] == '':
            raise Exception("instance type cannot be determined nor given")
        if config['ebs_size'] == 0:
            raise Exception("ebs_size cannot be determined nor given")
        if config['EBS_optimized'] == '':
            raise Exception("EBS_optimized cannot be determined nor given")
        return

    logger.info(str(res))
    total_size_in_gb = res['total_size_in_GB']
    # Never go below 10; otherwise truncate the total and add one.
    ebs_size = 10 if total_size_in_gb < 10 else int(total_size_in_gb) + 1

    if config['instance_type'] == '':
        config['instance_type'] = res['aws']['recommended_instance_type']
    if config['ebs_size'] == 0:
        config['ebs_size'] = ebs_size
    if config['EBS_optimized'] == '':
        config['EBS_optimized'] = res['aws']['EBS_optimized']
Пример #6
0
 def test_benchmark_compartments_caller(self):
     # Benchmark 'compartments-caller' with one 'mcoolfile' size and check
     # the recommended AWS instance type.
     app = "compartments-caller"
     print(app)
     sizes = {'mcoolfile': 32000000000}
     result = B.benchmark(app, {'input_size_in_bytes': sizes})
     print(result)
     assert 'aws' in result
     recommendation = result['aws']
     assert 'recommended_instance_type' in recommendation
     assert recommendation['recommended_instance_type'] == 't3.small'
Пример #7
0
 def test_mergebed(self):
     # Passes a two-element list of sizes for 'input_bed' to the
     # 'mergebed' benchmark and checks the recommended instance type.
     app = "mergebed"
     print(app)
     payload = {'input_size_in_bytes': {'input_bed': [400000000, 500000000]}}
     result = B.benchmark(app, payload)
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.micro'
Пример #8
0
 def test_benchmark_insulation_scores_and_boundaries_caller(self):
     # Benchmark 'insulation-scores-and-boundaries-caller' with one
     # 'mcoolfile' size and check the recommended AWS instance type.
     app = "insulation-scores-and-boundaries-caller"
     print(app)
     sizes = {'mcoolfile': 32000000000}
     result = B.benchmark(app, {'input_size_in_bytes': sizes})
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.small'
Пример #9
0
 def test_benchmark1(self):
     # Benchmark 'md5' with one 'input_file' size; the result is printed
     # only after all assertions have passed.
     sizes = {'input_file': 200000000}
     result = B.benchmark('md5', {'input_size_in_bytes': sizes})
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.micro'
     print(result)
Пример #10
0
 def test_benchmark_merge_fastq(self):
     # Benchmark 'merge-fastq' with two 'input_fastqs' sizes built via
     # GB2B; checks the instance type and the truncated total size.
     print("merge_fastq")
     fastq_sizes = [GB2B(4), GB2B(5)]
     payload = {'input_size_in_bytes': {'input_fastqs': fastq_sizes}}
     result = B.benchmark('merge-fastq', payload)
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.medium'
     assert int(result['total_size_in_GB']) == 54
Пример #11
0
 def test_benchmark_bamqc(self):
     # Benchmark 'bamqc' with one 'bamfile' size built via GB2B; checks
     # the instance type and the truncated total size.
     app = "bamqc"
     print(app)
     payload = {'input_size_in_bytes': {'bamfile': GB2B(4)}}
     result = B.benchmark(app, payload)
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.medium'
     assert int(result['total_size_in_GB']) == 14
Пример #12
0
 def test_benchmark_atacseq_postaln(self):
     # Benchmark 'encode-atacseq-postaln' with a one-element 'atac.tas'
     # size list and check the recommended AWS instance type.
     print("testing atacseq-postaln")
     payload = {'input_size_in_bytes': {'atac.tas': [827000000]}}
     result = B.benchmark('encode-atacseq-postaln', payload)
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'c5.4xlarge'
Пример #13
0
 def test_benchmark_none2(self):
     # Without raise_error, B.benchmark is expected to return None for
     # the app name 'some_weird_name'.
     sizes = {'fastq1': 93520, 'fastq2': 97604, 'bwa_index': 3364568}
     outcome = B.benchmark('some_weird_name',
                           {'input_size_in_bytes': sizes})
     assert outcome is None
Пример #14
0
 def test_benchmark_chipseq(self):
     # Benchmark 'encode-chipseq' with fastq, control-fastq and index
     # sizes and check the recommended AWS instance type.
     print("testing chipseq")
     payload = {
         'input_size_in_bytes': {
             'chip.fastqs': [2000000000, 3000000000],
             'chip.ctl_fastqs': [3000000000, 2000000000],
             'chip.bwa_idx_tar': 5000000000
         }
     }
     result = B.benchmark('encode-chipseq', payload)
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'c5.4xlarge'
Пример #15
0
 def test_encode_rnaseq_stranded(self):
     # Benchmark 'encode-rnaseq-stranded' with fastq and align-index
     # sizes built via GB2B and check the recommended instance type.
     print("rnaseq_stranded")
     sizes = {'rna.fastqs_R1': GB2B(10), 'rna.align_index': GB2B(3)}
     result = B.benchmark('encode-rnaseq-stranded',
                          {'input_size_in_bytes': sizes})
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'm5a.4xlarge'
Пример #16
0
 def test_benchmark_fastqc_old(self):
     # Benchmark 'fastqc-0-11-4-1' with one 'input_fastq' size and
     # threads=2; the result is printed after the assertions.
     payload = {
         'input_size_in_bytes': {'input_fastq': 20000000000},
         'parameters': {'threads': 2}
     }
     result = B.benchmark('fastqc-0-11-4-1', payload)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.micro'
     print(result)
Пример #17
0
 def test_benchmark4(self):
     # Benchmark 'pairsam-parse-sort' with one 'bam' size and nThreads=16;
     # the result is printed after the assertions.
     payload = {
         'input_size_in_bytes': {'bam': 1000000000},
         'parameters': {'nThreads': 16}
     }
     result = B.benchmark('pairsam-parse-sort', payload)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'c5.9xlarge'
     print(result)
Пример #18
0
 def test_benchmark5(self):
     # Benchmark 'pairsam-merge' with three 'input_pairsams' sizes and
     # nThreads=32; the result is printed after the assertions.
     pairsam_sizes = [1000000000, 2000000000, 3000000000]
     payload = {
         'input_size_in_bytes': {'input_pairsams': pairsam_sizes},
         'parameters': {'nThreads': 32}
     }
     result = B.benchmark('pairsam-merge', payload)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'c5.9xlarge'
     print(result)
Пример #19
0
 def test_benchmark9(self):
     # Benchmark 'hi-c-processing-partb' with three 'input_pairs' sizes,
     # ncores=16 and maxmem='1900g'; checks the recommended instance type.
     pairs_sizes = [1000000000, 2000000000, 3000000000]
     payload = {
         'input_size_in_bytes': {'input_pairs': pairs_sizes},
         'parameters': {'ncores': 16, 'maxmem': '1900g'}
     }
     result = B.benchmark('hi-c-processing-partb', payload)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'x1.32xlarge'
Пример #20
0
 def test_benchmark_chipseq_aln_chip(self):
     # Benchmark 'encode-chipseq-aln-chip' with fastq and index sizes and
     # chip.bwa.cpu=16; checks the recommended AWS instance type.
     print("testing chipseq")
     sizes = {
         'chip.fastqs': [2000000000, 3000000000],
         'chip.bwa_idx_tar': 5000000000
     }
     payload = {
         'input_size_in_bytes': sizes,
         'parameters': {'chip.bwa.cpu': 16}
     }
     result = B.benchmark('encode-chipseq-aln-chip', payload)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'c5.4xlarge'
Пример #21
0
 def test_repliseq(self):
     # Benchmark 'repliseq-parta' with 'fastq' and 'bwaIndex' sizes built
     # via MB2B / GB2B and nthreads=4; checks the recommended instance.
     print("repliseq se")
     sizes = {'fastq': MB2B(270), 'bwaIndex': GB2B(3.21)}
     payload = {
         'input_size_in_bytes': sizes,
         'parameters': {'nthreads': 4}
     }
     result = B.benchmark('repliseq-parta', payload)
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.xlarge'
Пример #22
0
 def test_benchmark_chipseq_postaln(self):
     # Benchmark 'encode-chipseq-postaln' with three size lists and
     # chip.spp_cpu=4; checks the recommended AWS instance type.
     print("testing chipseq")
     sizes = {
         'chip.tas': [2000000000, 3000000000],
         'chip.ctl_tas': [3000000000, 2000000000],
         'chip.bam2ta_no_filt_R1.ta': [5000000000, 6000000000]
     }
     payload = {
         'input_size_in_bytes': sizes,
         'parameters': {'chip.spp_cpu': 4}
     }
     result = B.benchmark('encode-chipseq-postaln', payload)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'c5.4xlarge'
Пример #23
0
 def test_benchmark3(self):
     # Benchmark 'bwa-mem' with two fastq sizes, an index size and
     # nThreads=4; the result is printed after the assertions.
     sizes = {
         'fastq1': 93520000,
         'fastq2': 97604000,
         'bwa_index': 3364568000
     }
     payload = {
         'input_size_in_bytes': sizes,
         'parameters': {'nThreads': 4}
     }
     result = B.benchmark('bwa-mem', payload)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.xlarge'
     print(result)
Пример #24
0
 def test_benchmark11(self):
     # Benchmark 'hi-c-processing-partc' with 'input_cool' and 'input_hic'
     # sizes and ncores=1; checks the recommended AWS instance type.
     app = 'hi-c-processing-partc'
     sizes = {'input_cool': 1000000000, 'input_hic': 2000000000}
     payload = {
         'input_size_in_bytes': sizes,
         'parameters': {'ncores': 1}
     }
     result = B.benchmark(app, payload)
     print(app)
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'r5a.large'
Пример #25
0
 def test_benchmark_atacseq_aln(self):
     # Benchmark 'encode-atacseq-aln' with four fastq sizes, an index size,
     # atac.bowtie2.cpu=4 and atac.paired_end=True.
     print("testing atacseq-aln")
     sizes = {
         'atac.fastqs': [1200000000, 1200000000, 1500000000, 1500000000],
         'atac.bowtie2_idx_tar': 5000000000
     }
     params = {'atac.bowtie2.cpu': 4, 'atac.paired_end': True}
     payload = {'input_size_in_bytes': sizes, 'parameters': params}
     result = B.benchmark('encode-atacseq-aln', payload)
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'c5.xlarge'
Пример #26
0
 def test_benchmark_atacseq(self):
     # Benchmark 'encode-atacseq' with two fastq sizes, an index size and
     # atac.bowtie2.cpu=4; checks instance type, min_CPU and total size.
     print("testing atacseq")
     sizes = {
         'atac.fastqs': [2000000000, 3000000000],
         'atac.bowtie2_idx_tar': 5000000000
     }
     payload = {
         'input_size_in_bytes': sizes,
         'parameters': {'atac.bowtie2.cpu': 4}
     }
     result = B.benchmark('encode-atacseq', payload)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.2xlarge'
     assert result['min_CPU'] == 6
     assert int(result['total_size_in_GB']) == 55
Пример #27
0
 def test_benchmark13(self):
     # Benchmark 'hi-c-processing-pairs' with three 'input_pairs' sizes,
     # nthreads=8 and maxmem='32g'; checks min_CPU and the instance type.
     app = 'hi-c-processing-pairs'
     pairs_sizes = [1000000000, 2000000000, 3000000000]
     payload = {
         'input_size_in_bytes': {'input_pairs': pairs_sizes},
         'parameters': {'nthreads': 8, 'maxmem': '32g'}
     }
     result = B.benchmark(app, payload)
     print(app)
     print("benchmark13")
     print(result)
     assert 'aws' in result
     assert result['min_CPU'] == 8
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'r5a.2xlarge'
Пример #28
0
 def test_benchmark_chipseq_postaln2(self):
     # Benchmark 'encode-chipseq-postaln' with MB2B-built size lists,
     # chip.spp_cpu=4 and chip.pipeline_type='tf'.
     print("testing chipseq")
     tas_sizes = [MB2B(115.37), MB2B(115.37)]
     ctl_tas_sizes = [MB2B(220.56), MB2B(220.56)]
     no_filt_sizes = [MB2B(140.59), MB2B(140.59)]
     sizes = {
         'chip.tas': tas_sizes,
         'chip.ctl_tas': ctl_tas_sizes,
         'chip.bam2ta_no_filt_R1.ta': no_filt_sizes
     }
     params = {'chip.spp_cpu': 4, 'chip.pipeline_type': 'tf'}
     payload = {'input_size_in_bytes': sizes, 'parameters': params}
     result = B.benchmark('encode-chipseq-postaln', payload)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 'r5a.4xlarge'
Пример #29
0
 def test_benchmark12(self):
     # Benchmark 'hi-c-processing-bam' with two 'input_bams' sizes, a
     # 'chromsize' size and two thread parameters; checks the instance
     # type and min_CPU.
     app = 'hi-c-processing-bam'
     sizes = {
         'input_bams': [1000000000, 2000000000],
         'chromsize': 200000
     }
     params = {'nthreads_parse_sort': 1, 'nthreads_merge': 8}
     payload = {'input_size_in_bytes': sizes, 'parameters': params}
     result = B.benchmark(app, payload)
     print(app)
     print("benchmark12")
     print(result)
     assert 'aws' in result
     aws = result['aws']
     assert 'recommended_instance_type' in aws
     assert aws['recommended_instance_type'] == 't3.2xlarge'
     assert result['min_CPU'] == 8
Пример #30
0
def update_config(cfg, app_name, input_files, parameters):
    """Fill defaults in ``cfg`` and complete it from a benchmark result.

    ``cfg`` is updated in place and nothing is returned. Missing keys are
    first given defaults. Then, unless ``instance_type``, ``ebs_size`` and
    ``EBS_optimized`` are all set (not ``''`` / ``0`` / ``''``), the input
    file sizes are collected and ``B.benchmark`` is asked for the unset
    values. Values that are already set are never overwritten.

    Args:
        cfg: configuration dict; any of the keys handled here may be
            absent.
        app_name: forwarded to ``B.benchmark``; must be truthy whenever
            benchmarking is needed.
        input_files: mapping of argument name to a dict with
            ``'bucket_name'`` and ``'object_key'`` (one key or a list of
            keys).
        parameters: forwarded to ``B.benchmark`` under ``'parameters'``.

    Raises:
        Exception: if benchmarking is needed but ``app_name`` is empty, if
            the benchmark call fails, or if the benchmark returns ``None``
            while a setting is still unset.
    """
    # deal with missing fields
    field_defaults = (
        ('instance_type', ''),
        ('ebs_size', 0),
        ('EBS_optimized', ''),
        ('ebs_type', 'gp2'),
        ('ebs_iops', ''),
        ('shutdown_min', 'now'),
        ('password', ''),
        ('key_name', ''),
    )
    for field, default in field_defaults:
        cfg.setdefault(field, default)

    # add benchmarking result
    if (cfg['instance_type'] != '' and cfg['ebs_size'] != 0
            and cfg['EBS_optimized'] != ''):
        return

    input_size_in_bytes = dict()
    # .items() instead of .iteritems(): the latter does not exist on
    # Python 3 dicts, while .items() works on both Python 2 and 3.
    for argname, f in input_files.items():
        bucket = f['bucket_name']
        object_key = f['object_key']
        if isinstance(object_key, list):
            size = flatten(
                run_on_nested_arrays1(object_key, get_file_size,
                                      bucket=bucket))
        else:
            size = get_file_size(object_key, bucket)
        input_size_in_bytes[str(argname)] = size

    print({"input_size_in_bytes": input_size_in_bytes})
    if not app_name:
        # Separating spaces added: the pieces used to be concatenated as
        # "Benchmarking.Without" and "must bein".
        err_msg = "app_name must be provided to use Benchmarking. " + \
                  "Without app_name, instance_type, ebs_size and EBS_optimized must be " + \
                  "in the config field of the execution json."
        raise Exception(err_msg)
    try:
        res = B.benchmark(
            app_name, {
                'input_size_in_bytes': input_size_in_bytes,
                'parameters': parameters
            })
    except Exception:
        # `res` is unbound when the call fails, so the former nested
        # try/except could only ever produce this message. Catching
        # Exception (not a bare except) lets KeyboardInterrupt through.
        raise Exception("Benchmarking not working. : None")

    if res is None:
        # No recommendation available: every unset field is an error.
        if cfg['instance_type'] == '':
            raise Exception("instance type cannot be determined nor given")
        if cfg['ebs_size'] == 0:
            raise Exception("ebs_size cannot be determined nor given")
        if cfg['EBS_optimized'] == '':
            raise Exception("EBS_optimized cannot be determined nor given")
        return

    logger.info(str(res))
    total_size_in_gb = res['total_size_in_GB']
    # Never go below 10; otherwise truncate the total and add one.
    ebs_size = 10 if total_size_in_gb < 10 else int(total_size_in_gb) + 1

    if cfg['instance_type'] == '':
        cfg['instance_type'] = res['aws']['recommended_instance_type']
    if cfg['ebs_size'] == 0:
        cfg['ebs_size'] = ebs_size
    if cfg['EBS_optimized'] == '':
        cfg['EBS_optimized'] = res['aws']['EBS_optimized']