示例#1
0
def run_dataset(prob_label):
    """Run the experiment"""
    sample_source, n = get_sample_source(prob_label)

    # ///////  submit jobs //////////
    # create folder name string
    home = os.path.expanduser("~")
    foldername = os.path.join(home, "freqopttest_slurm", 'e%d'%ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(
        foldername=foldername, job_name_base="e%d_"%ex, parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters, do_clean_up=True)
    n_methods = len(method_job_funcs)
    # repetitions x  #methods
    aggregators = np.empty((reps, n_methods ), dtype=object)
    d = sample_source.dim()
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            # name used to save the result
            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                logger.info('%s exists. Load and return.'%fname)
                test_result = glo.ex_load_result(ex, prob_label, fname)
                sra = SingleResultAggregator()
                sra.submit_result(SingleResult(test_result))

                aggregators[r, mi] = sra
            else:
                # result not exists or rerun
                job = Ex5Job(SingleResultAggregator(), prob_label, r, n, f)
                agg = engine.submit_job(job)
                aggregators[r, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    test_results = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d)" % (f.__name__, r ))
            # let the aggregator finalize things
            aggregators[r, mi].finalize()

            # aggregators[i].get_final_result() returns a SingleResult instance,
            # which we need to extract the actual result
            test_result = aggregators[r, mi].get_final_result().result
            test_results[r, mi] = test_result

            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            glo.ex_save_result(ex, test_result, prob_label, fname)

    func_names = [f.__name__ for f in method_job_funcs]
    func2labels = exglobal.get_func2label_map()
    method_labels = [func2labels[f] for f in func_names if f in func2labels]
    # save results 
    results = {'results': test_results, 'n': n, 'data_fname':label2fname[prob_label],
            'alpha': alpha, 'J': J, 'sample_source': sample_source, 
            'tr_proportion': 0.5, 'method_job_funcs': method_job_funcs, 
            'prob_label': prob_label, 'method_labels': method_labels}
    
    # class name 
    fname = 'ex%d-%s-me%d_J%d_rs%d_nma%d_d%d_a%.3f_trp%.2f.p' \
        %(ex, prob_label, n_methods, J, reps, n, d, alpha, tr_proportion)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s'%fname)
示例#2
0
def run_dataset(prob_label):
    """Run the experiment"""
    sample_source, n = get_sample_source(prob_label)

    # ///////  submit jobs //////////
    # create folder name string
    home = os.path.expanduser("~")
    foldername = os.path.join(home, "freqopttest_slurm", 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters, do_clean_up=True)
    n_methods = len(method_job_funcs)
    # repetitions x  #methods
    aggregators = np.empty((reps, n_methods), dtype=object)
    d = sample_source.dim()
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            # name used to save the result
            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                logger.info('%s exists. Load and return.' % fname)
                test_result = glo.ex_load_result(ex, prob_label, fname)
                sra = SingleResultAggregator()
                sra.submit_result(SingleResult(test_result))

                aggregators[r, mi] = sra
            else:
                # result not exists or rerun
                job = Ex5Job(SingleResultAggregator(), prob_label, r, n, f)
                agg = engine.submit_job(job)
                aggregators[r, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    test_results = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d)" % (f.__name__, r))
            # let the aggregator finalize things
            aggregators[r, mi].finalize()

            # aggregators[i].get_final_result() returns a SingleResult instance,
            # which we need to extract the actual result
            test_result = aggregators[r, mi].get_final_result().result
            test_results[r, mi] = test_result

            func_name = f.__name__
            fname = '%s-%s-J%d_r%d_d%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            glo.ex_save_result(ex, test_result, prob_label, fname)

    func_names = [f.__name__ for f in method_job_funcs]
    func2labels = exglobal.get_func2label_map()
    method_labels = [func2labels[f] for f in func_names if f in func2labels]
    # save results
    results = {
        'results': test_results,
        'n': n,
        'data_fname': label2fname[prob_label],
        'alpha': alpha,
        'J': J,
        'sample_source': sample_source,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
        'method_labels': method_labels
    }

    # class name
    fname = 'ex%d-%s-me%d_J%d_rs%d_nma%d_d%d_a%.3f_trp%.2f.p' \
        %(ex, prob_label, n_methods, J, reps, n, d, alpha, tr_proportion)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
示例#3
0
def run_problem(prob_label):
    """Run the experiment"""
    L = get_pqsource_list(prob_label)
    prob_params, ps, data_sources = zip(*L)
    # make them lists
    prob_params = list(prob_params)
    ps = list(ps)
    data_sources = list(data_sources)

    # ///////  submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    from kgof.config import expr_configs
    tmp_dir = expr_configs['scratch_path']
    foldername = os.path.join(tmp_dir, 'kgof_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters)
    #engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    n_methods = len(method_job_funcs)
    # repetitions x len(prob_params) x #methods
    aggregators = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' \
                    %(prob_label, func_name, sample_size, r, param, alpha,
                            tr_proportion)
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    logger.info('%s exists. Load and return.' % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, pi, mi] = sra
                else:
                    # result not exists or rerun

                    # p: an UnnormalizedDensity object
                    p = ps[pi]
                    job = Ex2Job(SingleResultAggregator(), p, data_sources[pi],
                                 prob_label, r, f, param)
                    agg = engine.submit_job(job)
                    aggregators[r, pi, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                logger.info("Collecting result (%s, r=%d, param=%.3g)" %
                            (f.__name__, r, param))
                # let the aggregator finalize things
                aggregators[r, pi, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, pi, mi].get_final_result().result
                job_results[r, pi, mi] = job_result

    #func_names = [f.__name__ for f in method_job_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {
        'job_results': job_results,
        'prob_params': prob_params,
        'alpha': alpha,
        'repeats': reps,
        'ps': ps,
        'list_data_source': data_sources,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
        'sample_size': sample_size,
    }

    # class name
    fname = 'ex%d-%s-me%d_n%d_rs%d_pmi%g_pma%g_a%.3f_trp%.2f.p' \
        %(ex, prob_label, n_methods, sample_size, reps, min(prob_params),
                max(prob_params), alpha, tr_proportion)

    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
示例#4
0
def run_problem(folder_path, prob_label):
    """Run the experiment"""

    pl = exglo.parse_prob_label(prob_label)
    is_h0 = pl['is_h0']
    n = pl['n']
    # ///////  submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    #tmp_dir = tempfile.gettempdir()
    from fsic.config import expr_configs
    tmp_dir = expr_configs['scratch_dir']
    foldername = os.path.join(tmp_dir, 'wj_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters)
    n_methods = len(method_job_funcs)
    # repetitions x sample_sizes x #methods
    aggregators = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            # name used to save the result
            func_name = f.__name__
            fname = '%s-%s-r%d_a%.3f_trp%.2f.p' \
                %(prob_label, func_name, r, alpha, tr_proportion)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                logger.info('%s exists. Load and return.' % fname)
                job_result = glo.ex_load_result(ex, prob_label, fname)

                sra = SingleResultAggregator()
                sra.submit_result(SingleResult(job_result))
                aggregators[r, mi] = sra
            else:
                # result not exists or rerun
                job = Ex4Job(SingleResultAggregator(), folder_path, prob_label,
                             r, f)
                agg = engine.submit_job(job)
                aggregators[r, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d, n=%d)" %
                        (f.__name__, r, n))
            # let the aggregator finalize things
            aggregators[r, mi].finalize()

            # aggregators[i].get_final_result() returns a SingleResult instance,
            # which we need to extract the actual result
            job_result = aggregators[r, mi].get_final_result().result
            job_results[r, mi] = job_result

    #func_names = [f.__name__ for f in method_job_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    # - Do not store PairedSource because it can be very big.
    results = {
        'job_results': job_results,
        'n': n,
        'is_h0': is_h0,
        'alpha': alpha,
        'repeats': reps,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
    }

    # class name
    fname = 'ex%d-%s-me%d_rs%d_a%.3f_trp%.2f.p' \
        %(ex, prob_label, n_methods, reps, alpha, tr_proportion)
    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
示例#5
0
def run_problem(prob_label):
    """Run the experiment"""
    # ///////  submit jobs //////////
    # create folder name string
    #result_folder = glo.result_folder()
    from kmod.config import expr_configs
    tmp_dir = expr_configs['scratch_path']
    foldername = os.path.join(tmp_dir, 'kmod_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    partitions = expr_configs['slurm_partitions']
    if partitions is None:
        engine = SlurmComputationEngine(batch_parameters)
    else:
        engine = SlurmComputationEngine(batch_parameters, partition=partitions)
    n_methods = len(method_funcs)

    # problem setting
    ns, P, Q, ds, = get_ns_pqrsource(prob_label)

    # repetitions x len(ns) x #methods
    aggregators = np.empty((reps, len(ns), n_methods), dtype=object)

    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = '%s-%s-n%d_r%d_a%.3f.p' \
                        %(prob_label, func_name, n, r, alpha,)
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    logger.info('%s exists. Load and return.' % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, ni, mi] = sra
                else:
                    # result not exists or rerun
                    job = Ex1Job(SingleResultAggregator(), P, Q, ds,
                                 prob_label, r, f, n)

                    agg = engine.submit_job(job)
                    aggregators[r, ni, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_funcs):
                logger.info("Collecting result (%s, r=%d, n=%d)" %
                            (f.__name__, r, n))
                # let the aggregator finalize things
                aggregators[r, ni, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, ni, mi].get_final_result().result
                job_results[r, ni, mi] = job_result

    #func_names = [f.__name__ for f in method_funcs]
    #func2labels = exglobal.get_func2label_map()
    #method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {
        'job_results': job_results,
        'P': P,
        'Q': Q,
        'data_source': ds,
        'alpha': alpha,
        'repeats': reps,
        'ns': ns,
        'method_funcs': method_funcs,
        'prob_label': prob_label,
    }

    # class name
    fname = 'ex%d-%s-me%d_rs%d_nmi%d_nma%d_a%.3f.p' \
        %(ex, prob_label, n_methods, reps, min(ns), max(ns), alpha,)

    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
示例#6
0
def run_problem(prob_label):
    """Run the experiment"""
    ns, p, ds = get_ns_pqsource(prob_label)
    # ///////  submit jobs //////////
    # create folder name string
    # result_folder = glo.result_folder()
    from sbibm.third_party.kgof.config import expr_configs

    tmp_dir = expr_configs["scratch_path"]
    foldername = os.path.join(tmp_dir, "kgof_slurm", "e%d" % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    # engine = SerialComputationEngine()
    # engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    engine = SlurmComputationEngine(batch_parameters)
    n_methods = len(method_job_funcs)
    # repetitions x len(ns) x #methods
    aggregators = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = "%s-%s-n%d_r%d_a%.3f_trp%.2f.p" % (
                    prob_label,
                    func_name,
                    n,
                    r,
                    alpha,
                    tr_proportion,
                )
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    logger.info("%s exists. Load and return." % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, ni, mi] = sra
                else:
                    # result not exists or rerun

                    # p: an UnnormalizedDensity object
                    job = Ex1Job(SingleResultAggregator(), p, ds, prob_label,
                                 r, f, n)
                    agg = engine.submit_job(job)
                    aggregators[r, ni, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(ns), n_methods), dtype=object)
    for r in range(reps):
        for ni, n in enumerate(ns):
            for mi, f in enumerate(method_job_funcs):
                logger.info("Collecting result (%s, r=%d, n=%rd)" %
                            (f.__name__, r, n))
                # let the aggregator finalize things
                aggregators[r, ni, mi].finalize()

                # aggregators[i].get_final_result() returns a SingleResult instance,
                # which we need to extract the actual result
                job_result = aggregators[r, ni, mi].get_final_result().result
                job_results[r, ni, mi] = job_result

    # func_names = [f.__name__ for f in method_job_funcs]
    # func2labels = exglobal.get_func2label_map()
    # method_labels = [func2labels[f] for f in func_names if f in func2labels]

    # save results
    results = {
        "job_results": job_results,
        "data_source": ds,
        "alpha": alpha,
        "repeats": reps,
        "ns": ns,
        "p": p,
        "tr_proportion": tr_proportion,
        "method_job_funcs": method_job_funcs,
        "prob_label": prob_label,
    }

    # class name
    fname = "ex%d-%s-me%d_rs%d_nmi%d_nma%d_a%.3f_trp%.2f.p" % (
        ex,
        prob_label,
        n_methods,
        reps,
        min(ns),
        max(ns),
        alpha,
        tr_proportion,
    )

    glo.ex_save_result(ex, results, fname)
    logger.info("Saved aggregated results to %s" % fname)
示例#7
0
                %(prob_label, func_name, J, r, d, alpha, tr_proportion)
            if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                logger.info('%s exists. Load and return.'%fname)
                test_result = glo.ex_load_result(ex, prob_label, fname)

                sra = SingleResultAggregator()
                if test_result is SingleResult:
                    sra.submit_result(test_result)
                else:
                    sra.submit_result(SingleResult(test_result))

                aggregators[r, mi] = sra
            else:
                # result not exists or rerun
                job = Ex4Job(SingleResultAggregator(), prob_label, r, n, f)
                agg = engine.submit_job(job)
                aggregators[r, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    test_results = np.empty((reps, n_methods), dtype=object)
    for r in range(reps):
        for mi, f in enumerate(method_job_funcs):
            logger.info("Collecting result (%s, r=%d)" % (f.__name__, r ))
            # let the aggregator finalize things
            aggregators[r, mi].finalize()