Example #1
def plot_runtime(ex, fname, func_xvalues, xlabel, func_title=None):
    """
    Plot the mean wall-clock time (with standard-deviation error bars, log
    y-scale) of each method against the x-axis values given by func_xvalues.
    Return the loaded results dictionary.
    """
    results = glo.ex_load_result(ex, fname)
    value_accessor = lambda job_results: job_results['time_secs']
    vf_time = np.vectorize(value_accessor)
    # results['job_results'] is a (repeats x len(prob_params) x n_methods)
    # object array; each entry is the dictionary returned by perform_test().
    times = vf_time(results['job_results'])
    repeats, _, n_methods = results['job_results'].shape
    time_avg = np.mean(times, axis=0)
    time_std = np.std(times, axis=0)

    xvalues = func_xvalues(results)

    line_styles = func_plot_fmt_map()
    method_labels = get_func2label_map()

    func_names = [f.__name__ for f in results['method_job_funcs']]
    for i in range(n_methods):
        fmt = line_styles[func_names[i]]
        method_label = method_labels[func_names[i]]
        plt.errorbar(xvalues,
                     time_avg[:, i],
                     yerr=time_std[:, i],
                     fmt=fmt,
                     label=method_label)

    ylabel = 'Time (s)'
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.xlim([np.min(xvalues), np.max(xvalues)])
    plt.xticks(xvalues, xvalues)
    plt.legend(loc='best')
    plt.gca().set_yscale('log')
    if func_title is None:
        title = '%s. %d trials.' % (results['prob_label'], repeats)
    else:
        title = func_title(results)
    plt.title(title)
    return results
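
A minimal usage sketch (not from the source): assuming this module's globals
(glo, plt, func_plot_fmt_map, get_func2label_map) are set up as in kgof's
experiment scripts, and using a hypothetical experiment number and result
file name, the runtimes can be plotted with a one-line x-value accessor.

import matplotlib.pyplot as plt

# Hypothetical values; the file must have been produced by an experiment run.
ex = 2
fname = 'ex2-gmd-me6_n1000_rs200_pmi1_pma5_a0.050_trp0.50.p'
# Use the problem parameters stored in the results dict as x-axis values.
results = plot_runtime(ex, fname,
                       func_xvalues=lambda r: r['prob_params'],
                       xlabel='Problem parameter')
plt.show()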
Example #2
def run_problem(prob_label):
    """Run the experiment"""
    L = get_pqsource_list(prob_label)
    prob_params, ps, data_sources = zip(*L)
    # make them lists
    prob_params = list(prob_params)
    ps = list(ps)
    data_sources = list(data_sources)

    # ///////  submit jobs //////////
    # create folder name string
    from kgof.config import expr_configs
    tmp_dir = expr_configs['scratch_path']
    foldername = os.path.join(tmp_dir, 'kgof_slurm', 'e%d' % ex)
    logger.info("Setting engine folder to %s" % foldername)

    # create parameter instance that is needed for any batch computation engine
    logger.info("Creating batch parameter instance")
    batch_parameters = BatchClusterParameters(foldername=foldername,
                                              job_name_base="e%d_" % ex,
                                              parameter_prefix="")

    # Use the following line if Slurm queue is not used.
    #engine = SerialComputationEngine()
    engine = SlurmComputationEngine(batch_parameters)
    #engine = SlurmComputationEngine(batch_parameters, partition='wrkstn,compute')
    n_methods = len(method_job_funcs)
    # repetitions x len(prob_params) x #methods
    aggregators = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                # name used to save the result
                func_name = f.__name__
                fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' % (
                    prob_label, func_name, sample_size, r, param, alpha,
                    tr_proportion)
                if not is_rerun and glo.ex_file_exists(ex, prob_label, fname):
                    logger.info('%s exists. Load and return.' % fname)
                    job_result = glo.ex_load_result(ex, prob_label, fname)

                    sra = SingleResultAggregator()
                    sra.submit_result(SingleResult(job_result))
                    aggregators[r, pi, mi] = sra
                else:
                    # No saved result exists, or a rerun was requested.

                    # p: an UnnormalizedDensity object
                    p = ps[pi]
                    job = Ex2Job(SingleResultAggregator(), p, data_sources[pi],
                                 prob_label, r, f, param)
                    agg = engine.submit_job(job)
                    aggregators[r, pi, mi] = agg

    # let the engine finish its business
    logger.info("Wait for all call in engine")
    engine.wait_for_all()

    # ////// collect the results ///////////
    logger.info("Collecting results")
    job_results = np.empty((reps, len(prob_params), n_methods), dtype=object)
    for r in range(reps):
        for pi, param in enumerate(prob_params):
            for mi, f in enumerate(method_job_funcs):
                logger.info("Collecting result (%s, r=%d, param=%.3g)" %
                            (f.__name__, r, param))
                # let the aggregator finalize things
                aggregators[r, pi, mi].finalize()

                # aggregators[r, pi, mi].get_final_result() returns a
                # SingleResult instance, from which we extract the actual result.
                job_result = aggregators[r, pi, mi].get_final_result().result
                job_results[r, pi, mi] = job_result


    # save results
    results = {
        'job_results': job_results,
        'prob_params': prob_params,
        'alpha': alpha,
        'repeats': reps,
        'ps': ps,
        'list_data_source': data_sources,
        'tr_proportion': tr_proportion,
        'method_job_funcs': method_job_funcs,
        'prob_label': prob_label,
        'sample_size': sample_size,
    }

    # file name used to save the aggregated results
    fname = 'ex%d-%s-me%d_n%d_rs%d_pmi%g_pma%g_a%.3f_trp%.2f.p' % (
        ex, prob_label, n_methods, sample_size, reps, min(prob_params),
        max(prob_params), alpha, tr_proportion)

    glo.ex_save_result(ex, results, fname)
    logger.info('Saved aggregated results to %s' % fname)
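
A minimal driver sketch (an assumption, not part of the source): run_problem
reads module-level globals such as ex, reps, sample_size, alpha,
tr_proportion, method_job_funcs, and is_rerun, so an entry point only needs
to choose problem labels. The labels below are hypothetical and must be
recognized by get_pqsource_list().

def main():
    # Hypothetical problem labels.
    for prob_label in ['gmd', 'gvd']:
        run_problem(prob_label)

if __name__ == '__main__':
    main()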
Example #3
def plot_prob_reject(ex,
                     fname,
                     func_xvalues,
                     xlabel,
                     func_title=None,
                     return_plot_values=False):
    """
    plot the empirical probability that the statistic is above the threshold.
    This can be interpreted as type-1 error (when H0 is true) or test power 
    (when H1 is true). The plot is against the specified x-axis.

    - ex: experiment number 
    - fname: file name of the aggregated result
    - func_xvalues: function taking aggregated results dictionary and return the values 
        to be used for the x-axis values.            
    - xlabel: label of the x-axis. 
    - func_title: a function: results dictionary -> title of the plot
    - return_plot_values: if true, also return a PlotValues as the second
      output value.

    Return loaded results
    """

    results = glo.ex_load_result(ex, fname)

    def rej_accessor(jr):
        rej = jr['test_result']['h0_rejected']
        # Returning a float makes np.vectorize() produce a float array,
        # which can also hold NaN values.
        return float(rej)

    vf_rej = np.vectorize(rej_accessor)
    # results['job_results'] is a (repeats x len(prob_params) x n_methods)
    # object array; each entry is the dictionary returned by perform_test().
    rejs = vf_rej(results['job_results'])
    repeats, _, n_methods = results['job_results'].shape

    # yvalues (corresponding to xvalues) x #methods
    mean_rejs = np.mean(rejs, axis=0)

    xvalues = func_xvalues(results)

    line_styles = func_plot_fmt_map()
    method_labels = get_func2label_map()

    func_names = [f.__name__ for f in results['method_job_funcs']]
    plotted_methods = []
    for i in range(n_methods):
        fmt = line_styles[func_names[i]]
        method_label = method_labels[func_names[i]]
        plotted_methods.append(method_label)
        plt.plot(xvalues, mean_rejs[:, i], fmt, label=method_label)

    ylabel = 'Rejection rate'
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.xticks(xvalues)

    alpha = results['alpha']
    plt.legend(loc='best')
    if func_title is None:
        title = '%s. %d trials. $\\alpha$ = %.2g.' % (
            results['prob_label'], repeats, alpha)
    else:
        title = func_title(results)
    plt.title(title)
    plt.grid()
    if return_plot_values:
        return results, PlotValues(xvalues=xvalues,
                                   methods=plotted_methods,
                                   plot_matrix=mean_rejs.T)
    else:
        return results
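
A usage sketch mirroring the one after Example #1, again with a hypothetical
file name. With return_plot_values=True the second output is a PlotValues;
assuming it exposes its constructor arguments as attributes, the raw curves
can be reused for custom plots.

import matplotlib.pyplot as plt

ex = 2
fname = 'ex2-gmd-me6_n1000_rs200_pmi1_pma5_a0.050_trp0.50.p'
results, pv = plot_prob_reject(ex, fname,
                               func_xvalues=lambda r: r['prob_params'],
                               xlabel='Problem parameter',
                               return_plot_values=True)
# Assumed attribute access: one row per method, one column per x value.
print(pv.methods, pv.plot_matrix.shape)
plt.show()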