Example #1
File: MNIST.py Project: halo8218/MNIST
def MBAP(pruning_rate_per_layer, is_first, is_last):
    _, mask_1 = utils.prune(L1, pruning_rate_per_layer)
    L1_ap = tf.cond(is_first, lambda: L1 * mask_1, lambda: L1)
    L2_ap = tf.nn.relu(tf.matmul(L1_ap, W2) + B2)
    _, mask_2 = utils.prune(L2_ap, pruning_rate_per_layer)
    pruned_L2_ap = tf.cond(is_last, lambda: L2_ap * mask_2, lambda: L2_ap)
    model_ap = tf.matmul(pruned_L2_ap, W3) + B3
    return model_ap
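
For reference, a minimal sketch of what a utils.prune(tensor, rate) helper with this (pruned_tensor, mask) return signature might look like. This is a hypothetical magnitude-based version for TF1 graph mode, not the project's actual implementation:

import tensorflow as tf

def prune(tensor, pruning_rate):
    # Keep the (1 - pruning_rate) fraction of largest-magnitude entries
    # per row, zero out the rest, and also return the binary mask.
    num_cols = tf.cast(tf.shape(tensor)[-1], tf.float32)
    k = tf.maximum(tf.cast(num_cols * (1.0 - pruning_rate), tf.int32), 1)
    top_k = tf.nn.top_k(tf.abs(tensor), k=k)
    threshold = tf.reduce_min(top_k.values, axis=-1, keepdims=True)
    mask = tf.cast(tf.abs(tensor) >= threshold, tensor.dtype)
    return tensor * mask, mask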
Example #2
def add_event():
    if request.method == 'GET':
        return render_template('forms/add_event.html', event=None)

    form = request.form.copy()
    utils.prune(form)

    utils.format_time(form, 'start')
    utils.format_time(form, 'end')

    events.add(form)
    return redirect(url_for('hub'))
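
Here utils.prune(form) is called only for its side effect on the copied form. A plausible sketch (hypothetical; the project's helper may differ) that drops empty fields in place:

def prune(form):
    # Remove keys whose values are empty or whitespace-only, in place.
    for key in [k for k, v in form.items() if not str(v).strip()]:
        del form[key]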
Example #3
def edit_event():
    if request.method == 'GET':
        ID = request.args.get('ID')
        event = events.get(ID)
        utils.reverse_format_time(event, 'start')
        utils.reverse_format_time(event, 'end')
        return render_template('forms/add_event.html', event=event)

    form = request.form.copy()
    utils.prune(form)
    utils.format_time(form, 'start')
    utils.format_time(form, 'end')
    events.update(form['ID'], form)
    return redirect(url_for('admin'))
Example #4
File: MNIST.py Project: halo8218/MNIST
def MBFD(pruning_rate_per_layer):
    #L1_mb, _ = utils.prune(L1, pruning_rate_per_layer)
    L1_mb = L1
    L2_mb_prev = tf.matmul(L1_mb, W2)
    pruned_L2_mb_prev, _ = utils.prune(L2_mb_prev, pruning_rate_per_layer)
    L2_mb = tf.nn.relu(pruned_L2_mb_prev + B2)
    model_mb = tf.matmul(L2_mb, W3) + B3
    return model_mb
Example #5
File: MNIST.py Project: halo8218/MNIST
def MFD(pruning_rate_per_layer):
    adv_feat_mf_1, adv_feat_mf_2 = compare(is_grad_compare)
    #L1_mf = L1 * utils.mask_vec(adjusted_feat_1, pruning_rate_per_layer)
    L1_mf = L1
    L2_mf_prev = tf.matmul(L1_mf, W2)
    _, mask = utils.prune(L2_mf_prev / adv_feat_mf_2, pruning_rate_per_layer)
    L2_mf_prev = L2_mf_prev * mask
    L2_mf = tf.nn.relu(L2_mf_prev + B2)
    model_mf = tf.matmul(L2_mf, W3) + B3
    return model_mf
Example #6
File: CIFAR10.py Project: halo8218/MNIST
def MBAP(pruning_rate_per_layer, is_first):
    _, mask_1 = utils.prune_conv_feature(h_conv1, pruning_rate_per_layer)
    h_conv1_ap = tf.cond(is_first, lambda: h_conv1 * mask_1, lambda: h_conv1)
    h_pool1_ap = tf.nn.max_pool(h_conv1_ap,
                                ksize=[1, 3, 3, 1],
                                strides=[1, 2, 2, 1],
                                padding='SAME')
    h_conv2_ap_prev = tf.nn.conv2d(h_pool1_ap,
                                   W_conv2,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
    h_conv2_ap = tf.nn.relu(h_conv2_ap_prev + b_conv2)
    h_conv2_ap, _ = utils.prune_conv_feature(h_conv2_ap,
                                             pruning_rate_per_layer)
    h_pool2_ap = tf.nn.max_pool(h_conv2_ap,
                                ksize=[1, 3, 3, 1],
                                strides=[1, 2, 2, 1],
                                padding='SAME')
    h_conv3_ap_prev = tf.nn.conv2d(h_pool2_ap,
                                   W_conv3,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
    h_conv3_ap = tf.nn.relu(h_conv3_ap_prev + b_conv3)
    h_conv3_ap, _ = utils.prune_conv_feature(h_conv3_ap,
                                             pruning_rate_per_layer)
    h_conv4_ap_prev = tf.nn.conv2d(h_conv3_ap,
                                   W_conv4,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
    h_conv4_ap = tf.nn.relu(h_conv4_ap_prev + b_conv4)
    h_conv4_ap, _ = utils.prune_conv_feature(h_conv4_ap,
                                             pruning_rate_per_layer)
    h_conv5_ap_prev = tf.nn.conv2d(h_conv4_ap,
                                   W_conv5,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
    h_conv5_ap = tf.nn.relu(h_conv5_ap_prev + b_conv5)
    h_conv5_ap, _ = utils.prune_conv_feature(h_conv5_ap,
                                             pruning_rate_per_layer)
    h_conv5_ap_flat = tf.reshape(h_conv5_ap, [-1, 8 * 8 * 128])
    h_fc1_ap_prev = tf.matmul(h_conv5_ap_flat, W_fc1)
    h_fc1_ap = tf.nn.relu(h_fc1_ap_prev + b_fc1)
    h_fc1_ap, _ = utils.prune(h_fc1_ap, pruning_rate_per_layer)

    model_ap = tf.matmul(h_fc1_ap, W_fc2) + b_fc2

    return model_ap
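
prune_conv_feature applies the same masking idea to 4-D conv activations. A hypothetical TF1 sketch that flattens each example, thresholds by activation magnitude, and reshapes the mask back to the original [N, H, W, C] shape:

def prune_conv_feature(feature_map, pruning_rate):
    # Flatten each example so a single top-k threshold covers H * W * C.
    shape = tf.shape(feature_map)
    flat = tf.reshape(feature_map, [shape[0], -1])
    num_units = tf.cast(tf.shape(flat)[-1], tf.float32)
    k = tf.maximum(tf.cast(num_units * (1.0 - pruning_rate), tf.int32), 1)
    threshold = tf.reduce_min(tf.nn.top_k(tf.abs(flat), k=k).values,
                              axis=-1, keepdims=True)
    mask = tf.reshape(tf.cast(tf.abs(flat) >= threshold, feature_map.dtype),
                      shape)
    return feature_map * mask, mask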
Example #7
    def prune(splited):
        # Drop spans whose text is empty after pruning
        old2new = dict()
        new_splited = list()
        for (idx, span) in enumerate(splited):
            span['text'] = utils.prune(span['text'])
            if span['text'] == '':
                continue
            old2new[idx] = len(new_splited)
            new_splited.append(span)

        # Remap each span's ifobj indices to the new positions after pruning
        for item in new_splited:
            if item['ifobj'] is not None:
                item['ifobj'] = [old2new[obj] for obj in item['ifobj'] if obj in old2new]
        
        return new_splited
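
In this example utils.prune works on strings rather than tensors: it normalizes a span's text so that empty spans can be filtered out. A minimal hypothetical stand-in:

def prune(text):
    # Collapse runs of whitespace and strip the ends, so spans that
    # contain only whitespace become the empty string.
    return ' '.join(text.split())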
Example #8
    def __init__(self,
                 base_dir,
                 batch_size,
                 mode=1,
                 cls=1,
                 prune=None,
                 de_norm=False):
        TRAIN = 1
        TEST = 2

        self.base_dir = base_dir
        self.batch_size = batch_size
        ds_dir = os.path.join(self.base_dir, 'dataset/class_{}'.format(cls))
        if mode == TRAIN:
            self.x = utils.pickle_load(ds_dir + '/imgs_train.pkl')
            self.y = utils.pickle_load(ds_dir + '/marks_train.pkl')
        elif mode == TEST:
            self.x = utils.pickle_load(ds_dir + '/imgs_test.pkl')
            self.y = utils.pickle_load(ds_dir + '/marks_test.pkl')
        else:
            raise ValueError("Invalid option, should be one of {} or {}".format(
                TRAIN, TEST))

        if de_norm:
            self.x = utils.de_norm(self.x)
            self.y = utils.de_norm(self.y)

        self.labels = np.array(
            [1 if np.sum(mask) > 0 else 0 for mask in self.y])

        if prune is not None:
            self.x, self.y, self.labels = utils.prune(self.x, self.y,
                                                      self.labels, prune)

        self.x = utils.norm(self.x)
        self.y = utils.norm(self.y)
        self.classes = np.unique(self.labels)
        self.per_class_ids = {}
        ids = np.array(range(len(self.x)))
        for c in self.classes:
            self.per_class_ids[c] = ids[self.labels == c]

        print(Counter(self.labels))
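
Here utils.prune(x, y, labels, prune) shrinks the dataset before normalization. One plausible sketch, under the hypothetical reading that the prune argument is the fraction of negative (label 0) samples to keep:

import numpy as np

def prune(x, y, labels, keep_fraction):
    # Keep every positive (label 1) sample; subsample the negatives.
    neg = np.where(labels == 0)[0]
    pos = np.where(labels == 1)[0]
    kept_neg = np.random.choice(neg, int(len(neg) * keep_fraction),
                                replace=False)
    idx = np.sort(np.concatenate([pos, kept_neg]))
    return x[idx], y[idx], labels[idx]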
Example #9
def plot_jct_cdf(logfile_paths,
                 labels,
                 v100s,
                 p100s,
                 k80s,
                 max_input_job_rate,
                 policies,
                 min_job_id,
                 max_job_id,
                 partition=True,
                 finish_time_fairness=False,
                 output_directory=None):
    from utils import get_jcts, prune

    lambdas = list(set([x[5] for x in logfile_paths]))
    lambdas.sort(reverse=True)
    print(policies)

    for l in lambdas:
        handles_in_legend = []
        labels_in_legend = []

        input_job_rate = 3600.0 / l
        if input_job_rate > max_input_job_rate:
            continue
        print("Input job rate: %.2f" % input_job_rate)

        plt.figure(figsize=(8, 3))
        if partition:
            axes = [
                plt.subplot2grid((1, 2), (0, 0), rowspan=1),
                plt.subplot2grid((1, 2), (0, 1), rowspan=1),
            ]
            titles = ["Short jobs", "Long jobs"]
        else:
            axes = [
                plt.subplot2grid((1, 1), (0, 0), rowspan=1),
            ]
            titles = [None]
            if not finish_time_fairness:
                axes.append(axes[0].inset_axes([0.4, 0.2, 0.5, 0.6]))
                titles.append(None)

        if finish_time_fairness:
            relevant_logfile_paths = list(
                reversed(
                    prune(logfile_paths,
                          v100s,
                          p100s,
                          k80s,
                          "isolated",
                          seed=0)))
            relevant_logfile_paths = [
                x for x in relevant_logfile_paths if x[0] == l
            ]
            if len(relevant_logfile_paths) != 1:
                continue
            isolated_jcts = get_jcts(relevant_logfile_paths[0][1],
                                     seed=0,
                                     min_job_id=min_job_id,
                                     max_job_id=max_job_id)
            isolated_jcts.sort(key=lambda x: x[1])
        linestyles = ['--', '-.', ':', '--', '-.']
        for i, policy in enumerate(policies):
            relevant_logfile_paths = list(
                reversed(
                    prune(logfile_paths, v100s, p100s, k80s, policy, seed=0)))
            relevant_logfile_paths = [
                x for x in relevant_logfile_paths if x[0] == l
            ]
            if len(relevant_logfile_paths) != 1:
                continue
            jcts = get_jcts(relevant_logfile_paths[0][1],
                            seed=0,
                            min_job_id=min_job_id,
                            max_job_id=max_job_id)
            jcts.sort(key=lambda x: x[1])
            partition_point = int(len(jcts) * 0.8)
            if finish_time_fairness:
                jcts = [x[0] / y[0] for (x, y) in zip(jcts, isolated_jcts)]
            else:
                jcts = [x[0] for x in jcts]

            print("%s: %.2f" % (policy, np.mean(jcts)))
            if partition:
                jcts = np.split(np.array(jcts), [partition_point])
            else:
                jcts = [np.array(jcts), np.array(jcts)]
            for j, (ax, jcts_segment) in enumerate(zip(axes, jcts)):
                jcts_segment.sort()
                percentiles = [(k + 1) / len(jcts_segment)
                               for k in range(len(jcts_segment))]

                if "Gavel" in labels[policy]:
                    handle = ax.plot(jcts_segment,
                                     percentiles,
                                     color=current_palette[i],
                                     linestyle='-',
                                     linewidth=3)
                else:
                    handle = ax.plot(jcts_segment,
                                     percentiles,
                                     color=current_palette[i],
                                     linestyle=linestyles[i])
                if j == 0:
                    handles_in_legend.append(handle[0])
                    labels_in_legend.append(labels[policy])

        for i, (ax, title) in enumerate(zip(axes, titles)):
            if finish_time_fairness:
                if partition:
                    ax.set_xlabel("FTF" + "\n" + title)
                else:
                    ax.set_xlabel("FTF")
                ax.set_xlim([0, 4])
                ax.set_xticks([0, 1, 2, 3, 4])
                ax.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])
            else:
                if partition:
                    ax.set_xlabel("JCT (hrs)" + "\n" + title)
                else:
                    if i == 0:
                        ax.set_xlabel("JCT (hrs)")
                if not partition:
                    if i == 0:
                        ax.set_xlim([0, 500])
                        ax.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])
                    else:
                        ax.set_xlim([0, 25])
                        ax.set_xticks([0, 5, 10, 15, 20, 25])
                        ax.set_yticks([0, 0.33, 0.67, 1.0])
            if i == 0:
                ax.set_ylabel("Fraction of jobs")
            if partition:
                if i > 0:
                    ax.set_yticklabels(["", "", "", "", "", ""])
            ax.set_ylim([0, 1.0])
        sns.despine()

        if not partition and not finish_time_fairness:
            axes[0].indicate_inset_zoom(axes[1], linewidth=3)

        leg = plt.figlegend(handles=handles_in_legend,
                            labels=labels_in_legend,
                            ncol=3,
                            frameon=False,
                            loc='upper center')

        bb = leg.get_bbox_to_anchor().transformed(axes[0].transAxes.inverted())
        bb.y0 += 0.22
        bb.y1 += 0.22
        leg.set_bbox_to_anchor(bb, transform=axes[0].transAxes)

        if output_directory is not None:
            output_filename = os.path.join(
                output_directory,
                "input_job_rate=%d.pdf" % (input_job_rate * 10))
            with PdfPages(output_filename) as pdf:
                pdf.savefig(bbox_inches='tight')

        plt.show()
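
The prune imported from utils in this and the later plotting examples filters the raw experiment logs down to one cluster configuration and policy. A sketch under the assumption, consistent with the x[5] indexing before pruning and the x[0]/x[1]/x[2] indexing afterwards, that each raw entry is a (v100s, p100s, k80s, policy, seed, lambda, path) tuple:

def prune(logfile_paths, v100s, p100s, k80s, policy, seed=None):
    # Keep entries matching the requested cluster shape and policy
    # (and seed, if given), reduced to (lambda, path, seed) tuples.
    pruned = []
    for (v, p, k, pol, s, lam, path) in logfile_paths:
        if (v, p, k, pol) != (v100s, p100s, k80s, policy):
            continue
        if seed is not None and s != seed:
            continue
        pruned.append((lam, path, s))
    return pruned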
Example #10
def plot_metric_vs_inverse_lambda_different_metric_fns(logfile_paths,
                                                       labels,
                                                       v100s,
                                                       p100s,
                                                       k80s,
                                                       policies,
                                                       metric_fns,
                                                       metric_fn_labels,
                                                       metric_label,
                                                       xmin=0,
                                                       xmax=None,
                                                       ymin=0,
                                                       ymax=None,
                                                       output_filename=None,
                                                       verbose=False):
    from utils import prune

    plt.figure(figsize=(8, 3))
    ax = plt.subplot2grid((1, 1), (0, 0), colspan=1)

    data = {"input_job_rate": [], "metric": [], "seed": [], "policy": []}
    print(policies)
    for policy in policies:
        relevant_logfile_paths = list(
            reversed(prune(logfile_paths, v100s, p100s, k80s, policy)))
        for metric_fn_label, metric_fn in zip(metric_fn_labels, metric_fns):
            lambdas = [x[0] for x in relevant_logfile_paths]
            input_job_rates = [3600.0 / x for x in lambdas]
            metrics = [metric_fn(x[1]) for x in relevant_logfile_paths]
            seeds = [x[2] for x in relevant_logfile_paths]
            policy_labels = [
                labels[policy] + " (%s)" % metric_fn_label
                for i in range(len(metrics))
            ]

            data["input_job_rate"] += input_job_rates
            data["metric"] += metrics
            data["seed"] += seeds
            data["policy"] += policy_labels
    if verbose:
        import pandas as pd
        df = pd.DataFrame(data)
        print(df.groupby(["policy", "input_job_rate"]).mean())

    sns.lineplot(x='input_job_rate',
                 y='metric',
                 style='policy',
                 hue='policy',
                 data=data,
                 ci='sd',
                 markers=True)

    ax.set_xlabel("Input job rate (jobs/hr)")
    ax.set_ylabel(metric_label)
    ax.set_xlim([xmin, xmax])
    ax.set_ylim([ymin, ymax])
    sns.despine()

    leg = plt.legend(frameon=False)
    bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
    bb.y0 += 0.22
    bb.y1 += 0.22
    leg.set_bbox_to_anchor(bb, transform=ax.transAxes)

    if output_filename is not None:
        with PdfPages(output_filename) as pdf:
            pdf.savefig(bbox_inches='tight')

    plt.show()
Example #11
def plot_metric_vs_inverse_lambda_different_mechanisms(all_logfile_paths,
                                                       labels,
                                                       label_modifiers,
                                                       v100s,
                                                       p100s,
                                                       k80s,
                                                       policies,
                                                       metric_fn,
                                                       metric_label,
                                                       xmax=None,
                                                       ymax=None,
                                                       output_filename=None,
                                                       extrapolate=False):
    from utils import prune

    plt.figure(figsize=(4.5, 3))
    ax = plt.subplot2grid((1, 1), (0, 0), colspan=1)

    data = {"input_job_rate": [], "metric": [], "seed": [], "policy": []}
    print(policies)
    for policy in policies:
        for logfile_paths, label_modifier in zip(all_logfile_paths,
                                                 label_modifiers):
            relevant_logfile_paths = list(
                reversed(prune(logfile_paths, v100s, p100s, k80s, policy)))
            label = labels[policy] + label_modifier

            lambdas = []
            metrics = []
            seeds = []
            for x in relevant_logfile_paths:
                metric = metric_fn(x[1])
                if metric is not None:
                    lambdas.append(x[0])
                    metrics.append(metric)
                    seeds.append(x[2])
            input_job_rates = [3600.0 / x for x in lambdas]

            policy_labels = [label for i in range(len(metrics))]
            data["input_job_rate"] += input_job_rates
            data["metric"] += metrics
            data["seed"] += seeds
            data["policy"] += policy_labels
            if len(input_job_rates) > 0 and extrapolate:
                data["input_job_rate"] += [max(input_job_rates) + 0.2]
                data["metric"] += [105.0]
                data["seed"] += [0]
                data["policy"] += [label]

    sns.lineplot(x='input_job_rate',
                 y='metric',
                 style='policy',
                 hue='policy',
                 data=data,
                 ci='sd',
                 markers=True)

    ax.set_xlabel("Input job rate (jobs/hr)")
    ax.set_ylabel(metric_label)
    ax.set_xlim([0, xmax])
    ax.set_ylim([0, ymax])
    sns.despine()

    leg = plt.legend(loc='upper left', frameon=False)
    bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
    bb.y0 += 0.1
    bb.y1 += 0.1
    leg.set_bbox_to_anchor(bb, transform=ax.transAxes)

    if output_filename is not None:
        with PdfPages(output_filename) as pdf:
            pdf.savefig(bbox_inches='tight')

    plt.show()
Example #12
    def pruning(self, num_keep):
        with torch.no_grad():
            self.mask_normal = prune(self.alphas_normal, num_keep,
                                     self.mask_normal)
            self.mask_reduce = prune(self.alphas_reduce, num_keep,
                                     self.mask_reduce)
Example #13
    def pruning(self, num_keep):
        self._mask = prune(self._arch_parameters, num_keep, self._mask)
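
In both pruning() methods above, prune(weights, num_keep, mask) returns an updated mask over the architecture parameters. A hypothetical PyTorch sketch, assuming mask is a boolean tensor with the same shape as weights and that positions masked out once stay out:

import torch

def prune(weights, num_keep, mask):
    # Exclude already-pruned positions so the kept set can only shrink.
    scores = weights.detach().clone()
    scores[~mask] = float('-inf')
    # Keep the num_keep highest-scoring entries in each row.
    idx = scores.topk(num_keep, dim=-1).indices
    new_mask = torch.zeros_like(mask)
    new_mask.scatter_(-1, idx, torch.ones_like(idx, dtype=mask.dtype))
    return new_mask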
Example #14
def main(data, target, args):
    # Names for various stuff
    model_name = 'model_{0}_{1}_{2}.lp'.format(args.data, args.kernel, args.function)
    param_name = 'model_{0}_{1}_{2}.prm'.format(args.data, args.kernel, args.function)
    solution_name = 'solution_{0}_{1}_{2}.sol'.format(args.data, args.kernel, args.function)
    ultrametric_name = 'ultrametric_{0}_{1}_{2}'.format(args.data, args.kernel, args.function)
    var_name = 'var_{0}_{1}_{2}_{3}.pkl'.format(args.data, args.data, args.kernel, args.function)
    obj_name = 'obj_{0}_{1}_{2}_{3}.pkl'.format(args.data, args.data, args.kernel, args.function)
    laminar_name = 'laminar_{0}_{1}_{2}.pkl'.format(args.data, args.kernel, args.function)
    tree_name = 'lp_tree_{0}_{1}_{2}_{3}.pdf'.format(args.data, args.kernel, args.function, args.eps)
    err_dict_name = 'error_{0}_{1}_{2}_{3}.txt'.format(args.data, args.kernel, args.function, args.eps)


    # Test other hierarchical clustering algorithms
    one_target = [x + 1 for x in target]  # a list, not a map iterator: it is reused below
    k = args.prune
    y = pdist(data, metric='euclidean')
    Z = []
    Z.append(hac.linkage(y, method='single'))
    Z.append(hac.linkage(y, method='complete'))
    Z.append(hac.linkage(y, method='average'))
    ward = hac.linkage(data, method='ward')
    Z.append(ward)
    errors = []
    while Z:
        x = Z.pop(0)
        pred = hac.fcluster(x, k, 'maxclust')
        # print('pred = ', pred)
        err = utils.error(list(pred), one_target)
        errors.append(err)
    # K means
    clf = KMeans(k)
    pred = clf.fit_predict(data)
    pred = map(lambda x: x + 1, pred)
    err = utils.error(list(pred), one_target)
    errors.append(err)
    # print('kmeans = ', pred)
    error_dict = {'single linkage': errors[0], 'complete linkage': errors[1], 'average linkage': errors[2], 'ward': errors[3]}
    error_dict['kmeans'] = errors[4]
    print(error_dict)

    # initialize model
    if args.function == 'linear':
        m = init_model(data, args.kernel, args.triangle, utils.linear)
    elif args.function == 'quadratic':
        m = init_model(data, args.kernel, args.triangle, utils.quadratic)
    elif args.function == 'cubic':
        m = init_model(data, args.kernel, args.triangle, utils.cubic)
    elif args.function == 'logarithm':
        m = init_model(data, args.kernel, args.triangle, utils.logarithm)
    elif args.function == 'exponential':
        m = init_model(data, args.kernel, args.triangle, utils.exponential)
    m._n = data.shape[0]

    # Check if reading solution from file
    if args.solution:
        print('Reading LP solution from ', args.solution)
        solution_dict = read_solution(m, args.solution)
    else:
        start = time.time()
        print('Optimizing over model')
        m.optimize()
        flag = args.triangle
        while flag and time.time() - start < args.time:
            print("Time_diff = {}".format(time.time() - start))
            m.optimize()
            # Feed solution to separation oracle
            flag = separation_oracle(m, args.triangle)
        end = time.time()
        print('Total time to optimize = {0}'.format(end - start))
        print('Writing solution to ', solution_name)
        m.write(solution_name)
        print('Saving model to ', model_name)
        m.write(model_name)
        solution_dict = get_solution_dict(solution_name)

    # print('Triangle inequality satisfied: ', check_triangle_constraints(m))
    # print('Spreading constraints satisfied: ', check_spreading_constraints(m))

    # Get ultrametric from LP
    print('Rounding LP')
    if args.function == 'linear':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.linear)
        utils.inverse_ultrametric(d, utils.inverse_linear)
    elif args.function == 'quadratic':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.quadratic)
        utils.inverse_ultrametric(d, utils.inverse_quadratic)
    elif args.function == 'cubic':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.cubic)
        utils.inverse_ultrametric(d, utils.inverse_cubic)
    elif args.function == 'exponential':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.exponential)
        utils.inverse_ultrametric(d, utils.inverse_exponential)
    elif args.function == 'logarithm':
        d = get_ultrametric_from_lp(m, solution_dict, args.eps, utils.logarithm)
        utils.inverse_ultrametric(d, utils.inverse_logarithm)

    print('d = ', d)
    cost = utils.get_cost(m, d)
    print('Cost of hierarchy: ', cost)
    print('Check ultrametric: ', utils.check_ultrametric(d))
    # print(d)
    total_obj = utils.get_total(m)
    print('Total objective = ', total_obj)
    print('Scaled cost = ', cost/total_obj)

    utils.complete_ultrametric(d)
    print('Building laminar list')
    L = utils.build_laminar_list(d)
    # print('Laminar list = ', L)
    print('Check laminar: ', utils.test_laminar(L))
    labels = [1]*m._n
    pruned = utils.prune(L, one_target, k, labels)
    print('Error on pruning: ', pruned[0])
    error_dict['lp rounding'] = pruned[0]
    with open(err_dict_name, 'w') as f:
        f.write(str(error_dict))

    # Build and draw the hierarchy
    G = utils.build_hierarchy(d)
    print('Drawing tree to ', tree_name)
    utils.draw(G, target, m._n, tree_name)
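
Several utils helpers used above have standard definitions; check_ultrametric, for instance, verifies the strong triangle inequality d(i, k) <= max(d(i, j), d(j, k)). A sketch, assuming d maps ordered point pairs (i, j) with i < j to distances:

def check_ultrametric(d):
    # The ultrametric condition must hold for every triple of points.
    def dist(a, b):
        return d[(min(a, b), max(a, b))]
    points = sorted({p for pair in d for p in pair})
    return all(dist(i, k) <= max(dist(i, j), dist(j, k))
               for i in points for j in points for k in points
               if len({i, j, k}) == 3)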
Example #15
def plot_metric_vs_inverse_lambda(logfile_paths,
                                  labels,
                                  v100s,
                                  p100s,
                                  k80s,
                                  policies,
                                  metric_fn,
                                  metric_label,
                                  xmax=None,
                                  ymax=None,
                                  output_filename=None,
                                  extrapolate=False,
                                  verbose=False):
    from utils import prune

    plt.figure(figsize=(8, 3))
    ax = plt.subplot2grid((1, 1), (0, 0), colspan=1)

    data = {"input_job_rate": [], "metric": [], "seed": [], "policy": []}
    print(policies)
    for policy in policies:
        relevant_logfile_paths = list(
            reversed(prune(logfile_paths, v100s, p100s, k80s, policy)))
        lambdas = [x[0] for x in relevant_logfile_paths]
        input_job_rates = [3600.0 / x for x in lambdas]
        metrics = [metric_fn(x[1]) for x in relevant_logfile_paths]
        seeds = [x[2] for x in relevant_logfile_paths]
        policy_labels = [labels[policy] for i in range(len(metrics))]

        data["input_job_rate"] += input_job_rates
        data["metric"] += metrics
        data["seed"] += seeds
        data["policy"] += policy_labels
        if len(input_job_rates) > 0 and extrapolate:
            data["input_job_rate"] += [max(input_job_rates) + 0.4]
            data["metric"] += [105.0]
            data["seed"] += [0]
            data["policy"] += [labels[policy]]
    if verbose:
        df = pd.DataFrame(data)
        grouped_df = df.groupby(["policy", "input_job_rate", "seed"])
        for name_of_the_group, group in grouped_df:
            print(name_of_the_group)
            print(group.mean())

    sns.lineplot(x='input_job_rate',
                 y='metric',
                 style='policy',
                 hue='policy',
                 data=data,
                 ci='sd',
                 markers=True)

    ax.set_xlabel("Input job rate (jobs/hr)")
    ax.set_ylabel(metric_label)
    ax.set_xlim([0, xmax])
    ax.set_ylim([0, ymax])
    sns.despine()

    leg = plt.legend(loc='upper left', frameon=False)
    bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
    bb.y0 += 0.22
    bb.y1 += 0.22
    leg.set_bbox_to_anchor(bb, transform=ax.transAxes)

    if output_filename is not None:
        with PdfPages(output_filename) as pdf:
            pdf.savefig(bbox_inches='tight')

    plt.show()
Example #16
    def __init__(
        self,
        data_src,
        batch_size=5,
        dataset='MNIST',
        rst=64,
        prune_classes=None,
        k_shot=5,
    ):
        self.batch_size = batch_size
        self.data_src = data_src

        if dataset == 'chest':
            if self.data_src == self.TEST:
                x, y = utils.load_test_data(rst)
                self.dataset_x = x
                self.dataset_y = y

            else:
                x, y = utils.load_train_data(rst)
                self.dataset_x = x
                self.dataset_y = y

        elif dataset == 'flowers':
            x, y = utils.pickle_load(BASE_DIR +
                                     '/dataset/flowers/imgs_labels.pkl')
            to_train_classes = self.to_train_classes
            to_test_classes = self.to_test_classes

            if self.data_src == self.TEST:
                to_keep = np.array(
                    [i for i, l in enumerate(y) if l in to_test_classes])
                x, y = x[to_keep], y[to_keep]
                self.dataset_x = x
                # TODO start from 0
                self.dataset_y = y
            else:
                to_keep = np.array(
                    [i for i, l in enumerate(y) if l in to_train_classes])
                x, y = x[to_keep], y[to_keep]
                self.dataset_x = x
                # TODO start from 0
                self.dataset_y = y

        else:  # multi chest
            x, y = self._load_data(rst)
            x = utils.denormalize(x)
            to_train_classes = self.to_train_classes
            to_test_classes = self.to_test_classes

            if self.data_src == self.TEST:
                to_keep = []
                counter = {12: 0, 13: 0, 14: 0}
                for i, l in enumerate(y):
                    if l not in to_train_classes and counter[l] < k_shot:
                        to_keep.append(i)
                        counter[l] += 1
                to_keep = np.array(to_keep)
                if len(to_keep) > 0:
                    x, y = x[to_keep], y[to_keep]
                self.dataset_x = x
                self.dataset_y = np.array([l for l in y])
            else:
                to_keep = np.array(
                    [i for i, l in enumerate(y) if l in to_train_classes])
                x, y = x[to_keep], y[to_keep]
                self.dataset_x = x
                self.dataset_y = np.array([l for l in y])

        # Normalize between -1 and 1
        self.dataset_x = utils.normalize(self.dataset_x)

        print(self.dataset_x.shape[0], self.dataset_y.shape[0])
        assert (self.dataset_x.shape[0] == self.dataset_y.shape[0])

        # Compute per class instance count.
        classes = np.unique(self.dataset_y)
        self.classes = classes
        per_class_count = list()
        for c in classes:
            per_class_count.append(np.sum(np.array(self.dataset_y == c)))

        if prune_classes:
            self.dataset_x, self.dataset_y = utils.prune(
                self.dataset_x, self.dataset_y, prune_classes)

        # Recount after pruning
        per_class_count = list()
        for c in classes:
            per_class_count.append(np.sum(np.array(self.dataset_y == c)))
        self.per_class_count = per_class_count

        # List of labels
        self.label_table = [str(c) for c in range(len(self.classes))]

        # Preload all the labels.
        self.labels = self.dataset_y[:]

        # per class ids
        self.per_class_ids = dict()
        ids = np.array(range(len(self.dataset_x)))
        for c in classes:
            self.per_class_ids[c] = ids[self.labels == c]
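
The utils.prune(dataset_x, dataset_y, prune_classes) call caps how much of each listed class survives. A hypothetical sketch, assuming prune_classes maps a class label to the maximum number of samples to keep for that class:

import numpy as np

def prune(x, y, prune_classes):
    # Classes not listed in prune_classes are kept in full.
    keep = []
    for c in np.unique(y):
        idx = np.where(y == c)[0]
        limit = prune_classes.get(c, len(idx))
        keep.append(idx[:limit])
    keep = np.sort(np.concatenate(keep))
    return x[keep], y[keep]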
Example #17
def main(data, target, args):
    model_name = 'model_{0}_{1}_{2}.lp'.format(args.data, args.kernel,
                                               args.function)
    param_name = 'model_{0}_{1}_{2}.prm'.format(args.data, args.kernel,
                                                args.function)
    solution_name = 'solution_{0}_{1}_{2}.sol'.format(args.data, args.kernel,
                                                      args.function)
    ultrametric_name = 'ultrametric_{0}_{1}_{2}'.format(
        args.data, args.kernel, args.function)
    var_name = 'var_{0}_{1}_{2}_{3}.pkl'.format(args.data, args.data,
                                                args.kernel, args.function)
    obj_name = 'obj_{0}_{1}_{2}_{3}.pkl'.format(args.data, args.data,
                                                args.kernel, args.function)
    laminar_name = 'laminar_{0}_{1}_{2}.pkl'.format(args.data, args.kernel,
                                                    args.function)
    tree_name = 'ip_tree_{0}_{1}_{2}.pdf'.format(args.data, args.kernel,
                                                 args.function)
    if args.kernel == 'cosine':
        y = pdist(data, metric='cosine')
        # Make condensed distance matrix into redundant form
        similarity = 1 - y
        similarity = squareform(similarity)
    elif args.kernel == 'gaussian':
        y = pdist(data, metric='sqeuclidean')
        s = 1
        y = 1 - np.exp(-(y**2) / (2 * s**2))
        # Make condensed distance matrix into redundant form
        similarity = 1 - y
        similarity = squareform(similarity)
    elif args.kernel == 'sqeuclidean':
        y = pdist(data, metric='sqeuclidean')
        similarity = -y
        similarity = squareform(similarity)
    if args.function == 'linear':
        m = init_model(data, similarity, target, utils.linear)
    elif args.function == 'quadratic':
        m = init_model(data, similarity, target, utils.quadratic)
    elif args.function == 'cubic':
        m = init_model(data, similarity, target, utils.cubic)
    elif args.function == 'exponential':
        m = init_model(data, similarity, target, utils.exponential)
    elif args.function == 'logarithm':
        m = init_model(data, similarity, target, utils.logarithm)
    else:
        exit(0)
    print('Saving model')
    m.write(model_name)
    # Use concurrent optimization
    m.params.method = 3
    # Limit memory
    m.params.NodeFileStart = 10
    # Limit number of threads
    m.params.Threads = args.num_threads
    # Set MIP Focus
    m.params.MIPFocus = 3
    # Tune parameters
    print('Tuning parameters')
    m.params.tuneResults = 1
    m.tune()
    if m.tuneResultCount > 0:
        m.getTuneResult(0)
    # Set MIP Gap
    m.params.MIPGap = 0.01
    print('Saving model parameters')
    m.write(param_name)
    print('Saving objective functions')
    with open(obj_name, 'wb') as f:
        pickle.dump(m._obj, f)
    print('Optimizing over model')
    m._n = data.shape[0]
    m.optimize(callback_function)
    if m.status == GRB.Status.OPTIMAL:
        # Write solution
        m.write(solution_name)
        print('Check binary triangle for solution: ', check_binary_triangle(m))

        # Get ultrametric
        if args.function == 'linear':
            d = get_ultrametric(m, utils.linear)
            utils.inverse_ultrametric(d, utils.inverse_linear)
        elif args.function == 'quadratic':
            d = get_ultrametric(m, utils.quadratic)
            utils.inverse_ultrametric(d, utils.inverse_quadratic)
        elif args.function == 'cubic':
            d = get_ultrametric(m, utils.cubic)
            utils.inverse_ultrametric(d, utils.inverse_cubic)
        elif args.function == 'exponential':
            d = get_ultrametric(m, utils.exponential)
            utils.inverse_ultrametric(d, utils.inverse_exponential)
        elif args.function == 'logarithm':
            d = get_ultrametric(m, utils.logarithm)
            utils.inverse_ultrametric(d, utils.inverse_logarithm)

        print('d = ', d)
        print('Check ultrametric: ', utils.check_ultrametric(d))
        cost = utils.get_cost(m, d)
        print('Cost of hierarchy = ', cost)
        total_obj = utils.get_total(m)
        print('Total cost = ', total_obj)
        print('Scaled cost = ', cost / total_obj)

        # Complete ultrametric
        utils.complete_ultrametric(d)

        # Build laminar list from d
        print('building laminar list')
        L = utils.build_laminar_list(d)
        print('L = ', L)
        print('Check laminar: ', utils.test_laminar(L))
        labels = [1] * m._n
        one_target = [x + 1 for x in target]

        # Prune laminar list
        pruned = utils.prune(L, one_target, args.prune, labels)
        print('Error on pruning: ', pruned[0])
        with open(ultrametric_name, 'wb') as f:
            pickle.dump(d, f)

        # Build hierarchy
        print('Building hierarchy')
        G = utils.build_hierarchy(d)

        # Draw hierarchy
        print('Drawing hierarchy to ', tree_name)
        utils.draw(G, target, m._n, tree_name)
    elif m.status == GRB.Status.INFEASIBLE:
        # Compute IIS, for debugging purposes
        m.computeIIS()
        m.write('infeasible.ilp')